i386.c: Fix a typo in comment.
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
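/* A minimal sketch of how these macros are used together (assuming the
   mult_init and divide arrays of struct processor_costs in i386.h):

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];
     int div_cost = ix86_cost->divide[MODE_INDEX (SImode)];

   give the cost of starting a SImode multiply and of a SImode divide/mod
   for the current -mtune target.  On the size-cost scale below, since
   COSTS_N_INSNS (N) is (N)*4, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so a 2-byte add counts the same as one generic insn.  */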
70
71 static const
72 struct processor_costs size_cost = { /* costs for tuning for size */
73 COSTS_N_BYTES (2), /* cost of an add instruction */
74 COSTS_N_BYTES (3), /* cost of a lea instruction */
75 COSTS_N_BYTES (2), /* variable shift costs */
76 COSTS_N_BYTES (3), /* constant shift costs */
77 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
78 COSTS_N_BYTES (3), /* HI */
79 COSTS_N_BYTES (3), /* SI */
80 COSTS_N_BYTES (3), /* DI */
81 COSTS_N_BYTES (5)}, /* other */
82 0, /* cost of multiply per each bit set */
83 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 COSTS_N_BYTES (3), /* cost of movsx */
89 COSTS_N_BYTES (3), /* cost of movzx */
90 0, /* "large" insn */
91 2, /* MOVE_RATIO */
92 2, /* cost for loading QImode using movzbl */
93 {2, 2, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 2, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {2, 2, 2}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {2, 2, 2}, /* cost of storing fp registers
101 in SFmode, DFmode and XFmode */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
115 2, /* Branch cost */
116 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
117 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
118 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
119 COSTS_N_BYTES (2), /* cost of FABS instruction. */
120 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
121 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
122 };
123
124 /* Processor costs (relative to an add) */
125 static const
126 struct processor_costs i386_cost = { /* 386 specific costs */
127 COSTS_N_INSNS (1), /* cost of an add instruction */
128 COSTS_N_INSNS (1), /* cost of a lea instruction */
129 COSTS_N_INSNS (3), /* variable shift costs */
130 COSTS_N_INSNS (2), /* constant shift costs */
131 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
132 COSTS_N_INSNS (6), /* HI */
133 COSTS_N_INSNS (6), /* SI */
134 COSTS_N_INSNS (6), /* DI */
135 COSTS_N_INSNS (6)}, /* other */
136 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
137 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
138 COSTS_N_INSNS (23), /* HI */
139 COSTS_N_INSNS (23), /* SI */
140 COSTS_N_INSNS (23), /* DI */
141 COSTS_N_INSNS (23)}, /* other */
142 COSTS_N_INSNS (3), /* cost of movsx */
143 COSTS_N_INSNS (2), /* cost of movzx */
144 15, /* "large" insn */
145 3, /* MOVE_RATIO */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, /* cost of moving SSE register */
162 {4, 8, 16}, /* cost of loading SSE registers
163 in SImode, DImode and TImode */
164 {4, 8, 16}, /* cost of storing SSE registers
165 in SImode, DImode and TImode */
166 3, /* MMX or SSE register to integer */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
169 1, /* Branch cost */
170 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
171 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
172 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
173 COSTS_N_INSNS (22), /* cost of FABS instruction. */
174 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
175 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
176 };
177
178 static const
179 struct processor_costs i486_cost = { /* 486 specific costs */
180 COSTS_N_INSNS (1), /* cost of an add instruction */
181 COSTS_N_INSNS (1), /* cost of a lea instruction */
182 COSTS_N_INSNS (3), /* variable shift costs */
183 COSTS_N_INSNS (2), /* constant shift costs */
184 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
185 COSTS_N_INSNS (12), /* HI */
186 COSTS_N_INSNS (12), /* SI */
187 COSTS_N_INSNS (12), /* DI */
188 COSTS_N_INSNS (12)}, /* other */
189 1, /* cost of multiply per each bit set */
190 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
191 COSTS_N_INSNS (40), /* HI */
192 COSTS_N_INSNS (40), /* SI */
193 COSTS_N_INSNS (40), /* DI */
194 COSTS_N_INSNS (40)}, /* other */
195 COSTS_N_INSNS (3), /* cost of movsx */
196 COSTS_N_INSNS (2), /* cost of movzx */
197 15, /* "large" insn */
198 3, /* MOVE_RATIO */
199 4, /* cost for loading QImode using movzbl */
200 {2, 4, 2}, /* cost of loading integer registers
201 in QImode, HImode and SImode.
202 Relative to reg-reg move (2). */
203 {2, 4, 2}, /* cost of storing integer registers */
204 2, /* cost of reg,reg fld/fst */
205 {8, 8, 8}, /* cost of loading fp registers
206 in SFmode, DFmode and XFmode */
207 {8, 8, 8}, /* cost of storing fp registers
208 in SFmode, DFmode and XFmode */
209 2, /* cost of moving MMX register */
210 {4, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {4, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
222 1, /* Branch cost */
223 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
224 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
225 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
226 COSTS_N_INSNS (3), /* cost of FABS instruction. */
227 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
228 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
229 };
230
231 static const
232 struct processor_costs pentium_cost = {
233 COSTS_N_INSNS (1), /* cost of an add instruction */
234 COSTS_N_INSNS (1), /* cost of a lea instruction */
235 COSTS_N_INSNS (4), /* variable shift costs */
236 COSTS_N_INSNS (1), /* constant shift costs */
237 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
238 COSTS_N_INSNS (11), /* HI */
239 COSTS_N_INSNS (11), /* SI */
240 COSTS_N_INSNS (11), /* DI */
241 COSTS_N_INSNS (11)}, /* other */
242 0, /* cost of multiply per each bit set */
243 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
244 COSTS_N_INSNS (25), /* HI */
245 COSTS_N_INSNS (25), /* SI */
246 COSTS_N_INSNS (25), /* DI */
247 COSTS_N_INSNS (25)}, /* other */
248 COSTS_N_INSNS (3), /* cost of movsx */
249 COSTS_N_INSNS (2), /* cost of movzx */
250 8, /* "large" insn */
251 6, /* MOVE_RATIO */
252 6, /* cost for loading QImode using movzbl */
253 {2, 4, 2}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 4, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
261 in SFmode, DFmode and XFmode */
262 8, /* cost of moving MMX register */
263 {8, 8}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {8, 8}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {4, 8, 16}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {4, 8, 16}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
275 2, /* Branch cost */
276 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (1), /* cost of FABS instruction. */
280 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
282 };
283
284 static const
285 struct processor_costs pentiumpro_cost = {
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (1), /* variable shift costs */
289 COSTS_N_INSNS (1), /* constant shift costs */
290 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (4), /* HI */
292 COSTS_N_INSNS (4), /* SI */
293 COSTS_N_INSNS (4), /* DI */
294 COSTS_N_INSNS (4)}, /* other */
295 0, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (17), /* HI */
298 COSTS_N_INSNS (17), /* SI */
299 COSTS_N_INSNS (17), /* DI */
300 COSTS_N_INSNS (17)}, /* other */
301 COSTS_N_INSNS (1), /* cost of movsx */
302 COSTS_N_INSNS (1), /* cost of movzx */
303 8, /* "large" insn */
304 6, /* MOVE_RATIO */
305 2, /* cost for loading QImode using movzbl */
306 {4, 4, 4}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 2, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {2, 2, 6}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {4, 4, 6}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {2, 2}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {2, 2}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {2, 2, 8}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {2, 2, 8}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 32, /* size of prefetch block */
327 6, /* number of parallel prefetches */
328 2, /* Branch cost */
329 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (2), /* cost of FABS instruction. */
333 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
335 };
336
337 static const
338 struct processor_costs geode_cost = {
339 COSTS_N_INSNS (1), /* cost of an add instruction */
340 COSTS_N_INSNS (1), /* cost of a lea instruction */
341 COSTS_N_INSNS (2), /* variable shift costs */
342 COSTS_N_INSNS (1), /* constant shift costs */
343 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
344 COSTS_N_INSNS (4), /* HI */
345 COSTS_N_INSNS (7), /* SI */
346 COSTS_N_INSNS (7), /* DI */
347 COSTS_N_INSNS (7)}, /* other */
348 0, /* cost of multiply per each bit set */
349 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
350 COSTS_N_INSNS (23), /* HI */
351 COSTS_N_INSNS (39), /* SI */
352 COSTS_N_INSNS (39), /* DI */
353 COSTS_N_INSNS (39)}, /* other */
354 COSTS_N_INSNS (1), /* cost of movsx */
355 COSTS_N_INSNS (1), /* cost of movzx */
356 8, /* "large" insn */
357 4, /* MOVE_RATIO */
358 1, /* cost for loading QImode using movzbl */
359 {1, 1, 1}, /* cost of loading integer registers
360 in QImode, HImode and SImode.
361 Relative to reg-reg move (2). */
362 {1, 1, 1}, /* cost of storing integer registers */
363 1, /* cost of reg,reg fld/fst */
364 {1, 1, 1}, /* cost of loading fp registers
365 in SFmode, DFmode and XFmode */
366 {4, 6, 6}, /* cost of storing fp registers
367 in SFmode, DFmode and XFmode */
368
369 1, /* cost of moving MMX register */
370 {1, 1}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {1, 1}, /* cost of storing MMX registers
373 in SImode and DImode */
374 1, /* cost of moving SSE register */
375 {1, 1, 1}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {1, 1, 1}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 1, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
382 1, /* Branch cost */
383 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
389 };
390
391 static const
392 struct processor_costs k6_cost = {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (3), /* HI */
399 COSTS_N_INSNS (3), /* SI */
400 COSTS_N_INSNS (3), /* DI */
401 COSTS_N_INSNS (3)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (18), /* HI */
405 COSTS_N_INSNS (18), /* SI */
406 COSTS_N_INSNS (18), /* DI */
407 COSTS_N_INSNS (18)}, /* other */
408 COSTS_N_INSNS (2), /* cost of movsx */
409 COSTS_N_INSNS (2), /* cost of movzx */
410 8, /* "large" insn */
411 4, /* MOVE_RATIO */
412 3, /* cost for loading QImode using movzbl */
413 {4, 5, 4}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {2, 3, 2}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {6, 6, 6}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {4, 4, 4}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {2, 2}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {2, 2}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {2, 2, 8}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {2, 2, 8}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 6, /* MMX or SSE register to integer */
433 32, /* size of prefetch block */
434 1, /* number of parallel prefetches */
435 1, /* Branch cost */
436 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
442 };
443
444 static const
445 struct processor_costs athlon_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (5), /* HI */
452 COSTS_N_INSNS (5), /* SI */
453 COSTS_N_INSNS (5), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
464 9, /* MOVE_RATIO */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {4, 4}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 4, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
488 5, /* Branch cost */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
495 };
496
497 static const
498 struct processor_costs k8_cost = {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (2), /* cost of a lea instruction */
501 COSTS_N_INSNS (1), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (3), /* SI */
506 COSTS_N_INSNS (4), /* DI */
507 COSTS_N_INSNS (5)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (26), /* HI */
511 COSTS_N_INSNS (42), /* SI */
512 COSTS_N_INSNS (74), /* DI */
513 COSTS_N_INSNS (74)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
517 9, /* MOVE_RATIO */
518 4, /* cost for loading QImode using movzbl */
519 {3, 4, 3}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {3, 4, 3}, /* cost of storing integer registers */
523 4, /* cost of reg,reg fld/fst */
524 {4, 4, 12}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {6, 6, 8}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {3, 3}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {4, 4}, /* cost of storing MMX registers
532 in SImode and DImode */
533 2, /* cost of moving SSE register */
534 {4, 3, 6}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {4, 4, 5}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 5, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
540 /* New AMD processors never drop prefetches; if they cannot be performed
541 immediately, they are queued. We set the number of simultaneous prefetches
542 to a large constant to reflect this (it is probably not a good idea not
543 to limit the number of prefetches at all, as their execution also takes
544 some time). */
545 100, /* number of parallel prefetches */
546 5, /* Branch cost */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 };
554
555 static const
556 struct processor_costs pentium4_cost = {
557 COSTS_N_INSNS (1), /* cost of an add instruction */
558 COSTS_N_INSNS (3), /* cost of a lea instruction */
559 COSTS_N_INSNS (4), /* variable shift costs */
560 COSTS_N_INSNS (4), /* constant shift costs */
561 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
562 COSTS_N_INSNS (15), /* HI */
563 COSTS_N_INSNS (15), /* SI */
564 COSTS_N_INSNS (15), /* DI */
565 COSTS_N_INSNS (15)}, /* other */
566 0, /* cost of multiply per each bit set */
567 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
568 COSTS_N_INSNS (56), /* HI */
569 COSTS_N_INSNS (56), /* SI */
570 COSTS_N_INSNS (56), /* DI */
571 COSTS_N_INSNS (56)}, /* other */
572 COSTS_N_INSNS (1), /* cost of movsx */
573 COSTS_N_INSNS (1), /* cost of movzx */
574 16, /* "large" insn */
575 6, /* MOVE_RATIO */
576 2, /* cost for loading QImode using movzbl */
577 {4, 5, 4}, /* cost of loading integer registers
578 in QImode, HImode and SImode.
579 Relative to reg-reg move (2). */
580 {2, 3, 2}, /* cost of storing integer registers */
581 2, /* cost of reg,reg fld/fst */
582 {2, 2, 6}, /* cost of loading fp registers
583 in SFmode, DFmode and XFmode */
584 {4, 4, 6}, /* cost of storing fp registers
585 in SFmode, DFmode and XFmode */
586 2, /* cost of moving MMX register */
587 {2, 2}, /* cost of loading MMX registers
588 in SImode and DImode */
589 {2, 2}, /* cost of storing MMX registers
590 in SImode and DImode */
591 12, /* cost of moving SSE register */
592 {12, 12, 12}, /* cost of loading SSE registers
593 in SImode, DImode and TImode */
594 {2, 2, 8}, /* cost of storing SSE registers
595 in SImode, DImode and TImode */
596 10, /* MMX or SSE register to integer */
597 64, /* size of prefetch block */
598 6, /* number of parallel prefetches */
599 2, /* Branch cost */
600 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
601 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
602 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
603 COSTS_N_INSNS (2), /* cost of FABS instruction. */
604 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
605 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
606 };
607
608 static const
609 struct processor_costs nocona_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (1), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (10), /* HI */
616 COSTS_N_INSNS (10), /* SI */
617 COSTS_N_INSNS (10), /* DI */
618 COSTS_N_INSNS (10)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (66), /* HI */
622 COSTS_N_INSNS (66), /* SI */
623 COSTS_N_INSNS (66), /* DI */
624 COSTS_N_INSNS (66)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 16, /* "large" insn */
628 17, /* MOVE_RATIO */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 3, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 6, /* cost of moving MMX register */
640 {12, 12}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {12, 12}, /* cost of storing MMX registers
643 in SImode and DImode */
644 6, /* cost of moving SSE register */
645 {12, 12, 12}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {12, 12, 12}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 8, /* MMX or SSE register to integer */
650 128, /* size of prefetch block */
651 8, /* number of parallel prefetches */
652 1, /* Branch cost */
653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
659 };
660
661 /* Generic64 should produce code tuned for Nocona and K8. */
662 static const
663 struct processor_costs generic64_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 /* On all chips taken into consideration, lea is 2 cycles or more. With
666 this cost, however, our current implementation of synth_mult results in
667 the use of unnecessary temporary registers, causing regressions on several
668 SPECfp benchmarks. */
669 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
670 COSTS_N_INSNS (1), /* variable shift costs */
671 COSTS_N_INSNS (1), /* constant shift costs */
672 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
673 COSTS_N_INSNS (4), /* HI */
674 COSTS_N_INSNS (3), /* SI */
675 COSTS_N_INSNS (4), /* DI */
676 COSTS_N_INSNS (2)}, /* other */
677 0, /* cost of multiply per each bit set */
678 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
679 COSTS_N_INSNS (26), /* HI */
680 COSTS_N_INSNS (42), /* SI */
681 COSTS_N_INSNS (74), /* DI */
682 COSTS_N_INSNS (74)}, /* other */
683 COSTS_N_INSNS (1), /* cost of movsx */
684 COSTS_N_INSNS (1), /* cost of movzx */
685 8, /* "large" insn */
686 17, /* MOVE_RATIO */
687 4, /* cost for loading QImode using movzbl */
688 {4, 4, 4}, /* cost of loading integer registers
689 in QImode, HImode and SImode.
690 Relative to reg-reg move (2). */
691 {4, 4, 4}, /* cost of storing integer registers */
692 4, /* cost of reg,reg fld/fst */
693 {12, 12, 12}, /* cost of loading fp registers
694 in SFmode, DFmode and XFmode */
695 {6, 6, 8}, /* cost of storing fp registers
696 in SFmode, DFmode and XFmode */
697 2, /* cost of moving MMX register */
698 {8, 8}, /* cost of loading MMX registers
699 in SImode and DImode */
700 {8, 8}, /* cost of storing MMX registers
701 in SImode and DImode */
702 2, /* cost of moving SSE register */
703 {8, 8, 8}, /* cost of loading SSE registers
704 in SImode, DImode and TImode */
705 {8, 8, 8}, /* cost of storing SSE registers
706 in SImode, DImode and TImode */
707 5, /* MMX or SSE register to integer */
708 64, /* size of prefetch block */
709 6, /* number of parallel prefetches */
710 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
711 value is increased to the perhaps more appropriate value of 5. */
712 3, /* Branch cost */
713 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
714 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
715 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
716 COSTS_N_INSNS (8), /* cost of FABS instruction. */
717 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
718 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
719 };
720
721 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
722 static const
723 struct processor_costs generic32_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (4), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (4), /* DI */
732 COSTS_N_INSNS (2)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (26), /* HI */
736 COSTS_N_INSNS (42), /* SI */
737 COSTS_N_INSNS (74), /* DI */
738 COSTS_N_INSNS (74)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
742 17, /* MOVE_RATIO */
743 4, /* cost for loading QImode using movzbl */
744 {4, 4, 4}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 4, /* cost of reg,reg fld/fst */
749 {12, 12, 12}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {6, 6, 8}, /* cost of storing fp registers
752 in SFmode, DFmode and XFmode */
753 2, /* cost of moving MMX register */
754 {8, 8}, /* cost of loading MMX registers
755 in SImode and DImode */
756 {8, 8}, /* cost of storing MMX registers
757 in SImode and DImode */
758 2, /* cost of moving SSE register */
759 {8, 8, 8}, /* cost of loading SSE registers
760 in SImode, DImode and TImode */
761 {8, 8, 8}, /* cost of storing SSE registers
762 in SImode, DImode and TImode */
763 5, /* MMX or SSE register to integer */
764 64, /* size of prefetch block */
765 6, /* number of parallel prefetches */
766 3, /* Branch cost */
767 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (8), /* cost of FABS instruction. */
771 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
773 };
774
775 const struct processor_costs *ix86_cost = &pentium_cost;
776
777 /* Processor feature/optimization bitmasks. */
778 #define m_386 (1<<PROCESSOR_I386)
779 #define m_486 (1<<PROCESSOR_I486)
780 #define m_PENT (1<<PROCESSOR_PENTIUM)
781 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
782 #define m_GEODE (1<<PROCESSOR_GEODE)
783 #define m_K6_GEODE (m_K6 | m_GEODE)
784 #define m_K6 (1<<PROCESSOR_K6)
785 #define m_ATHLON (1<<PROCESSOR_ATHLON)
786 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
787 #define m_K8 (1<<PROCESSOR_K8)
788 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
789 #define m_NOCONA (1<<PROCESSOR_NOCONA)
790 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
791 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
792 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
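/* A sketch of how these masks are consumed (the exact macro names live in
   i386.h; TUNEMASK and TARGET_USE_LEAVE are shown here only as the assumed
   pattern): each tuning vector below is a bitmask of processors, and the
   per-target test simply checks the bit of the processor selected by -mtune:

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so, for example, x86_use_leave enables use of the leave instruction for
   386, K6/Geode, Athlon/K8 and generic64 tuning.  */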
793
794 /* Generic instruction choice should be a common subset of supported CPUs
795 (PPro/PENT4/NOCONA/Athlon/K8). */
796
797 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
798 Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
799 generic because it does not work well with PPro based chips. */
800 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
801 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
802 const int x86_zero_extend_with_and = m_486 | m_PENT;
803 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
804 const int x86_double_with_add = ~m_386;
805 const int x86_use_bit_test = m_386;
806 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
807 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
808 const int x86_3dnow_a = m_ATHLON_K8;
809 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
810 /* Branch hints were put in P4 based on simulation results. But
811 after P4 was made, no performance benefit was observed with
812 branch hints; they also increase the code size. As a result,
813 icc never generates branch hints. */
814 const int x86_branch_hints = 0;
815 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
816 /* We probably ought to watch for partial register stalls on the Generic32
817 compilation setting as well. However, in the current implementation
818 partial register stalls are not eliminated very well - they can
819 be introduced via subregs synthesized by combine and can happen
820 in caller/callee saving sequences.
821 Because this option pays back little on PPro based chips and conflicts
822 with the partial register dependencies used by Athlon/P4 based chips, it is
823 better to leave it off for generic32 for now. */
824 const int x86_partial_reg_stall = m_PPRO;
825 const int x86_partial_flag_reg_stall = m_GENERIC;
826 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
827 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
828 const int x86_use_mov0 = m_K6;
829 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
830 const int x86_read_modify_write = ~m_PENT;
831 const int x86_read_modify = ~(m_PENT | m_PPRO);
832 const int x86_split_long_moves = m_PPRO;
833 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
834 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
835 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
836 const int x86_qimode_math = ~(0);
837 const int x86_promote_qi_regs = 0;
838 /* On PPro this flag is meant to avoid partial register stalls. Just like
839 x86_partial_reg_stall, this option might be considered for Generic32
840 if our scheme for avoiding partial stalls were more effective. */
841 const int x86_himode_math = ~(m_PPRO);
842 const int x86_promote_hi_regs = m_PPRO;
843 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
844 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
845 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC;
846 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
847 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_GEODE);
848 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
849 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
850 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
851 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
852 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
853 const int x86_shift1 = ~m_486;
854 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
855 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
856 that treat 128bit SSE registers as single units and K8 based chips that
857 divide SSE registers into two 64bit halves.
858 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
859 to allow register renaming on 128bit SSE units, but usually results in one
860 extra microop on 64bit SSE units. Experimental results show that disabling
861 this option on P4 brings over a 20% SPECfp regression, while enabling it on
862 K8 brings roughly a 2.4% regression that can be partly masked by careful
863 scheduling of moves. */
864 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
865 /* Set for machines where the type and dependencies are resolved on SSE
866 register parts instead of whole registers, so we may maintain just the
867 lower part of scalar values in the proper format, leaving the upper part
868 undefined. */
869 const int x86_sse_split_regs = m_ATHLON_K8;
870 const int x86_sse_typeless_stores = m_ATHLON_K8;
871 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
872 const int x86_use_ffreep = m_ATHLON_K8;
873 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
874 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
875
876 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
877 integer data in xmm registers, which results in pretty abysmal code. */
878 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
879
880 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
881 /* Some CPU cores are not able to predict more than 4 branch instructions in
882 the 16 byte window. */
883 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
884 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_GENERIC;
885 const int x86_use_bt = m_ATHLON_K8;
886 /* Compare and exchange was added for 80486. */
887 const int x86_cmpxchg = ~m_386;
888 /* Compare and exchange 8 bytes was added for the Pentium. */
889 const int x86_cmpxchg8b = ~(m_386 | m_486);
890 /* Compare and exchange 16 bytes was added for Nocona. */
891 const int x86_cmpxchg16b = m_NOCONA;
892 /* Exchange and add was added for 80486. */
893 const int x86_xadd = ~m_386;
894 /* Byteswap was added for 80486. */
895 const int x86_bswap = ~m_386;
896 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
897
898 /* In case the average insn count for a single function invocation is
899 lower than this constant, emit fast (but longer) prologue and
900 epilogue code. */
901 #define FAST_PROLOGUE_INSN_COUNT 20
902
903 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
904 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
905 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
906 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
907
908 /* Array of the smallest class containing reg number REGNO, indexed by
909 REGNO. Used by REGNO_REG_CLASS in i386.h. */
910
911 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
912 {
913 /* ax, dx, cx, bx */
914 AREG, DREG, CREG, BREG,
915 /* si, di, bp, sp */
916 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
917 /* FP registers */
918 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
919 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
920 /* arg pointer */
921 NON_Q_REGS,
922 /* flags, fpsr, fpcr, dirflag, frame */
923 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
924 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
925 SSE_REGS, SSE_REGS,
926 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
927 MMX_REGS, MMX_REGS,
928 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
929 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
930 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
931 SSE_REGS, SSE_REGS,
932 };
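/* Reading the table: REGNO_REG_CLASS in i386.h indexes this array directly,
   so for instance REGNO_REG_CLASS (1) yields DREG (hard reg 1 is %edx) and
   REGNO_REG_CLASS (7) yields NON_Q_REGS (hard reg 7 is %esp), following the
   ordering shown above (ax, dx, cx, bx, si, di, bp, sp).  */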
933
934 /* The "default" register map used in 32bit mode. */
935
936 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
937 {
938 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
939 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
940 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
941 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
942 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
943 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
944 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
945 };
946
947 static int const x86_64_int_parameter_registers[6] =
948 {
949 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
950 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
951 };
952
953 static int const x86_64_int_return_registers[4] =
954 {
955 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
956 };
957
958 /* The "default" register map used in 64bit mode. */
959 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
960 {
961 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
962 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
963 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
964 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
965 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
966 8,9,10,11,12,13,14,15, /* extended integer registers */
967 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
968 };
969
970 /* Define the register numbers to be used in Dwarf debugging information.
971 The SVR4 reference port C compiler uses the following register numbers
972 in its Dwarf output code:
973 0 for %eax (gcc regno = 0)
974 1 for %ecx (gcc regno = 2)
975 2 for %edx (gcc regno = 1)
976 3 for %ebx (gcc regno = 3)
977 4 for %esp (gcc regno = 7)
978 5 for %ebp (gcc regno = 6)
979 6 for %esi (gcc regno = 4)
980 7 for %edi (gcc regno = 5)
981 The following three DWARF register numbers are never generated by
982 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
983 believes these numbers have these meanings.
984 8 for %eip (no gcc equivalent)
985 9 for %eflags (gcc regno = 17)
986 10 for %trapno (no gcc equivalent)
987 It is not at all clear how we should number the FP stack registers
988 for the x86 architecture. If the version of SDB on x86/svr4 were
989 a bit less brain dead with respect to floating-point then we would
990 have a precedent to follow with respect to DWARF register numbers
991 for x86 FP registers, but the SDB on x86/svr4 is so completely
992 broken with respect to FP registers that it is hardly worth thinking
993 of it as something to strive for compatibility with.
994 The version of x86/svr4 SDB I have at the moment does (partially)
995 seem to believe that DWARF register number 11 is associated with
996 the x86 register %st(0), but that's about all. Higher DWARF
997 register numbers don't seem to be associated with anything in
998 particular, and even for DWARF regno 11, SDB only seems to under-
999 stand that it should say that a variable lives in %st(0) (when
1000 asked via an `=' command) if we said it was in DWARF regno 11,
1001 but SDB still prints garbage when asked for the value of the
1002 variable in question (via a `/' command).
1003 (Also note that the labels SDB prints for various FP stack regs
1004 when doing an `x' command are all wrong.)
1005 Note that these problems generally don't affect the native SVR4
1006 C compiler because it doesn't allow the use of -O with -g and
1007 because when it is *not* optimizing, it allocates a memory
1008 location for each floating-point variable, and the memory
1009 location is what gets described in the DWARF AT_location
1010 attribute for the variable in question.
1011 Regardless of the severe mental illness of the x86/svr4 SDB, we
1012 do something sensible here and we use the following DWARF
1013 register numbers. Note that these are all stack-top-relative
1014 numbers.
1015 11 for %st(0) (gcc regno = 8)
1016 12 for %st(1) (gcc regno = 9)
1017 13 for %st(2) (gcc regno = 10)
1018 14 for %st(3) (gcc regno = 11)
1019 15 for %st(4) (gcc regno = 12)
1020 16 for %st(5) (gcc regno = 13)
1021 17 for %st(6) (gcc regno = 14)
1022 18 for %st(7) (gcc regno = 15)
1023 */
1024 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1025 {
1026 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1027 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1028 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1029 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1030 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1031 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1032 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1033 };
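/* Reading the map above: svr4_dbx_register_map[2] is 1, i.e. gcc regno 2
   (%ecx) is emitted as DWARF register 1, matching the SVR4 numbering listed
   in the comment; the FP stack registers (gcc regnos 8-15) map to DWARF
   registers 11-18.  */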
1034
1035 /* Test and compare insns in i386.md store the information needed to
1036 generate branch and scc insns here. */
1037
1038 rtx ix86_compare_op0 = NULL_RTX;
1039 rtx ix86_compare_op1 = NULL_RTX;
1040 rtx ix86_compare_emitted = NULL_RTX;
1041
1042 /* Size of the register save area. */
1043 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
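/* A worked example of the size (assuming the 64-bit values REGPARM_MAX == 6
   and SSE_REGPARM_MAX == 8 from i386.h, with UNITS_PER_WORD == 8): the save
   area is 6*8 + 8*16 = 176 bytes, i.e. the six integer argument registers
   followed by the eight SSE argument registers of the x86-64 ABI.  */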
1044
1045 /* Define the structure for the machine field in struct function. */
1046
1047 struct stack_local_entry GTY(())
1048 {
1049 unsigned short mode;
1050 unsigned short n;
1051 rtx rtl;
1052 struct stack_local_entry *next;
1053 };
1054
1055 /* Structure describing stack frame layout.
1056 Stack grows downward:
1057
1058 [arguments]
1059 <- ARG_POINTER
1060 saved pc
1061
1062 saved frame pointer if frame_pointer_needed
1063 <- HARD_FRAME_POINTER
1064 [saved regs]
1065
1066 [padding1] \
1067 )
1068 [va_arg registers] (
1069 > to_allocate <- FRAME_POINTER
1070 [frame] (
1071 )
1072 [padding2] /
1073 */
1074 struct ix86_frame
1075 {
1076 int nregs;
1077 int padding1;
1078 int va_arg_size;
1079 HOST_WIDE_INT frame;
1080 int padding2;
1081 int outgoing_arguments_size;
1082 int red_zone_size;
1083
1084 HOST_WIDE_INT to_allocate;
1085 /* The offsets relative to ARG_POINTER. */
1086 HOST_WIDE_INT frame_pointer_offset;
1087 HOST_WIDE_INT hard_frame_pointer_offset;
1088 HOST_WIDE_INT stack_pointer_offset;
1089
1090 /* When save_regs_using_mov is set, emit prologue using
1091 move instead of push instructions. */
1092 bool save_regs_using_mov;
1093 };
1094
1095 /* Code model option. */
1096 enum cmodel ix86_cmodel;
1097 /* Asm dialect. */
1098 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1099 /* TLS dialects. */
1100 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1101
1102 /* Which unit we are generating floating point math for. */
1103 enum fpmath_unit ix86_fpmath;
1104
1105 /* Which CPU we are scheduling for. */
1106 enum processor_type ix86_tune;
1107 /* Which instruction set architecture to use. */
1108 enum processor_type ix86_arch;
1109
1110 /* True if the SSE prefetch instruction is not a NOP. */
1111 int x86_prefetch_sse;
1112
1113 /* ix86_regparm_string as a number */
1114 static int ix86_regparm;
1115
1116 /* -mstackrealign option */
1117 extern int ix86_force_align_arg_pointer;
1118 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1119
1120 /* Preferred alignment for stack boundary in bits. */
1121 unsigned int ix86_preferred_stack_boundary;
1122
1123 /* Values 1-5: see jump.c */
1124 int ix86_branch_cost;
1125
1126 /* Variables which are this size or smaller are put in the data/bss
1127 or ldata/lbss sections. */
1128
1129 int ix86_section_threshold = 65536;
1130
1131 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1132 char internal_label_prefix[16];
1133 int internal_label_prefix_len;
1134 \f
1135 static bool ix86_handle_option (size_t, const char *, int);
1136 static void output_pic_addr_const (FILE *, rtx, int);
1137 static void put_condition_code (enum rtx_code, enum machine_mode,
1138 int, int, FILE *);
1139 static const char *get_some_local_dynamic_name (void);
1140 static int get_some_local_dynamic_name_1 (rtx *, void *);
1141 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1142 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1143 rtx *);
1144 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1145 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1146 enum machine_mode);
1147 static rtx get_thread_pointer (int);
1148 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1149 static void get_pc_thunk_name (char [32], unsigned int);
1150 static rtx gen_push (rtx);
1151 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1152 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1153 static struct machine_function * ix86_init_machine_status (void);
1154 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1155 static int ix86_nsaved_regs (void);
1156 static void ix86_emit_save_regs (void);
1157 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1158 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1159 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1160 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1161 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1162 static rtx ix86_expand_aligntest (rtx, int);
1163 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1164 static int ix86_issue_rate (void);
1165 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1166 static int ia32_multipass_dfa_lookahead (void);
1167 static void ix86_init_mmx_sse_builtins (void);
1168 static rtx x86_this_parameter (tree);
1169 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1170 HOST_WIDE_INT, tree);
1171 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1172 static void x86_file_start (void);
1173 static void ix86_reorg (void);
1174 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1175 static tree ix86_build_builtin_va_list (void);
1176 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1177 tree, int *, int);
1178 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1179 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1180 static bool ix86_vector_mode_supported_p (enum machine_mode);
1181
1182 static int ix86_address_cost (rtx);
1183 static bool ix86_cannot_force_const_mem (rtx);
1184 static rtx ix86_delegitimize_address (rtx);
1185
1186 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1187
1188 struct builtin_description;
1189 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1190 tree, rtx);
1191 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1192 tree, rtx);
1193 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1194 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1195 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1196 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1197 static rtx safe_vector_operand (rtx, enum machine_mode);
1198 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1199 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1200 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1201 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1202 static int ix86_fp_comparison_cost (enum rtx_code code);
1203 static unsigned int ix86_select_alt_pic_regnum (void);
1204 static int ix86_save_reg (unsigned int, int);
1205 static void ix86_compute_frame_layout (struct ix86_frame *);
1206 static int ix86_comp_type_attributes (tree, tree);
1207 static int ix86_function_regparm (tree, tree);
1208 const struct attribute_spec ix86_attribute_table[];
1209 static bool ix86_function_ok_for_sibcall (tree, tree);
1210 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1211 static int ix86_value_regno (enum machine_mode, tree, tree);
1212 static bool contains_128bit_aligned_vector_p (tree);
1213 static rtx ix86_struct_value_rtx (tree, int);
1214 static bool ix86_ms_bitfield_layout_p (tree);
1215 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1216 static int extended_reg_mentioned_1 (rtx *, void *);
1217 static bool ix86_rtx_costs (rtx, int, int, int *);
1218 static int min_insn_size (rtx);
1219 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1220 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1221 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1222 tree, bool);
1223 static void ix86_init_builtins (void);
1224 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1225 static const char *ix86_mangle_fundamental_type (tree);
1226 static tree ix86_stack_protect_fail (void);
1227 static rtx ix86_internal_arg_pointer (void);
1228 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1229
1230 /* This function is only used on Solaris. */
1231 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1232 ATTRIBUTE_UNUSED;
1233
1234 /* Register class used for passing a given 64bit part of the argument.
1235 These represent classes as documented by the PS ABI, with the exception of
1236 the SSESF and SSEDF classes, which are basically the SSE class, except that
1237 gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
1238 
1239 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1240 whenever possible (the upper half does contain padding).
1241 */
1242 enum x86_64_reg_class
1243 {
1244 X86_64_NO_CLASS,
1245 X86_64_INTEGER_CLASS,
1246 X86_64_INTEGERSI_CLASS,
1247 X86_64_SSE_CLASS,
1248 X86_64_SSESF_CLASS,
1249 X86_64_SSEDF_CLASS,
1250 X86_64_SSEUP_CLASS,
1251 X86_64_X87_CLASS,
1252 X86_64_X87UP_CLASS,
1253 X86_64_COMPLEX_X87_CLASS,
1254 X86_64_MEMORY_CLASS
1255 };
1256 static const char * const x86_64_reg_class_name[] = {
1257 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1258 "sseup", "x87", "x87up", "cplx87", "no"
1259 };
1260
1261 #define MAX_CLASSES 4
1262
1263 /* Table of constants used by fldpi, fldln2, etc.... */
1264 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1265 static bool ext_80387_constants_init = 0;
1266 static void init_ext_80387_constants (void);
1267 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1268 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1269 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1270 static section *x86_64_elf_select_section (tree decl, int reloc,
1271 unsigned HOST_WIDE_INT align)
1272 ATTRIBUTE_UNUSED;
1273 \f
1274 /* Initialize the GCC target structure. */
1275 #undef TARGET_ATTRIBUTE_TABLE
1276 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1277 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1278 # undef TARGET_MERGE_DECL_ATTRIBUTES
1279 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1280 #endif
1281
1282 #undef TARGET_COMP_TYPE_ATTRIBUTES
1283 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1284
1285 #undef TARGET_INIT_BUILTINS
1286 #define TARGET_INIT_BUILTINS ix86_init_builtins
1287 #undef TARGET_EXPAND_BUILTIN
1288 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1289
1290 #undef TARGET_ASM_FUNCTION_EPILOGUE
1291 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1292
1293 #undef TARGET_ENCODE_SECTION_INFO
1294 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1295 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1296 #else
1297 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1298 #endif
1299
1300 #undef TARGET_ASM_OPEN_PAREN
1301 #define TARGET_ASM_OPEN_PAREN ""
1302 #undef TARGET_ASM_CLOSE_PAREN
1303 #define TARGET_ASM_CLOSE_PAREN ""
1304
1305 #undef TARGET_ASM_ALIGNED_HI_OP
1306 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1307 #undef TARGET_ASM_ALIGNED_SI_OP
1308 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1309 #ifdef ASM_QUAD
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1312 #endif
1313
1314 #undef TARGET_ASM_UNALIGNED_HI_OP
1315 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1316 #undef TARGET_ASM_UNALIGNED_SI_OP
1317 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1318 #undef TARGET_ASM_UNALIGNED_DI_OP
1319 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1320
1321 #undef TARGET_SCHED_ADJUST_COST
1322 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1323 #undef TARGET_SCHED_ISSUE_RATE
1324 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1325 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1326 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1327 ia32_multipass_dfa_lookahead
1328
1329 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1330 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1331
1332 #ifdef HAVE_AS_TLS
1333 #undef TARGET_HAVE_TLS
1334 #define TARGET_HAVE_TLS true
1335 #endif
1336 #undef TARGET_CANNOT_FORCE_CONST_MEM
1337 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1338 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1339 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1340
1341 #undef TARGET_DELEGITIMIZE_ADDRESS
1342 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1343
1344 #undef TARGET_MS_BITFIELD_LAYOUT_P
1345 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1346
1347 #if TARGET_MACHO
1348 #undef TARGET_BINDS_LOCAL_P
1349 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1350 #endif
1351
1352 #undef TARGET_ASM_OUTPUT_MI_THUNK
1353 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1354 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1355 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1356
1357 #undef TARGET_ASM_FILE_START
1358 #define TARGET_ASM_FILE_START x86_file_start
1359
1360 #undef TARGET_DEFAULT_TARGET_FLAGS
1361 #define TARGET_DEFAULT_TARGET_FLAGS \
1362 (TARGET_DEFAULT \
1363 | TARGET_64BIT_DEFAULT \
1364 | TARGET_SUBTARGET_DEFAULT \
1365 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1366
1367 #undef TARGET_HANDLE_OPTION
1368 #define TARGET_HANDLE_OPTION ix86_handle_option
1369
1370 #undef TARGET_RTX_COSTS
1371 #define TARGET_RTX_COSTS ix86_rtx_costs
1372 #undef TARGET_ADDRESS_COST
1373 #define TARGET_ADDRESS_COST ix86_address_cost
1374
1375 #undef TARGET_FIXED_CONDITION_CODE_REGS
1376 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1377 #undef TARGET_CC_MODES_COMPATIBLE
1378 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1379
1380 #undef TARGET_MACHINE_DEPENDENT_REORG
1381 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1382
1383 #undef TARGET_BUILD_BUILTIN_VA_LIST
1384 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1385
1386 #undef TARGET_MD_ASM_CLOBBERS
1387 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1388
1389 #undef TARGET_PROMOTE_PROTOTYPES
1390 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1391 #undef TARGET_STRUCT_VALUE_RTX
1392 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1393 #undef TARGET_SETUP_INCOMING_VARARGS
1394 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1395 #undef TARGET_MUST_PASS_IN_STACK
1396 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1397 #undef TARGET_PASS_BY_REFERENCE
1398 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1399 #undef TARGET_INTERNAL_ARG_POINTER
1400 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1401 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1402 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1403
1404 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1405 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1406
1407 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1408 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1409
1410 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1411 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1412
1413 #ifdef HAVE_AS_TLS
1414 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1415 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1416 #endif
1417
1418 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1419 #undef TARGET_INSERT_ATTRIBUTES
1420 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1421 #endif
1422
1423 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1424 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1425
1426 #undef TARGET_STACK_PROTECT_FAIL
1427 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1428
1429 #undef TARGET_FUNCTION_VALUE
1430 #define TARGET_FUNCTION_VALUE ix86_function_value
1431
1432 struct gcc_target targetm = TARGET_INITIALIZER;
1433
1434 \f
1435 /* The svr4 ABI for the i386 says that records and unions are returned
1436 in memory. */
1437 #ifndef DEFAULT_PCC_STRUCT_RETURN
1438 #define DEFAULT_PCC_STRUCT_RETURN 1
1439 #endif
1440
1441 /* Implement TARGET_HANDLE_OPTION. */
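/* Disabling a base ISA option also clears the extensions built on top of
   it: e.g. "-mno-sse" (OPT_msse with VALUE == 0) clears MASK_SSE2 and
   MASK_SSE3 as well, and records them in target_flags_explicit so that a
   later -march selection does not silently re-enable them.  */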
1442
1443 static bool
1444 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1445 {
1446 switch (code)
1447 {
1448 case OPT_m3dnow:
1449 if (!value)
1450 {
1451 target_flags &= ~MASK_3DNOW_A;
1452 target_flags_explicit |= MASK_3DNOW_A;
1453 }
1454 return true;
1455
1456 case OPT_mmmx:
1457 if (!value)
1458 {
1459 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1460 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1461 }
1462 return true;
1463
1464 case OPT_msse:
1465 if (!value)
1466 {
1467 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1468 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1469 }
1470 return true;
1471
1472 case OPT_msse2:
1473 if (!value)
1474 {
1475 target_flags &= ~MASK_SSE3;
1476 target_flags_explicit |= MASK_SSE3;
1477 }
1478 return true;
1479
1480 default:
1481 return true;
1482 }
1483 }
1484
1485 /* Sometimes certain combinations of command options do not make
1486 sense on a particular target machine. You can define a macro
1487 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1488 defined, is executed once just after all the command options have
1489 been parsed.
1490
1491 Don't use this macro to turn on various extra optimizations for
1492 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1493
1494 void
1495 override_options (void)
1496 {
1497 int i;
1498 int ix86_tune_defaulted = 0;
1499
1500 /* Comes from final.c -- no real reason to change it. */
1501 #define MAX_CODE_ALIGN 16
1502
1503 static struct ptt
1504 {
1505 const struct processor_costs *cost; /* Processor costs */
1506 const int target_enable; /* Target flags to enable. */
1507 const int target_disable; /* Target flags to disable. */
1508 const int align_loop; /* Default alignments. */
1509 const int align_loop_max_skip;
1510 const int align_jump;
1511 const int align_jump_max_skip;
1512 const int align_func;
1513 }
1514 const processor_target_table[PROCESSOR_max] =
1515 {
1516 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1517 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1518 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1519 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1520 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1521 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1522 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1523 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1524 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1525 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1526 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1527 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1528 };
1529
1530 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1531 static struct pta
1532 {
1533 const char *const name; /* processor name or nickname. */
1534 const enum processor_type processor;
1535 const enum pta_flags
1536 {
1537 PTA_SSE = 1,
1538 PTA_SSE2 = 2,
1539 PTA_SSE3 = 4,
1540 PTA_MMX = 8,
1541 PTA_PREFETCH_SSE = 16,
1542 PTA_3DNOW = 32,
1543 PTA_3DNOW_A = 64,
1544 PTA_64BIT = 128,
1545 PTA_SSSE3 = 256
1546 } flags;
1547 }
1548 const processor_alias_table[] =
1549 {
1550 {"i386", PROCESSOR_I386, 0},
1551 {"i486", PROCESSOR_I486, 0},
1552 {"i586", PROCESSOR_PENTIUM, 0},
1553 {"pentium", PROCESSOR_PENTIUM, 0},
1554 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1555 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1556 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1557 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1558 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1559 {"i686", PROCESSOR_PENTIUMPRO, 0},
1560 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1561 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1562 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1563 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1564 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1565 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1566 | PTA_MMX | PTA_PREFETCH_SSE},
1567 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1568 | PTA_MMX | PTA_PREFETCH_SSE},
1569 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1570 | PTA_MMX | PTA_PREFETCH_SSE},
1571 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1572 | PTA_MMX | PTA_PREFETCH_SSE},
1573 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1574 | PTA_3DNOW_A},
1575 {"k6", PROCESSOR_K6, PTA_MMX},
1576 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1577 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1578 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1579 | PTA_3DNOW_A},
1580 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1581 | PTA_3DNOW | PTA_3DNOW_A},
1582 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1583 | PTA_3DNOW_A | PTA_SSE},
1584 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1585 | PTA_3DNOW_A | PTA_SSE},
1586 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1587 | PTA_3DNOW_A | PTA_SSE},
1588 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1589 | PTA_SSE | PTA_SSE2 },
1590 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1591 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1592 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1593 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1594 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1595 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1596 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1597 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1598 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1599 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1600 };
1601
1602 int const pta_size = ARRAY_SIZE (processor_alias_table);
1603
1604 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1605 SUBTARGET_OVERRIDE_OPTIONS;
1606 #endif
1607
1608 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1609 SUBSUBTARGET_OVERRIDE_OPTIONS;
1610 #endif
1611
1612 /* -fPIC is the default for x86_64 on Darwin. */
1613 if (TARGET_MACHO && TARGET_64BIT)
1614 flag_pic = 2;
1615
1616 /* Set the default values for switches whose default depends on TARGET_64BIT
1617 in case they weren't overwritten by command line options. */
1618 if (TARGET_64BIT)
1619 {
1620 /* Mach-O doesn't support omitting the frame pointer for now. */
1621 if (flag_omit_frame_pointer == 2)
1622 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1623 if (flag_asynchronous_unwind_tables == 2)
1624 flag_asynchronous_unwind_tables = 1;
1625 if (flag_pcc_struct_return == 2)
1626 flag_pcc_struct_return = 0;
1627 }
1628 else
1629 {
1630 if (flag_omit_frame_pointer == 2)
1631 flag_omit_frame_pointer = 0;
1632 if (flag_asynchronous_unwind_tables == 2)
1633 flag_asynchronous_unwind_tables = 0;
1634 if (flag_pcc_struct_return == 2)
1635 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1636 }
1637
1638 /* Need to check -mtune=generic first. */
1639 if (ix86_tune_string)
1640 {
1641 if (!strcmp (ix86_tune_string, "generic")
1642 || !strcmp (ix86_tune_string, "i686")
1643 /* As special support for cross compilers we read -mtune=native
1644 as -mtune=generic. With native compilers we won't see the
1645 -mtune=native, as it was changed by the driver. */
1646 || !strcmp (ix86_tune_string, "native"))
1647 {
1648 if (TARGET_64BIT)
1649 ix86_tune_string = "generic64";
1650 else
1651 ix86_tune_string = "generic32";
1652 }
1653 else if (!strncmp (ix86_tune_string, "generic", 7))
1654 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1655 }
1656 else
1657 {
1658 if (ix86_arch_string)
1659 ix86_tune_string = ix86_arch_string;
1660 if (!ix86_tune_string)
1661 {
1662 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1663 ix86_tune_defaulted = 1;
1664 }
1665
1666 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1667 need to use a sensible tune option. */
1668 if (!strcmp (ix86_tune_string, "generic")
1669 || !strcmp (ix86_tune_string, "x86-64")
1670 || !strcmp (ix86_tune_string, "i686"))
1671 {
1672 if (TARGET_64BIT)
1673 ix86_tune_string = "generic64";
1674 else
1675 ix86_tune_string = "generic32";
1676 }
1677 }
1678 if (!strcmp (ix86_tune_string, "x86-64"))
1679 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1680 "-mtune=generic instead as appropriate.");
1681
1682 if (!ix86_arch_string)
1683 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1684 if (!strcmp (ix86_arch_string, "generic"))
1685 error ("generic CPU can be used only for -mtune= switch");
1686 if (!strncmp (ix86_arch_string, "generic", 7))
1687 error ("bad value (%s) for -march= switch", ix86_arch_string);
1688
1689 if (ix86_cmodel_string != 0)
1690 {
1691 if (!strcmp (ix86_cmodel_string, "small"))
1692 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1693 else if (!strcmp (ix86_cmodel_string, "medium"))
1694 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1695 else if (flag_pic)
1696 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1697 else if (!strcmp (ix86_cmodel_string, "32"))
1698 ix86_cmodel = CM_32;
1699 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1700 ix86_cmodel = CM_KERNEL;
1701 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1702 ix86_cmodel = CM_LARGE;
1703 else
1704 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1705 }
1706 else
1707 {
1708 ix86_cmodel = CM_32;
1709 if (TARGET_64BIT)
1710 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1711 }
1712 if (ix86_asm_string != 0)
1713 {
1714 if (! TARGET_MACHO
1715 && !strcmp (ix86_asm_string, "intel"))
1716 ix86_asm_dialect = ASM_INTEL;
1717 else if (!strcmp (ix86_asm_string, "att"))
1718 ix86_asm_dialect = ASM_ATT;
1719 else
1720 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1721 }
1722 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1723 error ("code model %qs not supported in the %s bit mode",
1724 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1725 if (ix86_cmodel == CM_LARGE)
1726 sorry ("code model %<large%> not supported yet");
1727 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1728 sorry ("%i-bit mode not compiled in",
1729 (target_flags & MASK_64BIT) ? 64 : 32);
1730
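/* An -march= selection also implies the ISA flags listed for it in
   processor_alias_table unless the user already set or cleared those
   masks explicitly; e.g. "-march=k8" turns on MASK_MMX, MASK_3DNOW,
   MASK_3DNOW_A, MASK_SSE and MASK_SSE2 below.  */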
1731 for (i = 0; i < pta_size; i++)
1732 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1733 {
1734 ix86_arch = processor_alias_table[i].processor;
1735 /* Default cpu tuning to the architecture. */
1736 ix86_tune = ix86_arch;
1737 if (processor_alias_table[i].flags & PTA_MMX
1738 && !(target_flags_explicit & MASK_MMX))
1739 target_flags |= MASK_MMX;
1740 if (processor_alias_table[i].flags & PTA_3DNOW
1741 && !(target_flags_explicit & MASK_3DNOW))
1742 target_flags |= MASK_3DNOW;
1743 if (processor_alias_table[i].flags & PTA_3DNOW_A
1744 && !(target_flags_explicit & MASK_3DNOW_A))
1745 target_flags |= MASK_3DNOW_A;
1746 if (processor_alias_table[i].flags & PTA_SSE
1747 && !(target_flags_explicit & MASK_SSE))
1748 target_flags |= MASK_SSE;
1749 if (processor_alias_table[i].flags & PTA_SSE2
1750 && !(target_flags_explicit & MASK_SSE2))
1751 target_flags |= MASK_SSE2;
1752 if (processor_alias_table[i].flags & PTA_SSE3
1753 && !(target_flags_explicit & MASK_SSE3))
1754 target_flags |= MASK_SSE3;
1755 if (processor_alias_table[i].flags & PTA_SSSE3
1756 && !(target_flags_explicit & MASK_SSSE3))
1757 target_flags |= MASK_SSSE3;
1758 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1759 x86_prefetch_sse = true;
1760 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1761 error ("CPU you selected does not support x86-64 "
1762 "instruction set");
1763 break;
1764 }
1765
1766 if (i == pta_size)
1767 error ("bad value (%s) for -march= switch", ix86_arch_string);
1768
1769 for (i = 0; i < pta_size; i++)
1770 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1771 {
1772 ix86_tune = processor_alias_table[i].processor;
1773 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1774 {
1775 if (ix86_tune_defaulted)
1776 {
1777 ix86_tune_string = "x86-64";
1778 for (i = 0; i < pta_size; i++)
1779 if (! strcmp (ix86_tune_string,
1780 processor_alias_table[i].name))
1781 break;
1782 ix86_tune = processor_alias_table[i].processor;
1783 }
1784 else
1785 error ("CPU you selected does not support x86-64 "
1786 "instruction set");
1787 }
1788 /* Intel CPUs have always interpreted SSE prefetch instructions as
1789 NOPs; so, we can enable SSE prefetch instructions even when
1790 -mtune (rather than -march) points us to a processor that has them.
1791 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1792 higher processors. */
1793 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1794 x86_prefetch_sse = true;
1795 break;
1796 }
1797 if (i == pta_size)
1798 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1799
1800 if (optimize_size)
1801 ix86_cost = &size_cost;
1802 else
1803 ix86_cost = processor_target_table[ix86_tune].cost;
1804 target_flags |= processor_target_table[ix86_tune].target_enable;
1805 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1806
1807 /* Arrange to set up i386_stack_locals for all functions. */
1808 init_machine_status = ix86_init_machine_status;
1809
1810 /* Validate -mregparm= value. */
1811 if (ix86_regparm_string)
1812 {
1813 i = atoi (ix86_regparm_string);
1814 if (i < 0 || i > REGPARM_MAX)
1815 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1816 else
1817 ix86_regparm = i;
1818 }
1819 else
1820 if (TARGET_64BIT)
1821 ix86_regparm = REGPARM_MAX;
1822
1823 /* If the user has provided any of the -malign-* options,
1824 warn and use that value only if -falign-* is not set.
1825 Remove this code in GCC 3.2 or later. */
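/* The -malign-* values are log2 based while -falign-* takes the byte
   count directly; e.g. "-malign-loops=4" corresponds to
   "-falign-loops=16", since align_loops is set to 1 << 4 below.  */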
1826 if (ix86_align_loops_string)
1827 {
1828 warning (0, "-malign-loops is obsolete, use -falign-loops");
1829 if (align_loops == 0)
1830 {
1831 i = atoi (ix86_align_loops_string);
1832 if (i < 0 || i > MAX_CODE_ALIGN)
1833 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1834 else
1835 align_loops = 1 << i;
1836 }
1837 }
1838
1839 if (ix86_align_jumps_string)
1840 {
1841 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1842 if (align_jumps == 0)
1843 {
1844 i = atoi (ix86_align_jumps_string);
1845 if (i < 0 || i > MAX_CODE_ALIGN)
1846 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1847 else
1848 align_jumps = 1 << i;
1849 }
1850 }
1851
1852 if (ix86_align_funcs_string)
1853 {
1854 warning (0, "-malign-functions is obsolete, use -falign-functions");
1855 if (align_functions == 0)
1856 {
1857 i = atoi (ix86_align_funcs_string);
1858 if (i < 0 || i > MAX_CODE_ALIGN)
1859 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1860 else
1861 align_functions = 1 << i;
1862 }
1863 }
1864
1865 /* Default align_* from the processor table. */
1866 if (align_loops == 0)
1867 {
1868 align_loops = processor_target_table[ix86_tune].align_loop;
1869 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1870 }
1871 if (align_jumps == 0)
1872 {
1873 align_jumps = processor_target_table[ix86_tune].align_jump;
1874 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1875 }
1876 if (align_functions == 0)
1877 {
1878 align_functions = processor_target_table[ix86_tune].align_func;
1879 }
1880
1881 /* Validate -mbranch-cost= value, or provide default. */
1882 ix86_branch_cost = ix86_cost->branch_cost;
1883 if (ix86_branch_cost_string)
1884 {
1885 i = atoi (ix86_branch_cost_string);
1886 if (i < 0 || i > 5)
1887 error ("-mbranch-cost=%d is not between 0 and 5", i);
1888 else
1889 ix86_branch_cost = i;
1890 }
1891 if (ix86_section_threshold_string)
1892 {
1893 i = atoi (ix86_section_threshold_string);
1894 if (i < 0)
1895 error ("-mlarge-data-threshold=%d is negative", i);
1896 else
1897 ix86_section_threshold = i;
1898 }
1899
1900 if (ix86_tls_dialect_string)
1901 {
1902 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1903 ix86_tls_dialect = TLS_DIALECT_GNU;
1904 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1905 ix86_tls_dialect = TLS_DIALECT_GNU2;
1906 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1907 ix86_tls_dialect = TLS_DIALECT_SUN;
1908 else
1909 error ("bad value (%s) for -mtls-dialect= switch",
1910 ix86_tls_dialect_string);
1911 }
1912
1913 /* Keep nonleaf frame pointers. */
1914 if (flag_omit_frame_pointer)
1915 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1916 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1917 flag_omit_frame_pointer = 1;
1918
1919 /* If we're doing fast math, we don't care about comparison order
1920 wrt NaNs. This lets us use a shorter comparison sequence. */
1921 if (flag_finite_math_only)
1922 target_flags &= ~MASK_IEEE_FP;
1923
1924 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1925 since the insns won't need emulation. */
1926 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1927 target_flags &= ~MASK_NO_FANCY_MATH_387;
1928
1929 /* Likewise, if the target doesn't have a 387, or we've specified
1930 software floating point, don't use 387 inline intrinsics. */
1931 if (!TARGET_80387)
1932 target_flags |= MASK_NO_FANCY_MATH_387;
1933
1934 /* Turn on SSE3 builtins for -mssse3. */
1935 if (TARGET_SSSE3)
1936 target_flags |= MASK_SSE3;
1937
1938 /* Turn on SSE2 builtins for -msse3. */
1939 if (TARGET_SSE3)
1940 target_flags |= MASK_SSE2;
1941
1942 /* Turn on SSE builtins for -msse2. */
1943 if (TARGET_SSE2)
1944 target_flags |= MASK_SSE;
1945
1946 /* Turn on MMX builtins for -msse. */
1947 if (TARGET_SSE)
1948 {
1949 target_flags |= MASK_MMX & ~target_flags_explicit;
1950 x86_prefetch_sse = true;
1951 }
1952
1953 /* Turn on MMX builtins for 3Dnow. */
1954 if (TARGET_3DNOW)
1955 target_flags |= MASK_MMX;
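/* The net effect of the cascade above is that each ISA option implies its
   predecessors: -mssse3 enables SSE3, which enables SSE2, which enables
   SSE, and SSE (unless MMX was explicitly disabled) as well as 3DNow!
   enable MMX.  */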
1956
1957 if (TARGET_64BIT)
1958 {
1959 if (TARGET_ALIGN_DOUBLE)
1960 error ("-malign-double makes no sense in the 64bit mode");
1961 if (TARGET_RTD)
1962 error ("-mrtd calling convention not supported in the 64bit mode");
1963
1964 /* Enable by default the SSE and MMX builtins. Do allow the user to
1965 explicitly disable any of these. In particular, disabling SSE and
1966 MMX for kernel code is extremely useful. */
1967 target_flags
1968 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1969 & ~target_flags_explicit);
1970 }
1971 else
1972 {
1973 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1974 when the programmer takes care to keep the stack from being destroyed. */
1975 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1976 target_flags |= MASK_NO_RED_ZONE;
1977 }
1978
1979 /* Validate -mpreferred-stack-boundary= value, or provide default.
1980 The default of 128 bits is for Pentium III's SSE __m128. We can't
1981 lower it for optimize_size, since then object files compiled with -Os
1982 could not be mixed with ones compiled with -On. */
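/* The option value is the log2 of the boundary in bytes; e.g.
   "-mpreferred-stack-boundary=4" yields (1 << 4) * BITS_PER_UNIT
   = 128 bits, i.e. a 16 byte boundary.  */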
1983 ix86_preferred_stack_boundary = 128;
1984 if (ix86_preferred_stack_boundary_string)
1985 {
1986 i = atoi (ix86_preferred_stack_boundary_string);
1987 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1988 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1989 TARGET_64BIT ? 4 : 2);
1990 else
1991 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1992 }
1993
1994 /* Accept -msseregparm only if at least SSE support is enabled. */
1995 if (TARGET_SSEREGPARM
1996 && ! TARGET_SSE)
1997 error ("-msseregparm used without SSE enabled");
1998
1999 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2000
2001 if (ix86_fpmath_string != 0)
2002 {
2003 if (! strcmp (ix86_fpmath_string, "387"))
2004 ix86_fpmath = FPMATH_387;
2005 else if (! strcmp (ix86_fpmath_string, "sse"))
2006 {
2007 if (!TARGET_SSE)
2008 {
2009 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2010 ix86_fpmath = FPMATH_387;
2011 }
2012 else
2013 ix86_fpmath = FPMATH_SSE;
2014 }
2015 else if (! strcmp (ix86_fpmath_string, "387,sse")
2016 || ! strcmp (ix86_fpmath_string, "sse,387"))
2017 {
2018 if (!TARGET_SSE)
2019 {
2020 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2021 ix86_fpmath = FPMATH_387;
2022 }
2023 else if (!TARGET_80387)
2024 {
2025 warning (0, "387 instruction set disabled, using SSE arithmetics");
2026 ix86_fpmath = FPMATH_SSE;
2027 }
2028 else
2029 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2030 }
2031 else
2032 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2033 }
2034
2035 /* If the i387 is disabled, then do not return values in it. */
2036 if (!TARGET_80387)
2037 target_flags &= ~MASK_FLOAT_RETURNS;
2038
2039 if ((x86_accumulate_outgoing_args & TUNEMASK)
2040 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2041 && !optimize_size)
2042 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2043
2044 /* ??? Unwind info is not correct around the CFG unless either a frame
2045 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2046 unwind info generation to be aware of the CFG and propagating states
2047 around edges. */
2048 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2049 || flag_exceptions || flag_non_call_exceptions)
2050 && flag_omit_frame_pointer
2051 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2052 {
2053 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2054 warning (0, "unwind tables currently require either a frame pointer "
2055 "or -maccumulate-outgoing-args for correctness");
2056 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2057 }
2058
2059 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2060 {
2061 char *p;
2062 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2063 p = strchr (internal_label_prefix, 'X');
2064 internal_label_prefix_len = p - internal_label_prefix;
2065 *p = '\0';
2066 }
2067
2068 /* When scheduling description is not available, disable scheduler pass
2069 so it won't slow down the compilation and make x87 code slower. */
2070 if (!TARGET_SCHEDULE)
2071 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2072
2073 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2074 set_param_value ("simultaneous-prefetches",
2075 ix86_cost->simultaneous_prefetches);
2076 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2077 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2078 }
2079 \f
2080 /* Switch to the appropriate section for output of DECL.
2081 DECL is either a `VAR_DECL' node or a constant of some sort.
2082 RELOC indicates whether forming the initial value of DECL requires
2083 link-time relocations. */
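/* Rough example of the effect: with -mcmodel=medium, an initialized
   global array large enough for the large data area ends up in ".ldata"
   (or one of the ".ldata.rel*" variants when relocations are involved)
   instead of the ordinary ".data" section that
   default_elf_select_section would pick.  */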
2084
2085 static section *
2086 x86_64_elf_select_section (tree decl, int reloc,
2087 unsigned HOST_WIDE_INT align)
2088 {
2089 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2090 && ix86_in_large_data_p (decl))
2091 {
2092 const char *sname = NULL;
2093 unsigned int flags = SECTION_WRITE;
2094 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2095 {
2096 case SECCAT_DATA:
2097 sname = ".ldata";
2098 break;
2099 case SECCAT_DATA_REL:
2100 sname = ".ldata.rel";
2101 break;
2102 case SECCAT_DATA_REL_LOCAL:
2103 sname = ".ldata.rel.local";
2104 break;
2105 case SECCAT_DATA_REL_RO:
2106 sname = ".ldata.rel.ro";
2107 break;
2108 case SECCAT_DATA_REL_RO_LOCAL:
2109 sname = ".ldata.rel.ro.local";
2110 break;
2111 case SECCAT_BSS:
2112 sname = ".lbss";
2113 flags |= SECTION_BSS;
2114 break;
2115 case SECCAT_RODATA:
2116 case SECCAT_RODATA_MERGE_STR:
2117 case SECCAT_RODATA_MERGE_STR_INIT:
2118 case SECCAT_RODATA_MERGE_CONST:
2119 sname = ".lrodata";
2120 flags = 0;
2121 break;
2122 case SECCAT_SRODATA:
2123 case SECCAT_SDATA:
2124 case SECCAT_SBSS:
2125 gcc_unreachable ();
2126 case SECCAT_TEXT:
2127 case SECCAT_TDATA:
2128 case SECCAT_TBSS:
2129 /* We don't split these for the medium model. Place them into
2130 default sections and hope for the best. */
2131 break;
2132 }
2133 if (sname)
2134 {
2135 /* We might get called with string constants, but get_named_section
2136 doesn't like them as they are not DECLs. Also, we need to set
2137 flags in that case. */
2138 if (!DECL_P (decl))
2139 return get_section (sname, flags, NULL);
2140 return get_named_section (decl, sname, reloc);
2141 }
2142 }
2143 return default_elf_select_section (decl, reloc, align);
2144 }
2145
2146 /* Build up a unique section name, expressed as a
2147 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2148 RELOC indicates whether the initial value of DECL requires
2149 link-time relocations. */
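/* Rough example of the effect: a variable "foo" placed in the large data
   area gets a section name such as ".ldata.foo" (or ".gnu.linkonce.ld.foo"
   when COMDAT groups are unavailable) instead of the ".data.foo" that
   default_unique_section would pick.  */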
2150
2151 static void
2152 x86_64_elf_unique_section (tree decl, int reloc)
2153 {
2154 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2155 && ix86_in_large_data_p (decl))
2156 {
2157 const char *prefix = NULL;
2158 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2159 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2160
2161 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2162 {
2163 case SECCAT_DATA:
2164 case SECCAT_DATA_REL:
2165 case SECCAT_DATA_REL_LOCAL:
2166 case SECCAT_DATA_REL_RO:
2167 case SECCAT_DATA_REL_RO_LOCAL:
2168 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2169 break;
2170 case SECCAT_BSS:
2171 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2172 break;
2173 case SECCAT_RODATA:
2174 case SECCAT_RODATA_MERGE_STR:
2175 case SECCAT_RODATA_MERGE_STR_INIT:
2176 case SECCAT_RODATA_MERGE_CONST:
2177 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2178 break;
2179 case SECCAT_SRODATA:
2180 case SECCAT_SDATA:
2181 case SECCAT_SBSS:
2182 gcc_unreachable ();
2183 case SECCAT_TEXT:
2184 case SECCAT_TDATA:
2185 case SECCAT_TBSS:
2186 /* We don't split these for the medium model. Place them into
2187 default sections and hope for the best. */
2188 break;
2189 }
2190 if (prefix)
2191 {
2192 const char *name;
2193 size_t nlen, plen;
2194 char *string;
2195 plen = strlen (prefix);
2196
2197 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2198 name = targetm.strip_name_encoding (name);
2199 nlen = strlen (name);
2200
2201 string = alloca (nlen + plen + 1);
2202 memcpy (string, prefix, plen);
2203 memcpy (string + plen, name, nlen + 1);
2204
2205 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2206 return;
2207 }
2208 }
2209 default_unique_section (decl, reloc);
2210 }
2211
2212 #ifdef COMMON_ASM_OP
2213 /* This says how to output assembler code to declare an
2214 uninitialized external linkage data object.
2215
2216 For medium model x86-64 we need to use the .largecomm directive for
2217 large objects. */
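/* Rough example of the output: for a sufficiently large common symbol in
   the medium code model this emits something like

       .largecomm  buf,1048576,32

   (the name is hypothetical), whereas smaller objects keep the ordinary
   COMMON_ASM_OP form with the same name,size,alignment operands.  */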
2218 void
2219 x86_elf_aligned_common (FILE *file,
2220 const char *name, unsigned HOST_WIDE_INT size,
2221 int align)
2222 {
2223 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2224 && size > (unsigned int)ix86_section_threshold)
2225 fprintf (file, ".largecomm\t");
2226 else
2227 fprintf (file, "%s", COMMON_ASM_OP);
2228 assemble_name (file, name);
2229 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2230 size, align / BITS_PER_UNIT);
2231 }
2232
2233 /* Utility function for targets to use in implementing
2234 ASM_OUTPUT_ALIGNED_BSS. */
2235
2236 void
2237 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2238 const char *name, unsigned HOST_WIDE_INT size,
2239 int align)
2240 {
2241 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2242 && size > (unsigned int)ix86_section_threshold)
2243 switch_to_section (get_named_section (decl, ".lbss", 0));
2244 else
2245 switch_to_section (bss_section);
2246 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2247 #ifdef ASM_DECLARE_OBJECT_NAME
2248 last_assemble_variable_decl = decl;
2249 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2250 #else
2251 /* The standard thing is just to output a label for the object. */
2252 ASM_OUTPUT_LABEL (file, name);
2253 #endif /* ASM_DECLARE_OBJECT_NAME */
2254 ASM_OUTPUT_SKIP (file, size ? size : 1);
2255 }
2256 #endif
2257 \f
2258 void
2259 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2260 {
2261 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2262 make the problem with not enough registers even worse. */
2263 #ifdef INSN_SCHEDULING
2264 if (level > 1)
2265 flag_schedule_insns = 0;
2266 #endif
2267
2268 if (TARGET_MACHO)
2269 /* The Darwin libraries never set errno, so we might as well
2270 avoid calling them when that's the only reason we would. */
2271 flag_errno_math = 0;
2272
2273 /* The default values of these switches depend on TARGET_64BIT,
2274 which is not known at this point. Mark these values with 2 and
2275 let the user override them. If there is no command line option
2276 specifying them, we will set the defaults in override_options. */
2277 if (optimize >= 1)
2278 flag_omit_frame_pointer = 2;
2279 flag_pcc_struct_return = 2;
2280 flag_asynchronous_unwind_tables = 2;
2281 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2282 SUBTARGET_OPTIMIZATION_OPTIONS;
2283 #endif
2284 }
2285 \f
2286 /* Table of valid machine attributes. */
2287 const struct attribute_spec ix86_attribute_table[] =
2288 {
2289 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2290 /* Stdcall attribute says callee is responsible for popping arguments
2291 if they are not variable. */
2292 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2293 /* Fastcall attribute says callee is responsible for popping arguments
2294 if they are not variable. */
2295 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2296 /* Cdecl attribute says the callee is a normal C declaration. */
2297 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2298 /* Regparm attribute specifies how many integer arguments are to be
2299 passed in registers. */
2300 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2301 /* Sseregparm attribute says we are using x86_64 calling conventions
2302 for FP arguments. */
2303 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2304 /* force_align_arg_pointer says this function realigns the stack at entry. */
2305 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2306 false, true, true, ix86_handle_cconv_attribute },
2307 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2308 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2309 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2310 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2311 #endif
2312 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2313 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2314 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2315 SUBTARGET_ATTRIBUTE_TABLE,
2316 #endif
2317 { NULL, 0, 0, false, false, false, NULL }
2318 };
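/* Illustrative uses of some of the attributes above:

       int __attribute__ ((regparm (3))) f (int a, int b, int c);
       int __attribute__ ((fastcall)) g (int a, int b);
       int __attribute__ ((stdcall)) h (int a);

   regparm passes up to the given number of integer arguments in
   registers, fastcall passes the first two integer arguments in ECX and
   EDX, and stdcall makes the callee pop its arguments; see
   ix86_handle_cconv_attribute and ix86_return_pops_args below.  */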
2319
2320 /* Decide whether we can make a sibling call to a function. DECL is the
2321 declaration of the function being targeted by the call and EXP is the
2322 CALL_EXPR representing the call. */
2323
2324 static bool
2325 ix86_function_ok_for_sibcall (tree decl, tree exp)
2326 {
2327 tree func;
2328 rtx a, b;
2329
2330 /* If we are generating position-independent code, we cannot sibcall
2331 optimize any indirect call, or a direct call to a global function,
2332 as the PLT requires %ebx be live. */
2333 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2334 return false;
2335
2336 if (decl)
2337 func = decl;
2338 else
2339 {
2340 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2341 if (POINTER_TYPE_P (func))
2342 func = TREE_TYPE (func);
2343 }
2344
2345 /* Check that the return value locations are the same. For example,
2346 if we are returning floats on the 80387 register stack, we cannot
2347 make a sibcall from a function that doesn't return a float to a
2348 function that does or, conversely, from a function that does return
2349 a float to a function that doesn't; the necessary stack adjustment
2350 would not be executed. This is also the place we notice
2351 differences in the return value ABI. Note that it is ok for one
2352 of the functions to have void return type as long as the return
2353 value of the other is passed in a register. */
2354 a = ix86_function_value (TREE_TYPE (exp), func, false);
2355 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2356 cfun->decl, false);
2357 if (STACK_REG_P (a) || STACK_REG_P (b))
2358 {
2359 if (!rtx_equal_p (a, b))
2360 return false;
2361 }
2362 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2363 ;
2364 else if (!rtx_equal_p (a, b))
2365 return false;
2366
2367 /* If this call is indirect, we'll need to be able to use a call-clobbered
2368 register for the address of the target function. Make sure that all
2369 such registers are not used for passing parameters. */
2370 if (!decl && !TARGET_64BIT)
2371 {
2372 tree type;
2373
2374 /* We're looking at the CALL_EXPR, we need the type of the function. */
2375 type = TREE_OPERAND (exp, 0); /* pointer expression */
2376 type = TREE_TYPE (type); /* pointer type */
2377 type = TREE_TYPE (type); /* function type */
2378
2379 if (ix86_function_regparm (type, NULL) >= 3)
2380 {
2381 /* ??? Need to count the actual number of registers to be used,
2382 not the possible number of registers. Fix later. */
2383 return false;
2384 }
2385 }
2386
2387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2388 /* Dllimport'd functions are also called indirectly. */
2389 if (decl && DECL_DLLIMPORT_P (decl)
2390 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2391 return false;
2392 #endif
2393
2394 /* If we force-aligned the stack, then sibcalling would unalign the
2395 stack, which may break the called function. */
2396 if (cfun->machine->force_align_arg_pointer)
2397 return false;
2398
2399 /* Otherwise okay. That also includes certain types of indirect calls. */
2400 return true;
2401 }
2402
2403 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2404 calling convention attributes;
2405 arguments as in struct attribute_spec.handler. */
2406
2407 static tree
2408 ix86_handle_cconv_attribute (tree *node, tree name,
2409 tree args,
2410 int flags ATTRIBUTE_UNUSED,
2411 bool *no_add_attrs)
2412 {
2413 if (TREE_CODE (*node) != FUNCTION_TYPE
2414 && TREE_CODE (*node) != METHOD_TYPE
2415 && TREE_CODE (*node) != FIELD_DECL
2416 && TREE_CODE (*node) != TYPE_DECL)
2417 {
2418 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2419 IDENTIFIER_POINTER (name));
2420 *no_add_attrs = true;
2421 return NULL_TREE;
2422 }
2423
2424 /* Can combine regparm with all attributes but fastcall. */
2425 if (is_attribute_p ("regparm", name))
2426 {
2427 tree cst;
2428
2429 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2430 {
2431 error ("fastcall and regparm attributes are not compatible");
2432 }
2433
2434 cst = TREE_VALUE (args);
2435 if (TREE_CODE (cst) != INTEGER_CST)
2436 {
2437 warning (OPT_Wattributes,
2438 "%qs attribute requires an integer constant argument",
2439 IDENTIFIER_POINTER (name));
2440 *no_add_attrs = true;
2441 }
2442 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2443 {
2444 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2445 IDENTIFIER_POINTER (name), REGPARM_MAX);
2446 *no_add_attrs = true;
2447 }
2448
2449 if (!TARGET_64BIT
2450 && lookup_attribute (ix86_force_align_arg_pointer_string,
2451 TYPE_ATTRIBUTES (*node))
2452 && compare_tree_int (cst, REGPARM_MAX-1))
2453 {
2454 error ("%s functions limited to %d register parameters",
2455 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2456 }
2457
2458 return NULL_TREE;
2459 }
2460
2461 if (TARGET_64BIT)
2462 {
2463 warning (OPT_Wattributes, "%qs attribute ignored",
2464 IDENTIFIER_POINTER (name));
2465 *no_add_attrs = true;
2466 return NULL_TREE;
2467 }
2468
2469 /* Can combine fastcall only with sseregparm. */
2470 if (is_attribute_p ("fastcall", name))
2471 {
2472 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2473 {
2474 error ("fastcall and cdecl attributes are not compatible");
2475 }
2476 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2477 {
2478 error ("fastcall and stdcall attributes are not compatible");
2479 }
2480 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2481 {
2482 error ("fastcall and regparm attributes are not compatible");
2483 }
2484 }
2485
2486 /* Can combine stdcall with regparm and sseregparm. */
2488 else if (is_attribute_p ("stdcall", name))
2489 {
2490 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2491 {
2492 error ("stdcall and cdecl attributes are not compatible");
2493 }
2494 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2495 {
2496 error ("stdcall and fastcall attributes are not compatible");
2497 }
2498 }
2499
2500 /* Can combine cdecl with regparm and sseregparm. */
2501 else if (is_attribute_p ("cdecl", name))
2502 {
2503 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2504 {
2505 error ("stdcall and cdecl attributes are not compatible");
2506 }
2507 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2508 {
2509 error ("fastcall and cdecl attributes are not compatible");
2510 }
2511 }
2512
2513 /* Can combine sseregparm with all attributes. */
2514
2515 return NULL_TREE;
2516 }
2517
2518 /* Return 0 if the attributes for two types are incompatible, 1 if they
2519 are compatible, and 2 if they are nearly compatible (which causes a
2520 warning to be generated). */
2521
2522 static int
2523 ix86_comp_type_attributes (tree type1, tree type2)
2524 {
2525 /* Check for mismatch of non-default calling convention. */
2526 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2527
2528 if (TREE_CODE (type1) != FUNCTION_TYPE)
2529 return 1;
2530
2531 /* Check for mismatched fastcall/regparm types. */
2532 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2533 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2534 || (ix86_function_regparm (type1, NULL)
2535 != ix86_function_regparm (type2, NULL)))
2536 return 0;
2537
2538 /* Check for mismatched sseregparm types. */
2539 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2540 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2541 return 0;
2542
2543 /* Check for mismatched return types (cdecl vs stdcall). */
2544 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2545 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2546 return 0;
2547
2548 return 1;
2549 }
2550 \f
2551 /* Return the regparm value for a function with the indicated TYPE and DECL.
2552 DECL may be NULL when calling function indirectly
2553 or considering a libcall. */
2554
2555 static int
2556 ix86_function_regparm (tree type, tree decl)
2557 {
2558 tree attr;
2559 int regparm = ix86_regparm;
2560 bool user_convention = false;
2561
2562 if (!TARGET_64BIT)
2563 {
2564 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2565 if (attr)
2566 {
2567 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2568 user_convention = true;
2569 }
2570
2571 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2572 {
2573 regparm = 2;
2574 user_convention = true;
2575 }
2576
2577 /* Use register calling convention for local functions when possible. */
2578 if (!TARGET_64BIT && !user_convention && decl
2579 && flag_unit_at_a_time && !profile_flag)
2580 {
2581 struct cgraph_local_info *i = cgraph_local_info (decl);
2582 if (i && i->local)
2583 {
2584 int local_regparm, globals = 0, regno;
2585
2586 /* Make sure no regparm register is taken by a global register
2587 variable. */
2588 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2589 if (global_regs[local_regparm])
2590 break;
2591 /* We can't use regparm(3) for nested functions as these use
2592 the static chain pointer in the third argument register. */
2593 if (local_regparm == 3
2594 && decl_function_context (decl)
2595 && !DECL_NO_STATIC_CHAIN (decl))
2596 local_regparm = 2;
2597 /* If the function realigns its stack pointer, the
2598 prologue will clobber %ecx. If we've already
2599 generated code for the callee, the callee
2600 DECL_STRUCT_FUNCTION is gone, so we fall back to
2601 scanning the attributes for the self-realigning
2602 property. */
2603 if ((DECL_STRUCT_FUNCTION (decl)
2604 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2605 || (!DECL_STRUCT_FUNCTION (decl)
2606 && lookup_attribute (ix86_force_align_arg_pointer_string,
2607 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2608 local_regparm = 2;
2609 /* Each global register variable increases register pressure,
2610 so the more global reg vars there are, the fewer registers
2611 the regparm optimization can use, unless requested by the user explicitly. */
2612 for (regno = 0; regno < 6; regno++)
2613 if (global_regs[regno])
2614 globals++;
2615 local_regparm
2616 = globals < local_regparm ? local_regparm - globals : 0;
2617
2618 if (local_regparm > regparm)
2619 regparm = local_regparm;
2620 }
2621 }
2622 }
2623 return regparm;
2624 }
2625
2626 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2627 in SSE registers for a function with the indicated TYPE and DECL.
2628 DECL may be NULL when calling function indirectly
2629 or considering a libcall. Otherwise return 0. */
2630
2631 static int
2632 ix86_function_sseregparm (tree type, tree decl)
2633 {
2634 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2635 by the sseregparm attribute. */
2636 if (TARGET_SSEREGPARM
2637 || (type
2638 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2639 {
2640 if (!TARGET_SSE)
2641 {
2642 if (decl)
2643 error ("Calling %qD with attribute sseregparm without "
2644 "SSE/SSE2 enabled", decl);
2645 else
2646 error ("Calling %qT with attribute sseregparm without "
2647 "SSE/SSE2 enabled", type);
2648 return 0;
2649 }
2650
2651 return 2;
2652 }
2653
2654 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2655 in SSE registers even in 32-bit mode, and allow not just 3 but up to
2656 8 SSE arguments in registers. */
2657 if (!TARGET_64BIT && decl
2658 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2659 {
2660 struct cgraph_local_info *i = cgraph_local_info (decl);
2661 if (i && i->local)
2662 return TARGET_SSE2 ? 2 : 1;
2663 }
2664
2665 return 0;
2666 }
2667
2668 /* Return true if EAX is live at the start of the function. Used by
2669 ix86_expand_prologue to determine if we need special help before
2670 calling allocate_stack_worker. */
2671
2672 static bool
2673 ix86_eax_live_at_start_p (void)
2674 {
2675 /* Cheat. Don't bother working forward from ix86_function_regparm
2676 to the function type to whether an actual argument is located in
2677 eax. Instead just look at cfg info, which is still close enough
2678 to correct at this point. This gives false positives for broken
2679 functions that might use uninitialized data that happens to be
2680 allocated in eax, but who cares? */
2681 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2682 }
2683
2684 /* Value is the number of bytes of arguments automatically
2685 popped when returning from a subroutine call.
2686 FUNDECL is the declaration node of the function (as a tree),
2687 FUNTYPE is the data type of the function (as a tree),
2688 or for a library call it is an identifier node for the subroutine name.
2689 SIZE is the number of bytes of arguments passed on the stack.
2690
2691 On the 80386, the RTD insn may be used to pop them if the number
2692 of args is fixed, but if the number is variable then the caller
2693 must pop them all. RTD can't be used for library calls now
2694 because the library is compiled with the Unix compiler.
2695 Use of RTD is a selectable option, since it is incompatible with
2696 standard Unix calling sequences. If the option is not selected,
2697 the caller must always pop the args.
2698
2699 The attribute stdcall is equivalent to RTD on a per module basis. */
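/* For example, a function declared

       void __attribute__ ((stdcall)) f (int a, int b);

   returns with "ret $8": ix86_return_pops_args reports SIZE (8 bytes of
   stack arguments) so the callee pops its own arguments, while a plain
   cdecl function normally returns 0 here and leaves the popping to the
   caller.  */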
2700
2701 int
2702 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2703 {
2704 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2705
2706 /* Cdecl functions override -mrtd, and never pop the stack. */
2707 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2708
2709 /* Stdcall and fastcall functions will pop the stack if not
2710 variable args. */
2711 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2712 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2713 rtd = 1;
2714
2715 if (rtd
2716 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2717 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2718 == void_type_node)))
2719 return size;
2720 }
2721
2722 /* Lose any fake structure return argument if it is passed on the stack. */
2723 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2724 && !TARGET_64BIT
2725 && !KEEP_AGGREGATE_RETURN_POINTER)
2726 {
2727 int nregs = ix86_function_regparm (funtype, fundecl);
2728
2729 if (!nregs)
2730 return GET_MODE_SIZE (Pmode);
2731 }
2732
2733 return 0;
2734 }
2735 \f
2736 /* Argument support functions. */
2737
2738 /* Return true when register may be used to pass function parameters. */
2739 bool
2740 ix86_function_arg_regno_p (int regno)
2741 {
2742 int i;
2743 if (!TARGET_64BIT)
2744 return (regno < REGPARM_MAX
2745 || (TARGET_MMX && MMX_REGNO_P (regno)
2746 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2747 || (TARGET_SSE && SSE_REGNO_P (regno)
2748 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2749
2750 if (TARGET_SSE && SSE_REGNO_P (regno)
2751 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2752 return true;
2753 /* RAX is used as hidden argument to va_arg functions. */
2754 if (!regno)
2755 return true;
2756 for (i = 0; i < REGPARM_MAX; i++)
2757 if (regno == x86_64_int_parameter_registers[i])
2758 return true;
2759 return false;
2760 }
2761
2762 /* Return true if we do not know how to pass TYPE solely in registers. */
2763
2764 static bool
2765 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2766 {
2767 if (must_pass_in_stack_var_size_or_pad (mode, type))
2768 return true;
2769
2770 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2771 The layout_type routine is crafty and tries to trick us into passing
2772 currently unsupported vector types on the stack by using TImode. */
2773 return (!TARGET_64BIT && mode == TImode
2774 && type && TREE_CODE (type) != VECTOR_TYPE);
2775 }
2776
2777 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2778 for a call to a function whose data type is FNTYPE.
2779 For a library call, FNTYPE is 0. */
2780
2781 void
2782 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2783 tree fntype, /* tree ptr for function decl */
2784 rtx libname, /* SYMBOL_REF of library name or 0 */
2785 tree fndecl)
2786 {
2787 static CUMULATIVE_ARGS zero_cum;
2788 tree param, next_param;
2789
2790 if (TARGET_DEBUG_ARG)
2791 {
2792 fprintf (stderr, "\ninit_cumulative_args (");
2793 if (fntype)
2794 fprintf (stderr, "fntype code = %s, ret code = %s",
2795 tree_code_name[(int) TREE_CODE (fntype)],
2796 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2797 else
2798 fprintf (stderr, "no fntype");
2799
2800 if (libname)
2801 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2802 }
2803
2804 *cum = zero_cum;
2805
2806 /* Set up the number of registers to use for passing arguments. */
2807 cum->nregs = ix86_regparm;
2808 if (TARGET_SSE)
2809 cum->sse_nregs = SSE_REGPARM_MAX;
2810 if (TARGET_MMX)
2811 cum->mmx_nregs = MMX_REGPARM_MAX;
2812 cum->warn_sse = true;
2813 cum->warn_mmx = true;
2814 cum->maybe_vaarg = false;
2815
2816 /* Use ecx and edx registers if function has fastcall attribute,
2817 else look for regparm information. */
2818 if (fntype && !TARGET_64BIT)
2819 {
2820 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2821 {
2822 cum->nregs = 2;
2823 cum->fastcall = 1;
2824 }
2825 else
2826 cum->nregs = ix86_function_regparm (fntype, fndecl);
2827 }
2828
2829 /* Set up the number of SSE registers used for passing SFmode
2830 and DFmode arguments. Warn for mismatching ABI. */
2831 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2832
2833 /* Determine if this function has variable arguments. If there are
2834 no variable arguments, the last argument type is 'void_type_node'.
2835 If there are variable arguments, then
2836 we won't pass anything in registers in 32-bit mode. */
2837
2838 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2839 {
2840 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2841 param != 0; param = next_param)
2842 {
2843 next_param = TREE_CHAIN (param);
2844 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2845 {
2846 if (!TARGET_64BIT)
2847 {
2848 cum->nregs = 0;
2849 cum->sse_nregs = 0;
2850 cum->mmx_nregs = 0;
2851 cum->warn_sse = 0;
2852 cum->warn_mmx = 0;
2853 cum->fastcall = 0;
2854 cum->float_in_sse = 0;
2855 }
2856 cum->maybe_vaarg = true;
2857 }
2858 }
2859 }
2860 if ((!fntype && !libname)
2861 || (fntype && !TYPE_ARG_TYPES (fntype)))
2862 cum->maybe_vaarg = true;
2863
2864 if (TARGET_DEBUG_ARG)
2865 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2866
2867 return;
2868 }
2869
2870 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2871 But in the case of vector types, it is some vector mode.
2872
2873 When we have only some of our vector isa extensions enabled, then there
2874 are some modes for which vector_mode_supported_p is false. For these
2875 modes, the generic vector support in gcc will choose some non-vector mode
2876 in order to implement the type. By computing the natural mode, we'll
2877 select the proper ABI location for the operand and not depend on whatever
2878 the middle-end decides to do with these vector types. */
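/* An illustrative case: for

       typedef int v4si __attribute__ ((vector_size (16)));

   compiled without SSE enabled, TYPE_MODE may be a non-vector mode such
   as TImode, yet the natural mode computed here is still V4SImode, so
   the ABI decision does not change with the set of enabled vector
   extensions.  */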
2879
2880 static enum machine_mode
2881 type_natural_mode (tree type)
2882 {
2883 enum machine_mode mode = TYPE_MODE (type);
2884
2885 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2886 {
2887 HOST_WIDE_INT size = int_size_in_bytes (type);
2888 if ((size == 8 || size == 16)
2889 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2890 && TYPE_VECTOR_SUBPARTS (type) > 1)
2891 {
2892 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2893
2894 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2895 mode = MIN_MODE_VECTOR_FLOAT;
2896 else
2897 mode = MIN_MODE_VECTOR_INT;
2898
2899 /* Get the mode which has this inner mode and number of units. */
2900 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2901 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2902 && GET_MODE_INNER (mode) == innermode)
2903 return mode;
2904
2905 gcc_unreachable ();
2906 }
2907 }
2908
2909 return mode;
2910 }
2911
2912 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2913 this may not agree with the mode that the type system has chosen for the
2914 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2915 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2916
2917 static rtx
2918 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2919 unsigned int regno)
2920 {
2921 rtx tmp;
2922
2923 if (orig_mode != BLKmode)
2924 tmp = gen_rtx_REG (orig_mode, regno);
2925 else
2926 {
2927 tmp = gen_rtx_REG (mode, regno);
2928 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2929 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2930 }
2931
2932 return tmp;
2933 }
2934
2935 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2936 of this code is to classify each 8-byte chunk of the incoming argument by register
2937 class and assign registers accordingly. */
2938
2939 /* Return the union class of CLASS1 and CLASS2.
2940 See the x86-64 PS ABI for details. */
2941
2942 static enum x86_64_reg_class
2943 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2944 {
2945 /* Rule #1: If both classes are equal, this is the resulting class. */
2946 if (class1 == class2)
2947 return class1;
2948
2949 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2950 the other class. */
2951 if (class1 == X86_64_NO_CLASS)
2952 return class2;
2953 if (class2 == X86_64_NO_CLASS)
2954 return class1;
2955
2956 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2957 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2958 return X86_64_MEMORY_CLASS;
2959
2960 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2961 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2962 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2963 return X86_64_INTEGERSI_CLASS;
2964 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2965 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2966 return X86_64_INTEGER_CLASS;
2967
2968 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2969 MEMORY is used. */
2970 if (class1 == X86_64_X87_CLASS
2971 || class1 == X86_64_X87UP_CLASS
2972 || class1 == X86_64_COMPLEX_X87_CLASS
2973 || class2 == X86_64_X87_CLASS
2974 || class2 == X86_64_X87UP_CLASS
2975 || class2 == X86_64_COMPLEX_X87_CLASS)
2976 return X86_64_MEMORY_CLASS;
2977
2978 /* Rule #6: Otherwise class SSE is used. */
2979 return X86_64_SSE_CLASS;
2980 }
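
/* Illustrative sketch (not part of GCC): applying the merge rules above to
   a union whose members land in the same eightbyte.  The union below is a
   hypothetical example.  */
#if 0
union u
{
  int i;    /* classified as X86_64_INTEGERSI_CLASS */
  float f;  /* classified as X86_64_SSESF_CLASS */
};

/* Merging X86_64_SSESF_CLASS with X86_64_INTEGERSI_CLASS hits rule #4 and
   yields X86_64_INTEGERSI_CLASS, so the union is passed in a
   general-purpose register rather than an SSE register.  */
#endif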
2981
2982 /* Classify the argument of type TYPE and mode MODE.
2983 CLASSES will be filled by the register class used to pass each word
2984 of the operand. The number of words is returned. In case the parameter
2985 should be passed in memory, 0 is returned. As a special case for zero
2986 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2987
2988    BIT_OFFSET is used internally for handling records; it is the offset of the
2989    value within the record, in bits, taken modulo 256 to avoid overflow.
2990
2991 See the x86-64 PS ABI for details.
2992 */
2993
2994 static int
2995 classify_argument (enum machine_mode mode, tree type,
2996 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2997 {
2998 HOST_WIDE_INT bytes =
2999 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3000 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3001
3002 /* Variable sized entities are always passed/returned in memory. */
3003 if (bytes < 0)
3004 return 0;
3005
3006 if (mode != VOIDmode
3007 && targetm.calls.must_pass_in_stack (mode, type))
3008 return 0;
3009
3010 if (type && AGGREGATE_TYPE_P (type))
3011 {
3012 int i;
3013 tree field;
3014 enum x86_64_reg_class subclasses[MAX_CLASSES];
3015
3016 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3017 if (bytes > 16)
3018 return 0;
3019
3020 for (i = 0; i < words; i++)
3021 classes[i] = X86_64_NO_CLASS;
3022
3023       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3024 	 signal the memory class, so handle this as a special case.  */
3025 if (!words)
3026 {
3027 classes[0] = X86_64_NO_CLASS;
3028 return 1;
3029 }
3030
3031 /* Classify each field of record and merge classes. */
3032 switch (TREE_CODE (type))
3033 {
3034 case RECORD_TYPE:
3035 /* And now merge the fields of structure. */
3036 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3037 {
3038 if (TREE_CODE (field) == FIELD_DECL)
3039 {
3040 int num;
3041
3042 if (TREE_TYPE (field) == error_mark_node)
3043 continue;
3044
3045 /* Bitfields are always classified as integer. Handle them
3046 early, since later code would consider them to be
3047 misaligned integers. */
3048 if (DECL_BIT_FIELD (field))
3049 {
3050 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3051 i < ((int_bit_position (field) + (bit_offset % 64))
3052 + tree_low_cst (DECL_SIZE (field), 0)
3053 + 63) / 8 / 8; i++)
3054 classes[i] =
3055 merge_classes (X86_64_INTEGER_CLASS,
3056 classes[i]);
3057 }
3058 else
3059 {
3060 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3061 TREE_TYPE (field), subclasses,
3062 (int_bit_position (field)
3063 + bit_offset) % 256);
3064 if (!num)
3065 return 0;
3066 for (i = 0; i < num; i++)
3067 {
3068 int pos =
3069 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3070 classes[i + pos] =
3071 merge_classes (subclasses[i], classes[i + pos]);
3072 }
3073 }
3074 }
3075 }
3076 break;
3077
3078 case ARRAY_TYPE:
3079 /* Arrays are handled as small records. */
3080 {
3081 int num;
3082 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3083 TREE_TYPE (type), subclasses, bit_offset);
3084 if (!num)
3085 return 0;
3086
3087 /* The partial classes are now full classes. */
3088 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3089 subclasses[0] = X86_64_SSE_CLASS;
3090 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3091 subclasses[0] = X86_64_INTEGER_CLASS;
3092
3093 for (i = 0; i < words; i++)
3094 classes[i] = subclasses[i % num];
3095
3096 break;
3097 }
3098 case UNION_TYPE:
3099 case QUAL_UNION_TYPE:
3100 	case UNION_TYPE:
3101 	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3102 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3103 {
3104 if (TREE_CODE (field) == FIELD_DECL)
3105 {
3106 int num;
3107
3108 if (TREE_TYPE (field) == error_mark_node)
3109 continue;
3110
3111 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3112 TREE_TYPE (field), subclasses,
3113 bit_offset);
3114 if (!num)
3115 return 0;
3116 for (i = 0; i < num; i++)
3117 classes[i] = merge_classes (subclasses[i], classes[i]);
3118 }
3119 }
3120 break;
3121
3122 default:
3123 gcc_unreachable ();
3124 }
3125
3126 /* Final merger cleanup. */
3127 for (i = 0; i < words; i++)
3128 {
3129 /* If one class is MEMORY, everything should be passed in
3130 memory. */
3131 if (classes[i] == X86_64_MEMORY_CLASS)
3132 return 0;
3133
3134       /* The X86_64_SSEUP_CLASS should always be preceded by
3135 	 X86_64_SSE_CLASS.  */
3136 if (classes[i] == X86_64_SSEUP_CLASS
3137 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3138 classes[i] = X86_64_SSE_CLASS;
3139
3140 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3141 if (classes[i] == X86_64_X87UP_CLASS
3142 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3143 classes[i] = X86_64_SSE_CLASS;
3144 }
3145 return words;
3146 }
3147
3148   /* Compute the alignment needed.  We align all types to their natural
3149      boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
3150 if (mode != VOIDmode && mode != BLKmode)
3151 {
3152 int mode_alignment = GET_MODE_BITSIZE (mode);
3153
3154 if (mode == XFmode)
3155 mode_alignment = 128;
3156 else if (mode == XCmode)
3157 mode_alignment = 256;
3158 if (COMPLEX_MODE_P (mode))
3159 mode_alignment /= 2;
3160 /* Misaligned fields are always returned in memory. */
3161 if (bit_offset % mode_alignment)
3162 return 0;
3163 }
3164
3165   /* For V1xx modes, just use the base mode.  */
3166 if (VECTOR_MODE_P (mode)
3167 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3168 mode = GET_MODE_INNER (mode);
3169
3170 /* Classification of atomic types. */
3171 switch (mode)
3172 {
3173 case SDmode:
3174 case DDmode:
3175 classes[0] = X86_64_SSE_CLASS;
3176 return 1;
3177 case TDmode:
3178 classes[0] = X86_64_SSE_CLASS;
3179 classes[1] = X86_64_SSEUP_CLASS;
3180 return 2;
3181 case DImode:
3182 case SImode:
3183 case HImode:
3184 case QImode:
3185 case CSImode:
3186 case CHImode:
3187 case CQImode:
3188 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3189 classes[0] = X86_64_INTEGERSI_CLASS;
3190 else
3191 classes[0] = X86_64_INTEGER_CLASS;
3192 return 1;
3193 case CDImode:
3194 case TImode:
3195 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3196 return 2;
3197 case CTImode:
3198 return 0;
3199 case SFmode:
3200 if (!(bit_offset % 64))
3201 classes[0] = X86_64_SSESF_CLASS;
3202 else
3203 classes[0] = X86_64_SSE_CLASS;
3204 return 1;
3205 case DFmode:
3206 classes[0] = X86_64_SSEDF_CLASS;
3207 return 1;
3208 case XFmode:
3209 classes[0] = X86_64_X87_CLASS;
3210 classes[1] = X86_64_X87UP_CLASS;
3211 return 2;
3212 case TFmode:
3213 classes[0] = X86_64_SSE_CLASS;
3214 classes[1] = X86_64_SSEUP_CLASS;
3215 return 2;
3216 case SCmode:
3217 classes[0] = X86_64_SSE_CLASS;
3218 return 1;
3219 case DCmode:
3220 classes[0] = X86_64_SSEDF_CLASS;
3221 classes[1] = X86_64_SSEDF_CLASS;
3222 return 2;
3223 case XCmode:
3224 classes[0] = X86_64_COMPLEX_X87_CLASS;
3225 return 1;
3226 case TCmode:
3227       /* This mode is larger than 16 bytes.  */
3228 return 0;
3229 case V4SFmode:
3230 case V4SImode:
3231 case V16QImode:
3232 case V8HImode:
3233 case V2DFmode:
3234 case V2DImode:
3235 classes[0] = X86_64_SSE_CLASS;
3236 classes[1] = X86_64_SSEUP_CLASS;
3237 return 2;
3238 case V2SFmode:
3239 case V2SImode:
3240 case V4HImode:
3241 case V8QImode:
3242 classes[0] = X86_64_SSE_CLASS;
3243 return 1;
3244 case BLKmode:
3245 case VOIDmode:
3246 return 0;
3247 default:
3248 gcc_assert (VECTOR_MODE_P (mode));
3249
3250 if (bytes > 16)
3251 return 0;
3252
3253 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3254
3255 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3256 classes[0] = X86_64_INTEGERSI_CLASS;
3257 else
3258 classes[0] = X86_64_INTEGER_CLASS;
3259 classes[1] = X86_64_INTEGER_CLASS;
3260 return 1 + (bytes > 8);
3261 }
3262 }
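
/* Illustrative sketch (not part of GCC): classifying a small structure.
   The struct is a hypothetical example.  */
#if 0
struct s
{
  double d;  /* eightbyte 0 */
  int i;     /* eightbyte 1 */
};

/* classify_argument returns 2 and fills in
     classes[0] = X86_64_SSEDF_CLASS    (the double)
     classes[1] = X86_64_INTEGER_CLASS  (the int, at bit offset 64)
   so the structure is passed with the double in an SSE register and the
   int in a general-purpose register.  A structure larger than 16 bytes
   would instead make classify_argument return 0, i.e. go to memory.  */
#endif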
3263
3264 /* Examine the argument and return the number of registers required in each
3265    class.  Return 0 iff the parameter should be passed in memory.  */
3266 static int
3267 examine_argument (enum machine_mode mode, tree type, int in_return,
3268 int *int_nregs, int *sse_nregs)
3269 {
3270 enum x86_64_reg_class class[MAX_CLASSES];
3271 int n = classify_argument (mode, type, class, 0);
3272
3273 *int_nregs = 0;
3274 *sse_nregs = 0;
3275 if (!n)
3276 return 0;
3277 for (n--; n >= 0; n--)
3278 switch (class[n])
3279 {
3280 case X86_64_INTEGER_CLASS:
3281 case X86_64_INTEGERSI_CLASS:
3282 (*int_nregs)++;
3283 break;
3284 case X86_64_SSE_CLASS:
3285 case X86_64_SSESF_CLASS:
3286 case X86_64_SSEDF_CLASS:
3287 (*sse_nregs)++;
3288 break;
3289 case X86_64_NO_CLASS:
3290 case X86_64_SSEUP_CLASS:
3291 break;
3292 case X86_64_X87_CLASS:
3293 case X86_64_X87UP_CLASS:
3294 if (!in_return)
3295 return 0;
3296 break;
3297 case X86_64_COMPLEX_X87_CLASS:
3298 return in_return ? 2 : 0;
3299 case X86_64_MEMORY_CLASS:
3300 gcc_unreachable ();
3301 }
3302 return 1;
3303 }
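
/* Illustrative sketch (not part of GCC): the IN_RETURN distinction.  For
   "long double" (XFmode) the classes are X86_64_X87_CLASS followed by
   X86_64_X87UP_CLASS, so examine_argument returns 0 for an argument but
   nonzero for a return value; that matches the x86-64 ABI, where long
   double is passed on the stack but returned in %st(0).  The function
   below is a hypothetical example.  */
#if 0
long double
scale (long double x)   /* X is passed in memory; the result is in %st(0).  */
{
  return x * 2.0L;
}
#endif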
3304
3305 /* Construct container for the argument used by GCC interface. See
3306 FUNCTION_ARG for the detailed description. */
3307
3308 static rtx
3309 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3310 tree type, int in_return, int nintregs, int nsseregs,
3311 const int *intreg, int sse_regno)
3312 {
3313 /* The following variables hold the static issued_error state. */
3314 static bool issued_sse_arg_error;
3315 static bool issued_sse_ret_error;
3316 static bool issued_x87_ret_error;
3317
3318 enum machine_mode tmpmode;
3319 int bytes =
3320 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3321 enum x86_64_reg_class class[MAX_CLASSES];
3322 int n;
3323 int i;
3324 int nexps = 0;
3325 int needed_sseregs, needed_intregs;
3326 rtx exp[MAX_CLASSES];
3327 rtx ret;
3328
3329 n = classify_argument (mode, type, class, 0);
3330 if (TARGET_DEBUG_ARG)
3331 {
3332 if (!n)
3333 fprintf (stderr, "Memory class\n");
3334 else
3335 {
3336 fprintf (stderr, "Classes:");
3337 for (i = 0; i < n; i++)
3338 {
3339 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3340 }
3341 fprintf (stderr, "\n");
3342 }
3343 }
3344 if (!n)
3345 return NULL;
3346 if (!examine_argument (mode, type, in_return, &needed_intregs,
3347 &needed_sseregs))
3348 return NULL;
3349 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3350 return NULL;
3351
3352 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3353 some less clueful developer tries to use floating-point anyway. */
3354 if (needed_sseregs && !TARGET_SSE)
3355 {
3356 if (in_return)
3357 {
3358 if (!issued_sse_ret_error)
3359 {
3360 error ("SSE register return with SSE disabled");
3361 issued_sse_ret_error = true;
3362 }
3363 }
3364 else if (!issued_sse_arg_error)
3365 {
3366 error ("SSE register argument with SSE disabled");
3367 issued_sse_arg_error = true;
3368 }
3369 return NULL;
3370 }
3371
3372 /* Likewise, error if the ABI requires us to return values in the
3373 x87 registers and the user specified -mno-80387. */
3374 if (!TARGET_80387 && in_return)
3375 for (i = 0; i < n; i++)
3376 if (class[i] == X86_64_X87_CLASS
3377 || class[i] == X86_64_X87UP_CLASS
3378 || class[i] == X86_64_COMPLEX_X87_CLASS)
3379 {
3380 if (!issued_x87_ret_error)
3381 {
3382 error ("x87 register return with x87 disabled");
3383 issued_x87_ret_error = true;
3384 }
3385 return NULL;
3386 }
3387
3388   /* First construct the simple cases.  Avoid SCmode, since we want to use a
3389      single register to pass this type.  */
3390 if (n == 1 && mode != SCmode)
3391 switch (class[0])
3392 {
3393 case X86_64_INTEGER_CLASS:
3394 case X86_64_INTEGERSI_CLASS:
3395 return gen_rtx_REG (mode, intreg[0]);
3396 case X86_64_SSE_CLASS:
3397 case X86_64_SSESF_CLASS:
3398 case X86_64_SSEDF_CLASS:
3399 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3400 case X86_64_X87_CLASS:
3401 case X86_64_COMPLEX_X87_CLASS:
3402 return gen_rtx_REG (mode, FIRST_STACK_REG);
3403 case X86_64_NO_CLASS:
3404 /* Zero sized array, struct or class. */
3405 return NULL;
3406 default:
3407 gcc_unreachable ();
3408 }
3409 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3410 && mode != BLKmode)
3411 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3412 if (n == 2
3413 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3414 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3415 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3416 && class[1] == X86_64_INTEGER_CLASS
3417 && (mode == CDImode || mode == TImode || mode == TFmode)
3418 && intreg[0] + 1 == intreg[1])
3419 return gen_rtx_REG (mode, intreg[0]);
3420
3421 /* Otherwise figure out the entries of the PARALLEL. */
3422 for (i = 0; i < n; i++)
3423 {
3424 switch (class[i])
3425 {
3426 case X86_64_NO_CLASS:
3427 break;
3428 case X86_64_INTEGER_CLASS:
3429 case X86_64_INTEGERSI_CLASS:
3430 /* Merge TImodes on aligned occasions here too. */
3431 if (i * 8 + 8 > bytes)
3432 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3433 else if (class[i] == X86_64_INTEGERSI_CLASS)
3434 tmpmode = SImode;
3435 else
3436 tmpmode = DImode;
3437 	  /* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
3438 if (tmpmode == BLKmode)
3439 tmpmode = DImode;
3440 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3441 gen_rtx_REG (tmpmode, *intreg),
3442 GEN_INT (i*8));
3443 intreg++;
3444 break;
3445 case X86_64_SSESF_CLASS:
3446 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3447 gen_rtx_REG (SFmode,
3448 SSE_REGNO (sse_regno)),
3449 GEN_INT (i*8));
3450 sse_regno++;
3451 break;
3452 case X86_64_SSEDF_CLASS:
3453 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3454 gen_rtx_REG (DFmode,
3455 SSE_REGNO (sse_regno)),
3456 GEN_INT (i*8));
3457 sse_regno++;
3458 break;
3459 case X86_64_SSE_CLASS:
3460 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3461 tmpmode = TImode;
3462 else
3463 tmpmode = DImode;
3464 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3465 gen_rtx_REG (tmpmode,
3466 SSE_REGNO (sse_regno)),
3467 GEN_INT (i*8));
3468 if (tmpmode == TImode)
3469 i++;
3470 sse_regno++;
3471 break;
3472 default:
3473 gcc_unreachable ();
3474 }
3475 }
3476
3477 /* Empty aligned struct, union or class. */
3478 if (nexps == 0)
3479 return NULL;
3480
3481 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3482 for (i = 0; i < nexps; i++)
3483 XVECEXP (ret, 0, i) = exp [i];
3484 return ret;
3485 }
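
/* Illustrative sketch (not part of GCC): for the struct { double d; int i; }
   example above, passed as the first argument while integer and SSE
   registers are still free, the container built here looks roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte lives in %xmm0 and the second in %rdi, each
   element tagged with its byte offset within the structure.  */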
3486
3487 /* Update the data in CUM to advance over an argument
3488 of mode MODE and data type TYPE.
3489 (TYPE is null for libcalls where that information may not be available.) */
3490
3491 void
3492 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3493 tree type, int named)
3494 {
3495 int bytes =
3496 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3497 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3498
3499 if (type)
3500 mode = type_natural_mode (type);
3501
3502 if (TARGET_DEBUG_ARG)
3503 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3504 "mode=%s, named=%d)\n\n",
3505 words, cum->words, cum->nregs, cum->sse_nregs,
3506 GET_MODE_NAME (mode), named);
3507
3508 if (TARGET_64BIT)
3509 {
3510 int int_nregs, sse_nregs;
3511 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3512 cum->words += words;
3513 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3514 {
3515 cum->nregs -= int_nregs;
3516 cum->sse_nregs -= sse_nregs;
3517 cum->regno += int_nregs;
3518 cum->sse_regno += sse_nregs;
3519 }
3520 else
3521 cum->words += words;
3522 }
3523 else
3524 {
3525 switch (mode)
3526 {
3527 default:
3528 break;
3529
3530 case BLKmode:
3531 if (bytes < 0)
3532 break;
3533 /* FALLTHRU */
3534
3535 case DImode:
3536 case SImode:
3537 case HImode:
3538 case QImode:
3539 cum->words += words;
3540 cum->nregs -= words;
3541 cum->regno += words;
3542
3543 if (cum->nregs <= 0)
3544 {
3545 cum->nregs = 0;
3546 cum->regno = 0;
3547 }
3548 break;
3549
3550 case DFmode:
3551 if (cum->float_in_sse < 2)
3552 break;
3553 case SFmode:
3554 if (cum->float_in_sse < 1)
3555 break;
3556 /* FALLTHRU */
3557
3558 case TImode:
3559 case V16QImode:
3560 case V8HImode:
3561 case V4SImode:
3562 case V2DImode:
3563 case V4SFmode:
3564 case V2DFmode:
3565 if (!type || !AGGREGATE_TYPE_P (type))
3566 {
3567 cum->sse_words += words;
3568 cum->sse_nregs -= 1;
3569 cum->sse_regno += 1;
3570 if (cum->sse_nregs <= 0)
3571 {
3572 cum->sse_nregs = 0;
3573 cum->sse_regno = 0;
3574 }
3575 }
3576 break;
3577
3578 case V8QImode:
3579 case V4HImode:
3580 case V2SImode:
3581 case V2SFmode:
3582 if (!type || !AGGREGATE_TYPE_P (type))
3583 {
3584 cum->mmx_words += words;
3585 cum->mmx_nregs -= 1;
3586 cum->mmx_regno += 1;
3587 if (cum->mmx_nregs <= 0)
3588 {
3589 cum->mmx_nregs = 0;
3590 cum->mmx_regno = 0;
3591 }
3592 }
3593 break;
3594 }
3595 }
3596 }
3597
3598 /* Define where to put the arguments to a function.
3599 Value is zero to push the argument on the stack,
3600 or a hard register in which to store the argument.
3601
3602 MODE is the argument's machine mode.
3603 TYPE is the data type of the argument (as a tree).
3604 This is null for libcalls where that information may
3605 not be available.
3606 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3607 the preceding args and about the function being called.
3608 NAMED is nonzero if this argument is a named parameter
3609 (otherwise it is an extra parameter matching an ellipsis). */
3610
3611 rtx
3612 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3613 tree type, int named)
3614 {
3615 enum machine_mode mode = orig_mode;
3616 rtx ret = NULL_RTX;
3617 int bytes =
3618 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3619 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3620 static bool warnedsse, warnedmmx;
3621
3622 /* To simplify the code below, represent vector types with a vector mode
3623 even if MMX/SSE are not active. */
3624 if (type && TREE_CODE (type) == VECTOR_TYPE)
3625 mode = type_natural_mode (type);
3626
3627 /* Handle a hidden AL argument containing number of registers for varargs
3628 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3629 any AL settings. */
3630 if (mode == VOIDmode)
3631 {
3632 if (TARGET_64BIT)
3633 return GEN_INT (cum->maybe_vaarg
3634 ? (cum->sse_nregs < 0
3635 ? SSE_REGPARM_MAX
3636 : cum->sse_regno)
3637 : -1);
3638 else
3639 return constm1_rtx;
3640 }
3641 if (TARGET_64BIT)
3642 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3643 cum->sse_nregs,
3644 &x86_64_int_parameter_registers [cum->regno],
3645 cum->sse_regno);
3646 else
3647 switch (mode)
3648 {
3649 /* For now, pass fp/complex values on the stack. */
3650 default:
3651 break;
3652
3653 case BLKmode:
3654 if (bytes < 0)
3655 break;
3656 /* FALLTHRU */
3657 case DImode:
3658 case SImode:
3659 case HImode:
3660 case QImode:
3661 if (words <= cum->nregs)
3662 {
3663 int regno = cum->regno;
3664
3665 /* Fastcall allocates the first two DWORD (SImode) or
3666 smaller arguments to ECX and EDX. */
3667 if (cum->fastcall)
3668 {
3669 if (mode == BLKmode || mode == DImode)
3670 break;
3671
3672 	      /* ECX, not EAX, is the first allocated register.  */
3673 if (regno == 0)
3674 regno = 2;
3675 }
3676 ret = gen_rtx_REG (mode, regno);
3677 }
3678 break;
3679 case DFmode:
3680 if (cum->float_in_sse < 2)
3681 break;
3682 case SFmode:
3683 if (cum->float_in_sse < 1)
3684 break;
3685 /* FALLTHRU */
3686 case TImode:
3687 case V16QImode:
3688 case V8HImode:
3689 case V4SImode:
3690 case V2DImode:
3691 case V4SFmode:
3692 case V2DFmode:
3693 if (!type || !AGGREGATE_TYPE_P (type))
3694 {
3695 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3696 {
3697 warnedsse = true;
3698 warning (0, "SSE vector argument without SSE enabled "
3699 "changes the ABI");
3700 }
3701 if (cum->sse_nregs)
3702 ret = gen_reg_or_parallel (mode, orig_mode,
3703 cum->sse_regno + FIRST_SSE_REG);
3704 }
3705 break;
3706 case V8QImode:
3707 case V4HImode:
3708 case V2SImode:
3709 case V2SFmode:
3710 if (!type || !AGGREGATE_TYPE_P (type))
3711 {
3712 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3713 {
3714 warnedmmx = true;
3715 warning (0, "MMX vector argument without MMX enabled "
3716 "changes the ABI");
3717 }
3718 if (cum->mmx_nregs)
3719 ret = gen_reg_or_parallel (mode, orig_mode,
3720 cum->mmx_regno + FIRST_MMX_REG);
3721 }
3722 break;
3723 }
3724
3725 if (TARGET_DEBUG_ARG)
3726 {
3727 fprintf (stderr,
3728 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3729 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3730
3731 if (ret)
3732 print_simple_rtl (stderr, ret);
3733 else
3734 fprintf (stderr, ", stack");
3735
3736 fprintf (stderr, " )\n");
3737 }
3738
3739 return ret;
3740 }
3741
3742 /* A C expression that indicates when an argument must be passed by
3743 reference. If nonzero for an argument, a copy of that argument is
3744 made in memory and a pointer to the argument is passed instead of
3745 the argument itself. The pointer is passed in whatever way is
3746 appropriate for passing a pointer to that type. */
3747
3748 static bool
3749 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3750 enum machine_mode mode ATTRIBUTE_UNUSED,
3751 tree type, bool named ATTRIBUTE_UNUSED)
3752 {
3753 if (!TARGET_64BIT)
3754 return 0;
3755
3756 if (type && int_size_in_bytes (type) == -1)
3757 {
3758 if (TARGET_DEBUG_ARG)
3759 fprintf (stderr, "function_arg_pass_by_reference\n");
3760 return 1;
3761 }
3762
3763 return 0;
3764 }
3765
3766 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3767 ABI. Only called if TARGET_SSE. */
3768 static bool
3769 contains_128bit_aligned_vector_p (tree type)
3770 {
3771 enum machine_mode mode = TYPE_MODE (type);
3772 if (SSE_REG_MODE_P (mode)
3773 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3774 return true;
3775 if (TYPE_ALIGN (type) < 128)
3776 return false;
3777
3778 if (AGGREGATE_TYPE_P (type))
3779 {
3780 /* Walk the aggregates recursively. */
3781 switch (TREE_CODE (type))
3782 {
3783 case RECORD_TYPE:
3784 case UNION_TYPE:
3785 case QUAL_UNION_TYPE:
3786 {
3787 tree field;
3788
3789 /* Walk all the structure fields. */
3790 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3791 {
3792 if (TREE_CODE (field) == FIELD_DECL
3793 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3794 return true;
3795 }
3796 break;
3797 }
3798
3799 case ARRAY_TYPE:
3800 	case ARRAY_TYPE:
3801 	  /* Just for use if some languages pass arrays by value.  */
3801 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3802 return true;
3803 break;
3804
3805 default:
3806 gcc_unreachable ();
3807 }
3808 }
3809 return false;
3810 }
3811
3812 /* Gives the alignment boundary, in bits, of an argument with the
3813 specified mode and type. */
3814
3815 int
3816 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3817 {
3818 int align;
3819 if (type)
3820 align = TYPE_ALIGN (type);
3821 else
3822 align = GET_MODE_ALIGNMENT (mode);
3823 if (align < PARM_BOUNDARY)
3824 align = PARM_BOUNDARY;
3825 if (!TARGET_64BIT)
3826 {
3827 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3828 make an exception for SSE modes since these require 128bit
3829 alignment.
3830
3831 The handling here differs from field_alignment. ICC aligns MMX
3832 arguments to 4 byte boundaries, while structure fields are aligned
3833 to 8 byte boundaries. */
3834 if (!TARGET_SSE)
3835 align = PARM_BOUNDARY;
3836 else if (!type)
3837 {
3838 if (!SSE_REG_MODE_P (mode))
3839 align = PARM_BOUNDARY;
3840 }
3841 else
3842 {
3843 if (!contains_128bit_aligned_vector_p (type))
3844 align = PARM_BOUNDARY;
3845 }
3846 }
3847 if (align > 128)
3848 align = 128;
3849 return align;
3850 }
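
/* Illustrative sketch (not part of GCC): on the 32-bit ABI with SSE
   enabled, an SSE vector argument is the only case that gets more than
   PARM_BOUNDARY alignment.  The typedef and prototypes are hypothetical.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

void f (int i);    /* boundary = 32 bits (PARM_BOUNDARY) */
void g (v4sf v);   /* boundary = 128 bits when -msse is in effect */
#endif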
3851
3852 /* Return true if N is a possible register number of function value. */
3853 bool
3854 ix86_function_value_regno_p (int regno)
3855 {
3856 if (regno == 0
3857 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3858 || (regno == FIRST_SSE_REG && TARGET_SSE))
3859 return true;
3860
3861 if (!TARGET_64BIT
3862 && (regno == FIRST_MMX_REG && TARGET_MMX))
3863 return true;
3864
3865 return false;
3866 }
3867
3868 /* Define how to find the value returned by a function.
3869 VALTYPE is the data type of the value (as a tree).
3870 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3871 otherwise, FUNC is 0. */
3872 rtx
3873 ix86_function_value (tree valtype, tree fntype_or_decl,
3874 bool outgoing ATTRIBUTE_UNUSED)
3875 {
3876 enum machine_mode natmode = type_natural_mode (valtype);
3877
3878 if (TARGET_64BIT)
3879 {
3880 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3881 1, REGPARM_MAX, SSE_REGPARM_MAX,
3882 x86_64_int_return_registers, 0);
3883       /* For zero sized structures, construct_container returns NULL, but we need
3884 	 to keep the rest of the compiler happy by returning a meaningful value.  */
3885 if (!ret)
3886 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3887 return ret;
3888 }
3889 else
3890 {
3891 tree fn = NULL_TREE, fntype;
3892 if (fntype_or_decl
3893 && DECL_P (fntype_or_decl))
3894 fn = fntype_or_decl;
3895 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3896 return gen_rtx_REG (TYPE_MODE (valtype),
3897 ix86_value_regno (natmode, fn, fntype));
3898 }
3899 }
3900
3901 /* Return true iff type is returned in memory. */
3902 int
3903 ix86_return_in_memory (tree type)
3904 {
3905 int needed_intregs, needed_sseregs, size;
3906 enum machine_mode mode = type_natural_mode (type);
3907
3908 if (TARGET_64BIT)
3909 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3910
3911 if (mode == BLKmode)
3912 return 1;
3913
3914 size = int_size_in_bytes (type);
3915
3916 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3917 return 0;
3918
3919 if (VECTOR_MODE_P (mode) || mode == TImode)
3920 {
3921 /* User-created vectors small enough to fit in EAX. */
3922 if (size < 8)
3923 return 0;
3924
3925       /* MMX/3dNow values are returned in MM0,
3926 	 except when it doesn't exist.  */
3927 if (size == 8)
3928 return (TARGET_MMX ? 0 : 1);
3929
3930 /* SSE values are returned in XMM0, except when it doesn't exist. */
3931 if (size == 16)
3932 return (TARGET_SSE ? 0 : 1);
3933 }
3934
3935 if (mode == XFmode)
3936 return 0;
3937
3938 if (mode == TDmode)
3939 return 1;
3940
3941 if (size > 12)
3942 return 1;
3943 return 0;
3944 }
3945
3946 /* When returning SSE vector types, we have a choice of either
3947    (1) being ABI incompatible with a -march switch, or
3948 (2) generating an error.
3949 Given no good solution, I think the safest thing is one warning.
3950 The user won't be able to use -Werror, but....
3951
3952 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3953 called in response to actually generating a caller or callee that
3954 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3955 via aggregate_value_p for general type probing from tree-ssa. */
3956
3957 static rtx
3958 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3959 {
3960 static bool warnedsse, warnedmmx;
3961
3962 if (type)
3963 {
3964 /* Look at the return type of the function, not the function type. */
3965 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3966
3967 if (!TARGET_SSE && !warnedsse)
3968 {
3969 if (mode == TImode
3970 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3971 {
3972 warnedsse = true;
3973 warning (0, "SSE vector return without SSE enabled "
3974 "changes the ABI");
3975 }
3976 }
3977
3978 if (!TARGET_MMX && !warnedmmx)
3979 {
3980 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3981 {
3982 warnedmmx = true;
3983 warning (0, "MMX vector return without MMX enabled "
3984 "changes the ABI");
3985 }
3986 }
3987 }
3988
3989 return NULL;
3990 }
3991
3992 /* Define how to find the value returned by a library function
3993 assuming the value has mode MODE. */
3994 rtx
3995 ix86_libcall_value (enum machine_mode mode)
3996 {
3997 if (TARGET_64BIT)
3998 {
3999 switch (mode)
4000 {
4001 case SFmode:
4002 case SCmode:
4003 case DFmode:
4004 case DCmode:
4005 case TFmode:
4006 case SDmode:
4007 case DDmode:
4008 case TDmode:
4009 return gen_rtx_REG (mode, FIRST_SSE_REG);
4010 case XFmode:
4011 case XCmode:
4012 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4013 case TCmode:
4014 return NULL;
4015 default:
4016 return gen_rtx_REG (mode, 0);
4017 }
4018 }
4019 else
4020 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4021 }
4022
4023 /* Given a mode, return the register to use for a return value. */
4024
4025 static int
4026 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4027 {
4028 gcc_assert (!TARGET_64BIT);
4029
4030 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4031 we normally prevent this case when mmx is not available. However
4032 some ABIs may require the result to be returned like DImode. */
4033 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4034 return TARGET_MMX ? FIRST_MMX_REG : 0;
4035
4036 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4037 we prevent this case when sse is not available. However some ABIs
4038 may require the result to be returned like integer TImode. */
4039 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4040 return TARGET_SSE ? FIRST_SSE_REG : 0;
4041
4042 /* Decimal floating point values can go in %eax, unlike other float modes. */
4043 if (DECIMAL_FLOAT_MODE_P (mode))
4044 return 0;
4045
4046 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4047 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4048 return 0;
4049
4050 /* Floating point return values in %st(0), except for local functions when
4051 SSE math is enabled or for functions with sseregparm attribute. */
4052 if ((func || fntype)
4053 && (mode == SFmode || mode == DFmode))
4054 {
4055 int sse_level = ix86_function_sseregparm (fntype, func);
4056 if ((sse_level >= 1 && mode == SFmode)
4057 || (sse_level == 2 && mode == DFmode))
4058 return FIRST_SSE_REG;
4059 }
4060
4061 return FIRST_FLOAT_REG;
4062 }
4063 \f
4064 /* Create the va_list data type. */
4065
4066 static tree
4067 ix86_build_builtin_va_list (void)
4068 {
4069 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4070
4071   /* For i386 we use a plain pointer to the argument area.  */
4072 if (!TARGET_64BIT)
4073 return build_pointer_type (char_type_node);
4074
4075 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4076 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4077
4078 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4079 unsigned_type_node);
4080 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4081 unsigned_type_node);
4082 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4083 ptr_type_node);
4084 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4085 ptr_type_node);
4086
4087 va_list_gpr_counter_field = f_gpr;
4088 va_list_fpr_counter_field = f_fpr;
4089
4090 DECL_FIELD_CONTEXT (f_gpr) = record;
4091 DECL_FIELD_CONTEXT (f_fpr) = record;
4092 DECL_FIELD_CONTEXT (f_ovf) = record;
4093 DECL_FIELD_CONTEXT (f_sav) = record;
4094
4095 TREE_CHAIN (record) = type_decl;
4096 TYPE_NAME (record) = type_decl;
4097 TYPE_FIELDS (record) = f_gpr;
4098 TREE_CHAIN (f_gpr) = f_fpr;
4099 TREE_CHAIN (f_fpr) = f_ovf;
4100 TREE_CHAIN (f_ovf) = f_sav;
4101
4102 layout_type (record);
4103
4104 /* The correct type is an array type of one element. */
4105 return build_array_type (record, build_index_type (size_zero_node));
4106 }
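
/* Illustrative sketch (not part of GCC): the record built above corresponds
   to the va_list layout mandated by the x86-64 ABI, roughly equivalent to
   the following C declaration.  */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;    /* byte offset into reg_save_area of the next GP slot */
  unsigned int fp_offset;    /* byte offset into reg_save_area of the next SSE slot */
  void *overflow_arg_area;   /* next stack-passed argument */
  void *reg_save_area;       /* register save area laid down by the prologue */
} __builtin_va_list[1];      /* an array type of one element, as built above */
#endif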
4107
4108 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4109
4110 static void
4111 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4112 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4113 int no_rtl)
4114 {
4115 CUMULATIVE_ARGS next_cum;
4116 rtx save_area = NULL_RTX, mem;
4117 rtx label;
4118 rtx label_ref;
4119 rtx tmp_reg;
4120 rtx nsse_reg;
4121 int set;
4122 tree fntype;
4123 int stdarg_p;
4124 int i;
4125
4126 if (!TARGET_64BIT)
4127 return;
4128
4129 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4130 return;
4131
4132   /* Indicate that stack space should be allocated for the varargs save area.  */
4133 ix86_save_varrargs_registers = 1;
4134
4135 cfun->stack_alignment_needed = 128;
4136
4137 fntype = TREE_TYPE (current_function_decl);
4138 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4139 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4140 != void_type_node));
4141
4142 /* For varargs, we do not want to skip the dummy va_dcl argument.
4143 For stdargs, we do want to skip the last named argument. */
4144 next_cum = *cum;
4145 if (stdarg_p)
4146 function_arg_advance (&next_cum, mode, type, 1);
4147
4148 if (!no_rtl)
4149 save_area = frame_pointer_rtx;
4150
4151 set = get_varargs_alias_set ();
4152
4153 for (i = next_cum.regno;
4154 i < ix86_regparm
4155 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4156 i++)
4157 {
4158 mem = gen_rtx_MEM (Pmode,
4159 plus_constant (save_area, i * UNITS_PER_WORD));
4160 MEM_NOTRAP_P (mem) = 1;
4161 set_mem_alias_set (mem, set);
4162 emit_move_insn (mem, gen_rtx_REG (Pmode,
4163 x86_64_int_parameter_registers[i]));
4164 }
4165
4166 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4167 {
4168       /* Now emit code to save the SSE registers.  The AX parameter contains the
4169 	 number of SSE parameter registers used to call this function.  We use the
4170 	 sse_prologue_save insn template, which produces a computed jump across
4171 	 the SSE saves.  We need some preparation work to get this working.  */
4172
4173 label = gen_label_rtx ();
4174 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4175
4176 /* Compute address to jump to :
4177 label - 5*eax + nnamed_sse_arguments*5 */
4178 tmp_reg = gen_reg_rtx (Pmode);
4179 nsse_reg = gen_reg_rtx (Pmode);
4180 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4181 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4182 gen_rtx_MULT (Pmode, nsse_reg,
4183 GEN_INT (4))));
4184 if (next_cum.sse_regno)
4185 emit_move_insn
4186 (nsse_reg,
4187 gen_rtx_CONST (DImode,
4188 gen_rtx_PLUS (DImode,
4189 label_ref,
4190 GEN_INT (next_cum.sse_regno * 4))));
4191 else
4192 emit_move_insn (nsse_reg, label_ref);
4193 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4194
4195       /* Compute the address of the memory block we save into.  We always use a
4196 	 pointer pointing 127 bytes after the first byte to store - this is needed
4197 	 to keep the instruction size limited to 4 bytes.  */
4198 tmp_reg = gen_reg_rtx (Pmode);
4199 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4200 plus_constant (save_area,
4201 8 * REGPARM_MAX + 127)));
4202 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4203 MEM_NOTRAP_P (mem) = 1;
4204 set_mem_alias_set (mem, set);
4205 set_mem_align (mem, BITS_PER_WORD);
4206
4207 /* And finally do the dirty job! */
4208 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4209 GEN_INT (next_cum.sse_regno), label));
4210 }
4211
4212 }
4213
4214 /* Implement va_start. */
4215
4216 void
4217 ix86_va_start (tree valist, rtx nextarg)
4218 {
4219 HOST_WIDE_INT words, n_gpr, n_fpr;
4220 tree f_gpr, f_fpr, f_ovf, f_sav;
4221 tree gpr, fpr, ovf, sav, t;
4222 tree type;
4223
4224 /* Only 64bit target needs something special. */
4225 if (!TARGET_64BIT)
4226 {
4227 std_expand_builtin_va_start (valist, nextarg);
4228 return;
4229 }
4230
4231 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4232 f_fpr = TREE_CHAIN (f_gpr);
4233 f_ovf = TREE_CHAIN (f_fpr);
4234 f_sav = TREE_CHAIN (f_ovf);
4235
4236 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4237 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4238 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4239 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4240 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4241
4242 /* Count number of gp and fp argument registers used. */
4243 words = current_function_args_info.words;
4244 n_gpr = current_function_args_info.regno;
4245 n_fpr = current_function_args_info.sse_regno;
4246
4247 if (TARGET_DEBUG_ARG)
4248 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4249 (int) words, (int) n_gpr, (int) n_fpr);
4250
4251 if (cfun->va_list_gpr_size)
4252 {
4253 type = TREE_TYPE (gpr);
4254 t = build2 (MODIFY_EXPR, type, gpr,
4255 build_int_cst (type, n_gpr * 8));
4256 TREE_SIDE_EFFECTS (t) = 1;
4257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4258 }
4259
4260 if (cfun->va_list_fpr_size)
4261 {
4262 type = TREE_TYPE (fpr);
4263 t = build2 (MODIFY_EXPR, type, fpr,
4264 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4265 TREE_SIDE_EFFECTS (t) = 1;
4266 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4267 }
4268
4269 /* Find the overflow area. */
4270 type = TREE_TYPE (ovf);
4271 t = make_tree (type, virtual_incoming_args_rtx);
4272 if (words != 0)
4273 t = build2 (PLUS_EXPR, type, t,
4274 build_int_cst (type, words * UNITS_PER_WORD));
4275 t = build2 (MODIFY_EXPR, type, ovf, t);
4276 TREE_SIDE_EFFECTS (t) = 1;
4277 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4278
4279 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4280 {
4281       /* Find the register save area.
4282 	 The prologue of the function saves it right above the stack frame.  */
4283 type = TREE_TYPE (sav);
4284 t = make_tree (type, frame_pointer_rtx);
4285 t = build2 (MODIFY_EXPR, type, sav, t);
4286 TREE_SIDE_EFFECTS (t) = 1;
4287 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4288 }
4289 }
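
/* Illustrative sketch (not part of GCC): the values va_start stores for a
   variadic function that has consumed one GP and one SSE register.  The
   prototype is a hypothetical example; REGPARM_MAX is 6 on x86-64.  */
#if 0
void f (int a, double b, ...);

/* At va_start inside f:
     gp_offset         = 1 * 8  =  8          (%rdi consumed by A)
     fp_offset         = 6 * 8 + 1 * 16 = 64  (%xmm0 consumed by B)
     overflow_arg_area = address of the first stack-passed argument
     reg_save_area     = the save area laid down by the prologue  */
#endif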
4290
4291 /* Implement va_arg. */
4292
4293 tree
4294 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4295 {
4296 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4297 tree f_gpr, f_fpr, f_ovf, f_sav;
4298 tree gpr, fpr, ovf, sav, t;
4299 int size, rsize;
4300 tree lab_false, lab_over = NULL_TREE;
4301 tree addr, t2;
4302 rtx container;
4303 int indirect_p = 0;
4304 tree ptrtype;
4305 enum machine_mode nat_mode;
4306
4307 /* Only 64bit target needs something special. */
4308 if (!TARGET_64BIT)
4309 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4310
4311 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4312 f_fpr = TREE_CHAIN (f_gpr);
4313 f_ovf = TREE_CHAIN (f_fpr);
4314 f_sav = TREE_CHAIN (f_ovf);
4315
4316 valist = build_va_arg_indirect_ref (valist);
4317 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4318 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4319 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4320 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4321
4322 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4323 if (indirect_p)
4324 type = build_pointer_type (type);
4325 size = int_size_in_bytes (type);
4326 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4327
4328 nat_mode = type_natural_mode (type);
4329 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4330 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4331
4332 /* Pull the value out of the saved registers. */
4333
4334 addr = create_tmp_var (ptr_type_node, "addr");
4335 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4336
4337 if (container)
4338 {
4339 int needed_intregs, needed_sseregs;
4340 bool need_temp;
4341 tree int_addr, sse_addr;
4342
4343 lab_false = create_artificial_label ();
4344 lab_over = create_artificial_label ();
4345
4346 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4347
4348 need_temp = (!REG_P (container)
4349 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4350 || TYPE_ALIGN (type) > 128));
4351
4352       /* In case we are passing a structure, verify that it is a consecutive block
4353 	 in the register save area.  If not, we need to do moves.  */
4354 if (!need_temp && !REG_P (container))
4355 {
4356 /* Verify that all registers are strictly consecutive */
4357 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4358 {
4359 int i;
4360
4361 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4362 {
4363 rtx slot = XVECEXP (container, 0, i);
4364 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4365 || INTVAL (XEXP (slot, 1)) != i * 16)
4366 need_temp = 1;
4367 }
4368 }
4369 else
4370 {
4371 int i;
4372
4373 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4374 {
4375 rtx slot = XVECEXP (container, 0, i);
4376 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4377 || INTVAL (XEXP (slot, 1)) != i * 8)
4378 need_temp = 1;
4379 }
4380 }
4381 }
4382 if (!need_temp)
4383 {
4384 int_addr = addr;
4385 sse_addr = addr;
4386 }
4387 else
4388 {
4389 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4390 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4391 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4392 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4393 }
4394
4395 /* First ensure that we fit completely in registers. */
4396 if (needed_intregs)
4397 {
4398 t = build_int_cst (TREE_TYPE (gpr),
4399 (REGPARM_MAX - needed_intregs + 1) * 8);
4400 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4401 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4402 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4403 gimplify_and_add (t, pre_p);
4404 }
4405 if (needed_sseregs)
4406 {
4407 t = build_int_cst (TREE_TYPE (fpr),
4408 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4409 + REGPARM_MAX * 8);
4410 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4411 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4412 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4413 gimplify_and_add (t, pre_p);
4414 }
4415
4416 /* Compute index to start of area used for integer regs. */
4417 if (needed_intregs)
4418 {
4419 /* int_addr = gpr + sav; */
4420 t = fold_convert (ptr_type_node, gpr);
4421 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4422 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4423 gimplify_and_add (t, pre_p);
4424 }
4425 if (needed_sseregs)
4426 {
4427 /* sse_addr = fpr + sav; */
4428 t = fold_convert (ptr_type_node, fpr);
4429 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4430 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4431 gimplify_and_add (t, pre_p);
4432 }
4433 if (need_temp)
4434 {
4435 int i;
4436 tree temp = create_tmp_var (type, "va_arg_tmp");
4437
4438 /* addr = &temp; */
4439 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4440 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4441 gimplify_and_add (t, pre_p);
4442
4443 for (i = 0; i < XVECLEN (container, 0); i++)
4444 {
4445 rtx slot = XVECEXP (container, 0, i);
4446 rtx reg = XEXP (slot, 0);
4447 enum machine_mode mode = GET_MODE (reg);
4448 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4449 tree addr_type = build_pointer_type (piece_type);
4450 tree src_addr, src;
4451 int src_offset;
4452 tree dest_addr, dest;
4453
4454 if (SSE_REGNO_P (REGNO (reg)))
4455 {
4456 src_addr = sse_addr;
4457 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4458 }
4459 else
4460 {
4461 src_addr = int_addr;
4462 src_offset = REGNO (reg) * 8;
4463 }
4464 src_addr = fold_convert (addr_type, src_addr);
4465 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4466 size_int (src_offset)));
4467 src = build_va_arg_indirect_ref (src_addr);
4468
4469 dest_addr = fold_convert (addr_type, addr);
4470 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4471 size_int (INTVAL (XEXP (slot, 1)))));
4472 dest = build_va_arg_indirect_ref (dest_addr);
4473
4474 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4475 gimplify_and_add (t, pre_p);
4476 }
4477 }
4478
4479 if (needed_intregs)
4480 {
4481 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4482 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4483 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4484 gimplify_and_add (t, pre_p);
4485 }
4486 if (needed_sseregs)
4487 {
4488 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4489 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4490 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4491 gimplify_and_add (t, pre_p);
4492 }
4493
4494 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4495 gimplify_and_add (t, pre_p);
4496
4497 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4498 append_to_statement_list (t, pre_p);
4499 }
4500
4501 /* ... otherwise out of the overflow area. */
4502
4503 /* Care for on-stack alignment if needed. */
4504 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4505 || integer_zerop (TYPE_SIZE (type)))
4506 t = ovf;
4507 else
4508 {
4509 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4510 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4511 build_int_cst (TREE_TYPE (ovf), align - 1));
4512 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4513 build_int_cst (TREE_TYPE (t), -align));
4514 }
4515 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4516
4517 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4518 gimplify_and_add (t2, pre_p);
4519
4520 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4521 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4522 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4523 gimplify_and_add (t, pre_p);
4524
4525 if (container)
4526 {
4527 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4528 append_to_statement_list (t, pre_p);
4529 }
4530
4531 ptrtype = build_pointer_type (type);
4532 addr = fold_convert (ptrtype, addr);
4533
4534 if (indirect_p)
4535 addr = build_va_arg_indirect_ref (addr);
4536 return build_va_arg_indirect_ref (addr);
4537 }
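
/* Illustrative sketch (not part of GCC): for a scalar double, the GIMPLE
   emitted above behaves roughly like the following C, where 176 is
   REGPARM_MAX * 8 + SSE_REGPARM_MAX * 16, the size of the register save
   area.  next_double is a hypothetical helper shown only for illustration.  */
#if 0
double
next_double (__builtin_va_list ap)
{
  void *addr;

  if (ap->fp_offset >= 176)                     /* SSE slots exhausted */
    {
      addr = ap->overflow_arg_area;             /* take it from the stack */
      ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
    }
  else
    {
      addr = (char *) ap->reg_save_area + ap->fp_offset;
      ap->fp_offset += 16;                      /* each SSE slot is 16 bytes */
    }
  return *(double *) addr;
}
#endif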
4538 \f
4539 /* Return nonzero if OPNUM's MEM should be matched
4540 in movabs* patterns. */
4541
4542 int
4543 ix86_check_movabs (rtx insn, int opnum)
4544 {
4545 rtx set, mem;
4546
4547 set = PATTERN (insn);
4548 if (GET_CODE (set) == PARALLEL)
4549 set = XVECEXP (set, 0, 0);
4550 gcc_assert (GET_CODE (set) == SET);
4551 mem = XEXP (set, opnum);
4552 while (GET_CODE (mem) == SUBREG)
4553 mem = SUBREG_REG (mem);
4554 gcc_assert (GET_CODE (mem) == MEM);
4555 return (volatile_ok || !MEM_VOLATILE_P (mem));
4556 }
4557 \f
4558 /* Initialize the table of extra 80387 mathematical constants. */
4559
4560 static void
4561 init_ext_80387_constants (void)
4562 {
4563 static const char * cst[5] =
4564 {
4565 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4566 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4567 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4568 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4569 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4570 };
4571 int i;
4572
4573 for (i = 0; i < 5; i++)
4574 {
4575 real_from_string (&ext_80387_constants_table[i], cst[i]);
4576 /* Ensure each constant is rounded to XFmode precision. */
4577 real_convert (&ext_80387_constants_table[i],
4578 XFmode, &ext_80387_constants_table[i]);
4579 }
4580
4581 ext_80387_constants_init = 1;
4582 }
4583
4584 /* Return true if the constant is something that can be loaded with
4585 a special instruction. */
4586
4587 int
4588 standard_80387_constant_p (rtx x)
4589 {
4590 REAL_VALUE_TYPE r;
4591
4592 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4593 return -1;
4594
4595 if (x == CONST0_RTX (GET_MODE (x)))
4596 return 1;
4597 if (x == CONST1_RTX (GET_MODE (x)))
4598 return 2;
4599
4600 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4601
4602 /* For XFmode constants, try to find a special 80387 instruction when
4603 optimizing for size or on those CPUs that benefit from them. */
4604 if (GET_MODE (x) == XFmode
4605 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4606 {
4607 int i;
4608
4609 if (! ext_80387_constants_init)
4610 init_ext_80387_constants ();
4611
4612 for (i = 0; i < 5; i++)
4613 if (real_identical (&r, &ext_80387_constants_table[i]))
4614 return i + 3;
4615 }
4616
4617 /* Load of the constant -0.0 or -1.0 will be split as
4618 fldz;fchs or fld1;fchs sequence. */
4619 if (real_isnegzero (&r))
4620 return 8;
4621 if (real_identical (&r, &dconstm1))
4622 return 9;
4623
4624 return 0;
4625 }
4626
4627 /* Return the opcode of the special instruction to be used to load
4628 the constant X. */
4629
4630 const char *
4631 standard_80387_constant_opcode (rtx x)
4632 {
4633 switch (standard_80387_constant_p (x))
4634 {
4635 case 1:
4636 return "fldz";
4637 case 2:
4638 return "fld1";
4639 case 3:
4640 return "fldlg2";
4641 case 4:
4642 return "fldln2";
4643 case 5:
4644 return "fldl2e";
4645 case 6:
4646 return "fldl2t";
4647 case 7:
4648 return "fldpi";
4649 case 8:
4650 case 9:
4651 return "#";
4652 default:
4653 gcc_unreachable ();
4654 }
4655 }
4656
4657 /* Return the CONST_DOUBLE representing the 80387 constant that is
4658 loaded by the specified special instruction. The argument IDX
4659 matches the return value from standard_80387_constant_p. */
4660
4661 rtx
4662 standard_80387_constant_rtx (int idx)
4663 {
4664 int i;
4665
4666 if (! ext_80387_constants_init)
4667 init_ext_80387_constants ();
4668
4669 switch (idx)
4670 {
4671 case 3:
4672 case 4:
4673 case 5:
4674 case 6:
4675 case 7:
4676 i = idx - 3;
4677 break;
4678
4679 default:
4680 gcc_unreachable ();
4681 }
4682
4683 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4684 XFmode);
4685 }
4686
4687 /* Return 1 if mode is a valid mode for sse. */
4688 static int
4689 standard_sse_mode_p (enum machine_mode mode)
4690 {
4691 switch (mode)
4692 {
4693 case V16QImode:
4694 case V8HImode:
4695 case V4SImode:
4696 case V2DImode:
4697 case V4SFmode:
4698 case V2DFmode:
4699 return 1;
4700
4701 default:
4702 return 0;
4703 }
4704 }
4705
4706 /* Return 1 if X is an FP constant we can load into an SSE register without
4707    using memory.  */
4708 int
4709 standard_sse_constant_p (rtx x)
4710 {
4711 enum machine_mode mode = GET_MODE (x);
4712
4713 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4714 return 1;
4715 if (vector_all_ones_operand (x, mode)
4716 && standard_sse_mode_p (mode))
4717 return TARGET_SSE2 ? 2 : -1;
4718
4719 return 0;
4720 }
4721
4722 /* Return the opcode of the special instruction to be used to load
4723 the constant X. */
4724
4725 const char *
4726 standard_sse_constant_opcode (rtx insn, rtx x)
4727 {
4728 switch (standard_sse_constant_p (x))
4729 {
4730 case 1:
4731 if (get_attr_mode (insn) == MODE_V4SF)
4732 return "xorps\t%0, %0";
4733 else if (get_attr_mode (insn) == MODE_V2DF)
4734 return "xorpd\t%0, %0";
4735 else
4736 return "pxor\t%0, %0";
4737 case 2:
4738 return "pcmpeqd\t%0, %0";
4739 }
4740 gcc_unreachable ();
4741 }
4742
4743 /* Returns 1 if OP contains a symbol reference.  */
4744
4745 int
4746 symbolic_reference_mentioned_p (rtx op)
4747 {
4748 const char *fmt;
4749 int i;
4750
4751 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4752 return 1;
4753
4754 fmt = GET_RTX_FORMAT (GET_CODE (op));
4755 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4756 {
4757 if (fmt[i] == 'E')
4758 {
4759 int j;
4760
4761 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4762 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4763 return 1;
4764 }
4765
4766 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4767 return 1;
4768 }
4769
4770 return 0;
4771 }
4772
4773 /* Return 1 if it is appropriate to emit `ret' instructions in the
4774 body of a function. Do this only if the epilogue is simple, needing a
4775 couple of insns. Prior to reloading, we can't tell how many registers
4776 must be saved, so return 0 then. Return 0 if there is no frame
4777 marker to de-allocate. */
4778
4779 int
4780 ix86_can_use_return_insn_p (void)
4781 {
4782 struct ix86_frame frame;
4783
4784 if (! reload_completed || frame_pointer_needed)
4785 return 0;
4786
4787   /* Don't allow more than 32K bytes of arguments to be popped, since that's
4788      all we can do with one instruction.  */
4789 if (current_function_pops_args
4790 && current_function_args_size >= 32768)
4791 return 0;
4792
4793 ix86_compute_frame_layout (&frame);
4794 return frame.to_allocate == 0 && frame.nregs == 0;
4795 }
4796 \f
4797 /* Value should be nonzero if functions must have frame pointers.
4798 Zero means the frame pointer need not be set up (and parms may
4799 be accessed via the stack pointer) in functions that seem suitable. */
4800
4801 int
4802 ix86_frame_pointer_required (void)
4803 {
4804 /* If we accessed previous frames, then the generated code expects
4805 to be able to access the saved ebp value in our frame. */
4806 if (cfun->machine->accesses_prev_frame)
4807 return 1;
4808
4809 /* Several x86 os'es need a frame pointer for other reasons,
4810 usually pertaining to setjmp. */
4811 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4812 return 1;
4813
4814 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4815 the frame pointer by default. Turn it back on now if we've not
4816 got a leaf function. */
4817 if (TARGET_OMIT_LEAF_FRAME_POINTER
4818 && (!current_function_is_leaf
4819 || ix86_current_function_calls_tls_descriptor))
4820 return 1;
4821
4822 if (current_function_profile)
4823 return 1;
4824
4825 return 0;
4826 }
4827
4828 /* Record that the current function accesses previous call frames. */
4829
4830 void
4831 ix86_setup_frame_addresses (void)
4832 {
4833 cfun->machine->accesses_prev_frame = 1;
4834 }
4835 \f
4836 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4837 # define USE_HIDDEN_LINKONCE 1
4838 #else
4839 # define USE_HIDDEN_LINKONCE 0
4840 #endif
4841
4842 static int pic_labels_used;
4843
4844 /* Fills in the label name that should be used for a pc thunk for
4845 the given register. */
4846
4847 static void
4848 get_pc_thunk_name (char name[32], unsigned int regno)
4849 {
4850 gcc_assert (!TARGET_64BIT);
4851
4852 if (USE_HIDDEN_LINKONCE)
4853 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4854 else
4855 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4856 }
4857
4858
4859 /* This function generates code for -fpic that loads %ebx with
4860 the return address of the caller and then returns. */
4861
4862 void
4863 ix86_file_end (void)
4864 {
4865 rtx xops[2];
4866 int regno;
4867
4868 for (regno = 0; regno < 8; ++regno)
4869 {
4870 char name[32];
4871
4872 if (! ((pic_labels_used >> regno) & 1))
4873 continue;
4874
4875 get_pc_thunk_name (name, regno);
4876
4877 #if TARGET_MACHO
4878 if (TARGET_MACHO)
4879 {
4880 switch_to_section (darwin_sections[text_coal_section]);
4881 fputs ("\t.weak_definition\t", asm_out_file);
4882 assemble_name (asm_out_file, name);
4883 fputs ("\n\t.private_extern\t", asm_out_file);
4884 assemble_name (asm_out_file, name);
4885 fputs ("\n", asm_out_file);
4886 ASM_OUTPUT_LABEL (asm_out_file, name);
4887 }
4888 else
4889 #endif
4890 if (USE_HIDDEN_LINKONCE)
4891 {
4892 tree decl;
4893
4894 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4895 error_mark_node);
4896 TREE_PUBLIC (decl) = 1;
4897 TREE_STATIC (decl) = 1;
4898 DECL_ONE_ONLY (decl) = 1;
4899
4900 (*targetm.asm_out.unique_section) (decl, 0);
4901 switch_to_section (get_named_section (decl, NULL, 0));
4902
4903 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4904 fputs ("\t.hidden\t", asm_out_file);
4905 assemble_name (asm_out_file, name);
4906 fputc ('\n', asm_out_file);
4907 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4908 }
4909 else
4910 {
4911 switch_to_section (text_section);
4912 ASM_OUTPUT_LABEL (asm_out_file, name);
4913 }
4914
4915 xops[0] = gen_rtx_REG (SImode, regno);
4916 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4917 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4918 output_asm_insn ("ret", xops);
4919 }
4920
4921 if (NEED_INDICATE_EXEC_STACK)
4922 file_end_indicate_exec_stack ();
4923 }
4924
4925 /* Emit code for the SET_GOT patterns. */
4926
4927 const char *
4928 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4929 {
4930 rtx xops[3];
4931
4932 xops[0] = dest;
4933 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4934
4935 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4936 {
4937 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4938
4939 if (!flag_pic)
4940 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4941 else
4942 output_asm_insn ("call\t%a2", xops);
4943
4944 #if TARGET_MACHO
4945 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4946 is what will be referenced by the Mach-O PIC subsystem. */
4947 if (!label)
4948 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4949 #endif
4950
4951 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4952 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4953
4954 if (flag_pic)
4955 output_asm_insn ("pop{l}\t%0", xops);
4956 }
4957 else
4958 {
4959 char name[32];
4960 get_pc_thunk_name (name, REGNO (dest));
4961 pic_labels_used |= 1 << REGNO (dest);
4962
4963 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4964 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4965 output_asm_insn ("call\t%X2", xops);
4966 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4967 is what will be referenced by the Mach-O PIC subsystem. */
4968 #if TARGET_MACHO
4969 if (!label)
4970 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4971 else
4972 targetm.asm_out.internal_label (asm_out_file, "L",
4973 CODE_LABEL_NUMBER (label));
4974 #endif
4975 }
4976
4977 if (TARGET_MACHO)
4978 return "";
4979
4980 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4981 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4982 else
4983 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4984
4985 return "";
4986 }
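/* Rough examples of what the two branches above produce for 32-bit PIC
   (schematic; actual label names and spacing differ):

   Without deep branch prediction (inline pc load):
	call	.L1
   .L1:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx

   With deep branch prediction (call a pc thunk, see ix86_file_end):
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */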
4987
4988 /* Generate a "push" pattern for input ARG. */
4989
4990 static rtx
4991 gen_push (rtx arg)
4992 {
4993 return gen_rtx_SET (VOIDmode,
4994 gen_rtx_MEM (Pmode,
4995 gen_rtx_PRE_DEC (Pmode,
4996 stack_pointer_rtx)),
4997 arg);
4998 }
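/* A minimal sketch of the RTL returned above for a 32-bit target
   (Pmode == SImode); on 64-bit targets the modes are DImode:

	(set (mem:SI (pre_dec:SI (reg:SI sp)))
	     (reg:SI <arg>))

   which the backend matches as a plain "push" instruction.  */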
4999
5000 /* Return >= 0 if there is an unused call-clobbered register available
5001 for the entire function. */
5002
5003 static unsigned int
5004 ix86_select_alt_pic_regnum (void)
5005 {
5006 if (current_function_is_leaf && !current_function_profile
5007 && !ix86_current_function_calls_tls_descriptor)
5008 {
5009 int i;
5010 for (i = 2; i >= 0; --i)
5011 if (!regs_ever_live[i])
5012 return i;
5013 }
5014
5015 return INVALID_REGNUM;
5016 }
5017
5018 /* Return 1 if we need to save REGNO. */
5019 static int
5020 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5021 {
5022 if (pic_offset_table_rtx
5023 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5024 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5025 || current_function_profile
5026 || current_function_calls_eh_return
5027 || current_function_uses_const_pool))
5028 {
5029 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5030 return 0;
5031 return 1;
5032 }
5033
5034 if (current_function_calls_eh_return && maybe_eh_return)
5035 {
5036 unsigned i;
5037 for (i = 0; ; i++)
5038 {
5039 unsigned test = EH_RETURN_DATA_REGNO (i);
5040 if (test == INVALID_REGNUM)
5041 break;
5042 if (test == regno)
5043 return 1;
5044 }
5045 }
5046
5047 if (cfun->machine->force_align_arg_pointer
5048 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5049 return 1;
5050
5051 return (regs_ever_live[regno]
5052 && !call_used_regs[regno]
5053 && !fixed_regs[regno]
5054 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5055 }
5056
5057 /* Return number of registers to be saved on the stack. */
5058
5059 static int
5060 ix86_nsaved_regs (void)
5061 {
5062 int nregs = 0;
5063 int regno;
5064
5065 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5066 if (ix86_save_reg (regno, true))
5067 nregs++;
5068 return nregs;
5069 }
5070
5071 /* Return the offset between two registers, one to be eliminated, and the other
5072 its replacement, at the start of a routine. */
5073
5074 HOST_WIDE_INT
5075 ix86_initial_elimination_offset (int from, int to)
5076 {
5077 struct ix86_frame frame;
5078 ix86_compute_frame_layout (&frame);
5079
5080 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5081 return frame.hard_frame_pointer_offset;
5082 else if (from == FRAME_POINTER_REGNUM
5083 && to == HARD_FRAME_POINTER_REGNUM)
5084 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5085 else
5086 {
5087 gcc_assert (to == STACK_POINTER_REGNUM);
5088
5089 if (from == ARG_POINTER_REGNUM)
5090 return frame.stack_pointer_offset;
5091
5092 gcc_assert (from == FRAME_POINTER_REGNUM);
5093 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5094 }
5095 }
5096
5097 /* Fill the structure ix86_frame describing the frame of the currently computed function. */
5098
5099 static void
5100 ix86_compute_frame_layout (struct ix86_frame *frame)
5101 {
5102 HOST_WIDE_INT total_size;
5103 unsigned int stack_alignment_needed;
5104 HOST_WIDE_INT offset;
5105 unsigned int preferred_alignment;
5106 HOST_WIDE_INT size = get_frame_size ();
5107
5108 frame->nregs = ix86_nsaved_regs ();
5109 total_size = size;
5110
5111 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5112 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5113
5114 /* During the reload iteration the number of registers saved can change.
5115 Recompute the value as needed. Do not recompute when the number of registers
5116 didn't change, as reload does multiple calls to the function and does not
5117 expect the decision to change within a single iteration. */
5118 if (!optimize_size
5119 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5120 {
5121 int count = frame->nregs;
5122
5123 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5124 /* The fast prologue uses move instead of push to save registers. This
5125 is significantly longer, but also executes faster, as modern hardware
5126 can execute the moves in parallel but can't do that for push/pop.
5127 
5128 Be careful about choosing what prologue to emit: when the function takes
5129 many instructions to execute, we may use the slow version, as well as
5130 when the function is known to be outside a hot spot (this is known with
5131 feedback only). Weight the size of the function by the number of registers
5132 to save, as it is cheap to use one or two push instructions but very
5133 slow to use many of them. */
5134 if (count)
5135 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5136 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5137 || (flag_branch_probabilities
5138 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5139 cfun->machine->use_fast_prologue_epilogue = false;
5140 else
5141 cfun->machine->use_fast_prologue_epilogue
5142 = !expensive_function_p (count);
5143 }
5144 if (TARGET_PROLOGUE_USING_MOVE
5145 && cfun->machine->use_fast_prologue_epilogue)
5146 frame->save_regs_using_mov = true;
5147 else
5148 frame->save_regs_using_mov = false;
5149
5150
5151 /* Skip return address and saved base pointer. */
5152 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5153
5154 frame->hard_frame_pointer_offset = offset;
5155
5156 /* Do some sanity checking of stack_alignment_needed and
5157 preferred_alignment, since the i386 port is the only one using those
5158 features, which may break easily. */
5159
5160 gcc_assert (!size || stack_alignment_needed);
5161 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5162 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5163 gcc_assert (stack_alignment_needed
5164 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5165
5166 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5167 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5168
5169 /* Register save area */
5170 offset += frame->nregs * UNITS_PER_WORD;
5171
5172 /* Va-arg area */
5173 if (ix86_save_varrargs_registers)
5174 {
5175 offset += X86_64_VARARGS_SIZE;
5176 frame->va_arg_size = X86_64_VARARGS_SIZE;
5177 }
5178 else
5179 frame->va_arg_size = 0;
5180
5181 /* Align start of frame for local function. */
5182 frame->padding1 = ((offset + stack_alignment_needed - 1)
5183 & -stack_alignment_needed) - offset;
5184
5185 offset += frame->padding1;
5186
5187 /* Frame pointer points here. */
5188 frame->frame_pointer_offset = offset;
5189
5190 offset += size;
5191
5192 /* Add the outgoing arguments area. It can be skipped if we eliminated
5193 all the function calls as dead code.
5194 Skipping is however impossible when the function calls alloca: the alloca
5195 expander assumes that the last current_function_outgoing_args_size bytes
5196 of the stack frame are unused. */
5197 if (ACCUMULATE_OUTGOING_ARGS
5198 && (!current_function_is_leaf || current_function_calls_alloca
5199 || ix86_current_function_calls_tls_descriptor))
5200 {
5201 offset += current_function_outgoing_args_size;
5202 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5203 }
5204 else
5205 frame->outgoing_arguments_size = 0;
5206
5207 /* Align stack boundary. Only needed if we're calling another function
5208 or using alloca. */
5209 if (!current_function_is_leaf || current_function_calls_alloca
5210 || ix86_current_function_calls_tls_descriptor)
5211 frame->padding2 = ((offset + preferred_alignment - 1)
5212 & -preferred_alignment) - offset;
5213 else
5214 frame->padding2 = 0;
5215
5216 offset += frame->padding2;
5217
5218 /* We've reached end of stack frame. */
5219 frame->stack_pointer_offset = offset;
5220
5221 /* Size the prologue needs to allocate. */
5222 frame->to_allocate =
5223 (size + frame->padding1 + frame->padding2
5224 + frame->outgoing_arguments_size + frame->va_arg_size);
5225
5226 if ((!frame->to_allocate && frame->nregs <= 1)
5227 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5228 frame->save_regs_using_mov = false;
5229
5230 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5231 && current_function_is_leaf
5232 && !ix86_current_function_calls_tls_descriptor)
5233 {
5234 frame->red_zone_size = frame->to_allocate;
5235 if (frame->save_regs_using_mov)
5236 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5237 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5238 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5239 }
5240 else
5241 frame->red_zone_size = 0;
5242 frame->to_allocate -= frame->red_zone_size;
5243 frame->stack_pointer_offset -= frame->red_zone_size;
5244 #if 0
5245 fprintf (stderr, "nregs: %i\n", frame->nregs);
5246 fprintf (stderr, "size: %i\n", size);
5247 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5248 fprintf (stderr, "padding1: %i\n", frame->padding1);
5249 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5250 fprintf (stderr, "padding2: %i\n", frame->padding2);
5251 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5252 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5253 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5254 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5255 frame->hard_frame_pointer_offset);
5256 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5257 #endif
5258 }
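/* A rough picture of the layout computed above, from higher to lower
   addresses (schematic; several fields are zero for most functions):

	incoming arguments
	return address
	saved frame pointer		(if frame_pointer_needed)
					<- hard_frame_pointer_offset
	register save area		(nregs words)
	va-arg register save area	(va_arg_size)
	padding1			(align locals)
					<- frame_pointer_offset
	local variables			(get_frame_size ())
	outgoing arguments area
	padding2			(align to preferred boundary)
					<- stack_pointer_offset

   to_allocate is the part below the register save area that the prologue
   still has to subtract from the stack pointer; the red zone, when usable,
   is carved out of it.  */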
5259
5260 /* Emit code to save registers in the prologue. */
5261
5262 static void
5263 ix86_emit_save_regs (void)
5264 {
5265 unsigned int regno;
5266 rtx insn;
5267
5268 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5269 if (ix86_save_reg (regno, true))
5270 {
5271 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5272 RTX_FRAME_RELATED_P (insn) = 1;
5273 }
5274 }
5275
5276 /* Emit code to save registers using MOV insns. The first register
5277 is stored at POINTER + OFFSET. */
5278 static void
5279 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5280 {
5281 unsigned int regno;
5282 rtx insn;
5283
5284 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5285 if (ix86_save_reg (regno, true))
5286 {
5287 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5288 Pmode, offset),
5289 gen_rtx_REG (Pmode, regno));
5290 RTX_FRAME_RELATED_P (insn) = 1;
5291 offset += UNITS_PER_WORD;
5292 }
5293 }
5294
5295 /* Expand prologue or epilogue stack adjustment.
5296 The pattern exists to put a dependency on all ebp-based memory accesses.
5297 STYLE should be negative if instructions should be marked as frame related,
5298 zero if the %r11 register is live and cannot be freely used, and positive
5299 otherwise. */
5300
5301 static void
5302 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5303 {
5304 rtx insn;
5305
5306 if (! TARGET_64BIT)
5307 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5308 else if (x86_64_immediate_operand (offset, DImode))
5309 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5310 else
5311 {
5312 rtx r11;
5313 /* r11 is used by indirect sibcall return as well, set before the
5314 epilogue and used after the epilogue. ATM indirect sibcall
5315 shouldn't be used together with huge frame sizes in one
5316 function because of the frame_size check in sibcall.c. */
5317 gcc_assert (style);
5318 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5319 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5320 if (style < 0)
5321 RTX_FRAME_RELATED_P (insn) = 1;
5322 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5323 offset));
5324 }
5325 if (style < 0)
5326 RTX_FRAME_RELATED_P (insn) = 1;
5327 }
5328
5329 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5330
5331 static rtx
5332 ix86_internal_arg_pointer (void)
5333 {
5334 bool has_force_align_arg_pointer =
5335 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5336 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5337 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5338 && DECL_NAME (current_function_decl)
5339 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5340 && DECL_FILE_SCOPE_P (current_function_decl))
5341 || ix86_force_align_arg_pointer
5342 || has_force_align_arg_pointer)
5343 {
5344 /* Nested functions can't realign the stack due to a register
5345 conflict. */
5346 if (DECL_CONTEXT (current_function_decl)
5347 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5348 {
5349 if (ix86_force_align_arg_pointer)
5350 warning (0, "-mstackrealign ignored for nested functions");
5351 if (has_force_align_arg_pointer)
5352 error ("%s not supported for nested functions",
5353 ix86_force_align_arg_pointer_string);
5354 return virtual_incoming_args_rtx;
5355 }
5356 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5357 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5358 }
5359 else
5360 return virtual_incoming_args_rtx;
5361 }
5362
5363 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5364 This is called from dwarf2out.c to emit call frame instructions
5365 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5366 static void
5367 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5368 {
5369 rtx unspec = SET_SRC (pattern);
5370 gcc_assert (GET_CODE (unspec) == UNSPEC);
5371
5372 switch (index)
5373 {
5374 case UNSPEC_REG_SAVE:
5375 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5376 SET_DEST (pattern));
5377 break;
5378 case UNSPEC_DEF_CFA:
5379 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5380 INTVAL (XVECEXP (unspec, 0, 0)));
5381 break;
5382 default:
5383 gcc_unreachable ();
5384 }
5385 }
5386
5387 /* Expand the prologue into a bunch of separate insns. */
5388
5389 void
5390 ix86_expand_prologue (void)
5391 {
5392 rtx insn;
5393 bool pic_reg_used;
5394 struct ix86_frame frame;
5395 HOST_WIDE_INT allocate;
5396
5397 ix86_compute_frame_layout (&frame);
5398
5399 if (cfun->machine->force_align_arg_pointer)
5400 {
5401 rtx x, y;
5402
5403 /* Grab the argument pointer. */
5404 x = plus_constant (stack_pointer_rtx, 4);
5405 y = cfun->machine->force_align_arg_pointer;
5406 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5407 RTX_FRAME_RELATED_P (insn) = 1;
5408
5409 /* The unwind info consists of two parts: install the fafp as the cfa,
5410 and record the fafp as the "save register" of the stack pointer.
5411 The latter is there in order that the unwinder can see where it
5412 should restore the stack pointer across the `and' insn. */
5413 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5414 x = gen_rtx_SET (VOIDmode, y, x);
5415 RTX_FRAME_RELATED_P (x) = 1;
5416 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5417 UNSPEC_REG_SAVE);
5418 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5419 RTX_FRAME_RELATED_P (y) = 1;
5420 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5421 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5422 REG_NOTES (insn) = x;
5423
5424 /* Align the stack. */
5425 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5426 GEN_INT (-16)));
5427
5428 /* And here we cheat like madmen with the unwind info. We force the
5429 cfa register back to sp+4, which is exactly what it was at the
5430 start of the function. Re-pushing the return address results in
5431 the return address being at the same spot relative to the cfa, and thus
5432 is correct wrt the unwind info. */
5433 x = cfun->machine->force_align_arg_pointer;
5434 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5435 insn = emit_insn (gen_push (x));
5436 RTX_FRAME_RELATED_P (insn) = 1;
5437
5438 x = GEN_INT (4);
5439 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5440 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5441 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5442 REG_NOTES (insn) = x;
5443 }
5444
5445 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5446 slower on all targets. Also sdb doesn't like it. */
5447
5448 if (frame_pointer_needed)
5449 {
5450 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5451 RTX_FRAME_RELATED_P (insn) = 1;
5452
5453 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5454 RTX_FRAME_RELATED_P (insn) = 1;
5455 }
5456
5457 allocate = frame.to_allocate;
5458
5459 if (!frame.save_regs_using_mov)
5460 ix86_emit_save_regs ();
5461 else
5462 allocate += frame.nregs * UNITS_PER_WORD;
5463
5464 /* When using the red zone we may start saving registers before allocating
5465 the stack frame, saving one cycle of the prologue. */
5466 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5467 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5468 : stack_pointer_rtx,
5469 -frame.nregs * UNITS_PER_WORD);
5470
5471 if (allocate == 0)
5472 ;
5473 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5474 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5475 GEN_INT (-allocate), -1);
5476 else
5477 {
5478 /* Only valid for Win32. */
5479 rtx eax = gen_rtx_REG (SImode, 0);
5480 bool eax_live = ix86_eax_live_at_start_p ();
5481 rtx t;
5482
5483 gcc_assert (!TARGET_64BIT);
5484
5485 if (eax_live)
5486 {
5487 emit_insn (gen_push (eax));
5488 allocate -= 4;
5489 }
5490
5491 emit_move_insn (eax, GEN_INT (allocate));
5492
5493 insn = emit_insn (gen_allocate_stack_worker (eax));
5494 RTX_FRAME_RELATED_P (insn) = 1;
5495 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5496 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5497 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5498 t, REG_NOTES (insn));
5499
5500 if (eax_live)
5501 {
5502 if (frame_pointer_needed)
5503 t = plus_constant (hard_frame_pointer_rtx,
5504 allocate
5505 - frame.to_allocate
5506 - frame.nregs * UNITS_PER_WORD);
5507 else
5508 t = plus_constant (stack_pointer_rtx, allocate);
5509 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5510 }
5511 }
5512
5513 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5514 {
5515 if (!frame_pointer_needed || !frame.to_allocate)
5516 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5517 else
5518 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5519 -frame.nregs * UNITS_PER_WORD);
5520 }
5521
5522 pic_reg_used = false;
5523 if (pic_offset_table_rtx
5524 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5525 || current_function_profile))
5526 {
5527 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5528
5529 if (alt_pic_reg_used != INVALID_REGNUM)
5530 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5531
5532 pic_reg_used = true;
5533 }
5534
5535 if (pic_reg_used)
5536 {
5537 if (TARGET_64BIT)
5538 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5539 else
5540 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5541
5542 /* Even with accurate pre-reload life analysis, we can wind up
5543 deleting all references to the pic register after reload.
5544 Consider if cross-jumping unifies two sides of a branch
5545 controlled by a comparison vs the only read from a global.
5546 In which case, allow the set_got to be deleted, though we're
5547 too late to do anything about the ebx save in the prologue. */
5548 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5549 }
5550
5551 /* Prevent function calls from being scheduled before the call to mcount.
5552 In the pic_reg_used case, make sure that the got load isn't deleted. */
5553 if (current_function_profile)
5554 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5555 }
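/* For orientation, a typical 32-bit prologue produced by the code above
   (schematic only; register choice, use of moves vs. pushes, and the stack
   probe path vary with the target and the frame):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx			; saved registers (push variant)
	subl	$N, %esp		; N == frame.to_allocate
	call	__i686.get_pc_thunk.bx	; only when the PIC register is needed
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */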
5556
5557 /* Emit code to restore saved registers using MOV insns. First register
5558 is restored from POINTER + OFFSET. */
5559 static void
5560 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5561 int maybe_eh_return)
5562 {
5563 int regno;
5564 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5565
5566 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5567 if (ix86_save_reg (regno, maybe_eh_return))
5568 {
5569 /* Ensure that adjust_address won't be forced to produce a pointer
5570 out of the range allowed by the x86-64 instruction set. */
5571 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5572 {
5573 rtx r11;
5574
5575 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5576 emit_move_insn (r11, GEN_INT (offset));
5577 emit_insn (gen_adddi3 (r11, r11, pointer));
5578 base_address = gen_rtx_MEM (Pmode, r11);
5579 offset = 0;
5580 }
5581 emit_move_insn (gen_rtx_REG (Pmode, regno),
5582 adjust_address (base_address, Pmode, offset));
5583 offset += UNITS_PER_WORD;
5584 }
5585 }
5586
5587 /* Restore function stack, frame, and registers. */
5588
5589 void
5590 ix86_expand_epilogue (int style)
5591 {
5592 int regno;
5593 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5594 struct ix86_frame frame;
5595 HOST_WIDE_INT offset;
5596
5597 ix86_compute_frame_layout (&frame);
5598
5599 /* Calculate start of saved registers relative to ebp. Special care
5600 must be taken for the normal return case of a function using
5601 eh_return: the eax and edx registers are marked as saved, but not
5602 restored along this path. */
5603 offset = frame.nregs;
5604 if (current_function_calls_eh_return && style != 2)
5605 offset -= 2;
5606 offset *= -UNITS_PER_WORD;
5607
5608 /* If we're only restoring one register and sp is not valid, then
5609 use a move instruction to restore the register, since it's
5610 less work than reloading sp and popping the register.
5611
5612 The default code results in a stack adjustment using an add/lea instruction,
5613 while this code results in a LEAVE instruction (or discrete equivalent),
5614 so it is profitable in some other cases as well, especially when there
5615 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5616 and there is exactly one register to pop. This heuristic may need some
5617 tuning in the future. */
5618 if ((!sp_valid && frame.nregs <= 1)
5619 || (TARGET_EPILOGUE_USING_MOVE
5620 && cfun->machine->use_fast_prologue_epilogue
5621 && (frame.nregs > 1 || frame.to_allocate))
5622 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5623 || (frame_pointer_needed && TARGET_USE_LEAVE
5624 && cfun->machine->use_fast_prologue_epilogue
5625 && frame.nregs == 1)
5626 || current_function_calls_eh_return)
5627 {
5628 /* Restore registers. We can use ebp or esp to address the memory
5629 locations. If both are available, default to ebp, since offsets
5630 are known to be small. The only exception is esp pointing directly to
5631 the end of the block of saved registers, where we may simplify the
5632 addressing mode. */
5633
5634 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5635 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5636 frame.to_allocate, style == 2);
5637 else
5638 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5639 offset, style == 2);
5640
5641 /* eh_return epilogues need %ecx added to the stack pointer. */
5642 if (style == 2)
5643 {
5644 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5645
5646 if (frame_pointer_needed)
5647 {
5648 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5649 tmp = plus_constant (tmp, UNITS_PER_WORD);
5650 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5651
5652 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5653 emit_move_insn (hard_frame_pointer_rtx, tmp);
5654
5655 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5656 const0_rtx, style);
5657 }
5658 else
5659 {
5660 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5661 tmp = plus_constant (tmp, (frame.to_allocate
5662 + frame.nregs * UNITS_PER_WORD));
5663 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5664 }
5665 }
5666 else if (!frame_pointer_needed)
5667 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5668 GEN_INT (frame.to_allocate
5669 + frame.nregs * UNITS_PER_WORD),
5670 style);
5671 /* If not an i386, mov & pop is faster than "leave". */
5672 else if (TARGET_USE_LEAVE || optimize_size
5673 || !cfun->machine->use_fast_prologue_epilogue)
5674 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5675 else
5676 {
5677 pro_epilogue_adjust_stack (stack_pointer_rtx,
5678 hard_frame_pointer_rtx,
5679 const0_rtx, style);
5680 if (TARGET_64BIT)
5681 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5682 else
5683 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5684 }
5685 }
5686 else
5687 {
5688 /* First step is to deallocate the stack frame so that we can
5689 pop the registers. */
5690 if (!sp_valid)
5691 {
5692 gcc_assert (frame_pointer_needed);
5693 pro_epilogue_adjust_stack (stack_pointer_rtx,
5694 hard_frame_pointer_rtx,
5695 GEN_INT (offset), style);
5696 }
5697 else if (frame.to_allocate)
5698 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5699 GEN_INT (frame.to_allocate), style);
5700
5701 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5702 if (ix86_save_reg (regno, false))
5703 {
5704 if (TARGET_64BIT)
5705 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5706 else
5707 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5708 }
5709 if (frame_pointer_needed)
5710 {
5711 /* Leave results in shorter dependency chains on CPUs that are
5712 able to grok it fast. */
5713 if (TARGET_USE_LEAVE)
5714 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5715 else if (TARGET_64BIT)
5716 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5717 else
5718 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5719 }
5720 }
5721
5722 if (cfun->machine->force_align_arg_pointer)
5723 {
5724 emit_insn (gen_addsi3 (stack_pointer_rtx,
5725 cfun->machine->force_align_arg_pointer,
5726 GEN_INT (-4)));
5727 }
5728
5729 /* Sibcall epilogues don't want a return instruction. */
5730 if (style == 0)
5731 return;
5732
5733 if (current_function_pops_args && current_function_args_size)
5734 {
5735 rtx popc = GEN_INT (current_function_pops_args);
5736
5737 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5738 return address, do an explicit add, and jump indirectly to the
5739 caller. */
5740
5741 if (current_function_pops_args >= 65536)
5742 {
5743 rtx ecx = gen_rtx_REG (SImode, 2);
5744
5745 /* There is no "pascal" calling convention in 64bit ABI. */
5746 gcc_assert (!TARGET_64BIT);
5747
5748 emit_insn (gen_popsi1 (ecx));
5749 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5750 emit_jump_insn (gen_return_indirect_internal (ecx));
5751 }
5752 else
5753 emit_jump_insn (gen_return_pop_internal (popc));
5754 }
5755 else
5756 emit_jump_insn (gen_return_internal ());
5757 }
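/* Two schematic 32-bit epilogues corresponding to the branches above
   (illustrative only):

   "leave" style (frame pointer, registers restored with moves):
	movl	-4(%ebp), %ebx
	leave
	ret

   pop style (stack pointer valid, registers popped):
	addl	$N, %esp		; N == frame.to_allocate
	popl	%ebx
	popl	%ebp
	ret  */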
5758
5759 /* Reset from the function's potential modifications. */
5760
5761 static void
5762 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5763 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5764 {
5765 if (pic_offset_table_rtx)
5766 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5767 #if TARGET_MACHO
5768 /* Mach-O doesn't support labels at the end of objects, so if
5769 it looks like we might want one, insert a NOP. */
5770 {
5771 rtx insn = get_last_insn ();
5772 while (insn
5773 && NOTE_P (insn)
5774 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5775 insn = PREV_INSN (insn);
5776 if (insn
5777 && (LABEL_P (insn)
5778 || (NOTE_P (insn)
5779 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5780 fputs ("\tnop\n", file);
5781 }
5782 #endif
5783
5784 }
5785 \f
5786 /* Extract the parts of an RTL expression that is a valid memory address
5787 for an instruction. Return 0 if the structure of the address is
5788 grossly off. Return -1 if the address contains ASHIFT, so it is not
5789 strictly valid, but is still used for computing the length of the lea instruction. */
5790
5791 int
5792 ix86_decompose_address (rtx addr, struct ix86_address *out)
5793 {
5794 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5795 rtx base_reg, index_reg;
5796 HOST_WIDE_INT scale = 1;
5797 rtx scale_rtx = NULL_RTX;
5798 int retval = 1;
5799 enum ix86_address_seg seg = SEG_DEFAULT;
5800
5801 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5802 base = addr;
5803 else if (GET_CODE (addr) == PLUS)
5804 {
5805 rtx addends[4], op;
5806 int n = 0, i;
5807
5808 op = addr;
5809 do
5810 {
5811 if (n >= 4)
5812 return 0;
5813 addends[n++] = XEXP (op, 1);
5814 op = XEXP (op, 0);
5815 }
5816 while (GET_CODE (op) == PLUS);
5817 if (n >= 4)
5818 return 0;
5819 addends[n] = op;
5820
5821 for (i = n; i >= 0; --i)
5822 {
5823 op = addends[i];
5824 switch (GET_CODE (op))
5825 {
5826 case MULT:
5827 if (index)
5828 return 0;
5829 index = XEXP (op, 0);
5830 scale_rtx = XEXP (op, 1);
5831 break;
5832
5833 case UNSPEC:
5834 if (XINT (op, 1) == UNSPEC_TP
5835 && TARGET_TLS_DIRECT_SEG_REFS
5836 && seg == SEG_DEFAULT)
5837 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5838 else
5839 return 0;
5840 break;
5841
5842 case REG:
5843 case SUBREG:
5844 if (!base)
5845 base = op;
5846 else if (!index)
5847 index = op;
5848 else
5849 return 0;
5850 break;
5851
5852 case CONST:
5853 case CONST_INT:
5854 case SYMBOL_REF:
5855 case LABEL_REF:
5856 if (disp)
5857 return 0;
5858 disp = op;
5859 break;
5860
5861 default:
5862 return 0;
5863 }
5864 }
5865 }
5866 else if (GET_CODE (addr) == MULT)
5867 {
5868 index = XEXP (addr, 0); /* index*scale */
5869 scale_rtx = XEXP (addr, 1);
5870 }
5871 else if (GET_CODE (addr) == ASHIFT)
5872 {
5873 rtx tmp;
5874
5875 /* We're called for lea too, which implements ashift on occasion. */
5876 index = XEXP (addr, 0);
5877 tmp = XEXP (addr, 1);
5878 if (GET_CODE (tmp) != CONST_INT)
5879 return 0;
5880 scale = INTVAL (tmp);
5881 if ((unsigned HOST_WIDE_INT) scale > 3)
5882 return 0;
5883 scale = 1 << scale;
5884 retval = -1;
5885 }
5886 else
5887 disp = addr; /* displacement */
5888
5889 /* Extract the integral value of scale. */
5890 if (scale_rtx)
5891 {
5892 if (GET_CODE (scale_rtx) != CONST_INT)
5893 return 0;
5894 scale = INTVAL (scale_rtx);
5895 }
5896
5897 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5898 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5899
5900 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5901 if (base_reg && index_reg && scale == 1
5902 && (index_reg == arg_pointer_rtx
5903 || index_reg == frame_pointer_rtx
5904 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5905 {
5906 rtx tmp;
5907 tmp = base, base = index, index = tmp;
5908 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5909 }
5910
5911 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5912 if ((base_reg == hard_frame_pointer_rtx
5913 || base_reg == frame_pointer_rtx
5914 || base_reg == arg_pointer_rtx) && !disp)
5915 disp = const0_rtx;
5916
5917 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5918 Avoid this by transforming to [%esi+0]. */
5919 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5920 && base_reg && !index_reg && !disp
5921 && REG_P (base_reg)
5922 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5923 disp = const0_rtx;
5924
5925 /* Special case: encode reg+reg instead of reg*2. */
5926 if (!base && index && scale && scale == 2)
5927 base = index, base_reg = index_reg, scale = 1;
5928
5929 /* Special case: scaling cannot be encoded without base or displacement. */
5930 if (!base && !disp && index && scale != 1)
5931 disp = const0_rtx;
5932
5933 out->base = base;
5934 out->index = index;
5935 out->disp = disp;
5936 out->scale = scale;
5937 out->seg = seg;
5938
5939 return retval;
5940 }
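/* A small example of the decomposition performed above, assuming SImode
   registers (illustrative):

	(plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
			  (reg:SI %ebx))
		 (const_int 12))

   yields base = %ebx, index = %eax, scale = 4, disp = 12, seg = SEG_DEFAULT,
   i.e. the AT&T address 12(%ebx,%eax,4).  */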
5941 \f
5942 /* Return cost of the memory address x.
5943 For i386, it is better to use a complex address than let gcc copy
5944 the address into a reg and make a new pseudo. But not if the address
5945 requires two regs - that would mean more pseudos with longer
5946 lifetimes. */
5947 static int
5948 ix86_address_cost (rtx x)
5949 {
5950 struct ix86_address parts;
5951 int cost = 1;
5952 int ok = ix86_decompose_address (x, &parts);
5953
5954 gcc_assert (ok);
5955
5956 if (parts.base && GET_CODE (parts.base) == SUBREG)
5957 parts.base = SUBREG_REG (parts.base);
5958 if (parts.index && GET_CODE (parts.index) == SUBREG)
5959 parts.index = SUBREG_REG (parts.index);
5960
5961 /* More complex memory references are better. */
5962 if (parts.disp && parts.disp != const0_rtx)
5963 cost--;
5964 if (parts.seg != SEG_DEFAULT)
5965 cost--;
5966
5967 /* Attempt to minimize number of registers in the address. */
5968 if ((parts.base
5969 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5970 || (parts.index
5971 && (!REG_P (parts.index)
5972 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5973 cost++;
5974
5975 if (parts.base
5976 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5977 && parts.index
5978 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5979 && parts.base != parts.index)
5980 cost++;
5981
5982 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5983 since its predecode logic can't detect the length of instructions
5984 and it degenerates to vector decoding. Increase the cost of such
5985 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5986 to split such addresses or even refuse them at all.
5987
5988 The following addressing modes are affected:
5989 [base+scale*index]
5990 [scale*index+disp]
5991 [base+index]
5992
5993 The first and last cases may be avoidable by explicitly coding the zero
5994 in the memory address, but I don't have an AMD-K6 machine handy to check
5995 this theory. */
5996
5997 if (TARGET_K6
5998 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5999 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6000 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6001 cost += 10;
6002
6003 return cost;
6004 }
6005 \f
6006 /* If X is a machine specific address (i.e. a symbol or label being
6007 referenced as a displacement from the GOT implemented using an
6008 UNSPEC), then return the base term. Otherwise return X. */
6009
6010 rtx
6011 ix86_find_base_term (rtx x)
6012 {
6013 rtx term;
6014
6015 if (TARGET_64BIT)
6016 {
6017 if (GET_CODE (x) != CONST)
6018 return x;
6019 term = XEXP (x, 0);
6020 if (GET_CODE (term) == PLUS
6021 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6022 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6023 term = XEXP (term, 0);
6024 if (GET_CODE (term) != UNSPEC
6025 || XINT (term, 1) != UNSPEC_GOTPCREL)
6026 return x;
6027
6028 term = XVECEXP (term, 0, 0);
6029
6030 if (GET_CODE (term) != SYMBOL_REF
6031 && GET_CODE (term) != LABEL_REF)
6032 return x;
6033
6034 return term;
6035 }
6036
6037 term = ix86_delegitimize_address (x);
6038
6039 if (GET_CODE (term) != SYMBOL_REF
6040 && GET_CODE (term) != LABEL_REF)
6041 return x;
6042
6043 return term;
6044 }
6045
6046 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6047 this is used to form addresses to local data when -fPIC is in
6048 use. */
6049
6050 static bool
6051 darwin_local_data_pic (rtx disp)
6052 {
6053 if (GET_CODE (disp) == MINUS)
6054 {
6055 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6056 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6057 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6058 {
6059 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6060 if (! strcmp (sym_name, "<pic base>"))
6061 return true;
6062 }
6063 }
6064
6065 return false;
6066 }
6067 \f
6068 /* Determine if a given RTX is a valid constant. We already know this
6069 satisfies CONSTANT_P. */
6070
6071 bool
6072 legitimate_constant_p (rtx x)
6073 {
6074 switch (GET_CODE (x))
6075 {
6076 case CONST:
6077 x = XEXP (x, 0);
6078
6079 if (GET_CODE (x) == PLUS)
6080 {
6081 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6082 return false;
6083 x = XEXP (x, 0);
6084 }
6085
6086 if (TARGET_MACHO && darwin_local_data_pic (x))
6087 return true;
6088
6089 /* Only some unspecs are valid as "constants". */
6090 if (GET_CODE (x) == UNSPEC)
6091 switch (XINT (x, 1))
6092 {
6093 case UNSPEC_GOTOFF:
6094 return TARGET_64BIT;
6095 case UNSPEC_TPOFF:
6096 case UNSPEC_NTPOFF:
6097 x = XVECEXP (x, 0, 0);
6098 return (GET_CODE (x) == SYMBOL_REF
6099 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6100 case UNSPEC_DTPOFF:
6101 x = XVECEXP (x, 0, 0);
6102 return (GET_CODE (x) == SYMBOL_REF
6103 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6104 default:
6105 return false;
6106 }
6107
6108 /* We must have drilled down to a symbol. */
6109 if (GET_CODE (x) == LABEL_REF)
6110 return true;
6111 if (GET_CODE (x) != SYMBOL_REF)
6112 return false;
6113 /* FALLTHRU */
6114
6115 case SYMBOL_REF:
6116 /* TLS symbols are never valid. */
6117 if (SYMBOL_REF_TLS_MODEL (x))
6118 return false;
6119 break;
6120
6121 case CONST_DOUBLE:
6122 if (GET_MODE (x) == TImode
6123 && x != CONST0_RTX (TImode)
6124 && !TARGET_64BIT)
6125 return false;
6126 break;
6127
6128 case CONST_VECTOR:
6129 if (x == CONST0_RTX (GET_MODE (x)))
6130 return true;
6131 return false;
6132
6133 default:
6134 break;
6135 }
6136
6137 /* Otherwise we handle everything else in the move patterns. */
6138 return true;
6139 }
6140
6141 /* Determine if it's legal to put X into the constant pool. This
6142 is not possible for the address of thread-local symbols, which
6143 is checked above. */
6144
6145 static bool
6146 ix86_cannot_force_const_mem (rtx x)
6147 {
6148 /* We can always put integral constants and vectors in memory. */
6149 switch (GET_CODE (x))
6150 {
6151 case CONST_INT:
6152 case CONST_DOUBLE:
6153 case CONST_VECTOR:
6154 return false;
6155
6156 default:
6157 break;
6158 }
6159 return !legitimate_constant_p (x);
6160 }
6161
6162 /* Determine if a given RTX is a valid constant address. */
6163
6164 bool
6165 constant_address_p (rtx x)
6166 {
6167 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6168 }
6169
6170 /* Nonzero if the constant value X is a legitimate general operand
6171 when generating PIC code. It is given that flag_pic is on and
6172 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6173
6174 bool
6175 legitimate_pic_operand_p (rtx x)
6176 {
6177 rtx inner;
6178
6179 switch (GET_CODE (x))
6180 {
6181 case CONST:
6182 inner = XEXP (x, 0);
6183 if (GET_CODE (inner) == PLUS
6184 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6185 inner = XEXP (inner, 0);
6186
6187 /* Only some unspecs are valid as "constants". */
6188 if (GET_CODE (inner) == UNSPEC)
6189 switch (XINT (inner, 1))
6190 {
6191 case UNSPEC_GOTOFF:
6192 return TARGET_64BIT;
6193 case UNSPEC_TPOFF:
6194 x = XVECEXP (inner, 0, 0);
6195 return (GET_CODE (x) == SYMBOL_REF
6196 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6197 default:
6198 return false;
6199 }
6200 /* FALLTHRU */
6201
6202 case SYMBOL_REF:
6203 case LABEL_REF:
6204 return legitimate_pic_address_disp_p (x);
6205
6206 default:
6207 return true;
6208 }
6209 }
6210
6211 /* Determine if a given CONST RTX is a valid memory displacement
6212 in PIC mode. */
6213
6214 int
6215 legitimate_pic_address_disp_p (rtx disp)
6216 {
6217 bool saw_plus;
6218
6219 /* In 64bit mode we can allow direct addresses of symbols and labels
6220 when they are not dynamic symbols. */
6221 if (TARGET_64BIT)
6222 {
6223 rtx op0 = disp, op1;
6224
6225 switch (GET_CODE (disp))
6226 {
6227 case LABEL_REF:
6228 return true;
6229
6230 case CONST:
6231 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6232 break;
6233 op0 = XEXP (XEXP (disp, 0), 0);
6234 op1 = XEXP (XEXP (disp, 0), 1);
6235 if (GET_CODE (op1) != CONST_INT
6236 || INTVAL (op1) >= 16*1024*1024
6237 || INTVAL (op1) < -16*1024*1024)
6238 break;
6239 if (GET_CODE (op0) == LABEL_REF)
6240 return true;
6241 if (GET_CODE (op0) != SYMBOL_REF)
6242 break;
6243 /* FALLTHRU */
6244
6245 case SYMBOL_REF:
6246 /* TLS references should always be enclosed in UNSPEC. */
6247 if (SYMBOL_REF_TLS_MODEL (op0))
6248 return false;
6249 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6250 return true;
6251 break;
6252
6253 default:
6254 break;
6255 }
6256 }
6257 if (GET_CODE (disp) != CONST)
6258 return 0;
6259 disp = XEXP (disp, 0);
6260
6261 if (TARGET_64BIT)
6262 {
6263 /* It is unsafe to allow PLUS expressions; this limits the allowed
6264 distance of GOT tables. We should not need these anyway. */
6265 if (GET_CODE (disp) != UNSPEC
6266 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6267 && XINT (disp, 1) != UNSPEC_GOTOFF))
6268 return 0;
6269
6270 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6271 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6272 return 0;
6273 return 1;
6274 }
6275
6276 saw_plus = false;
6277 if (GET_CODE (disp) == PLUS)
6278 {
6279 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6280 return 0;
6281 disp = XEXP (disp, 0);
6282 saw_plus = true;
6283 }
6284
6285 if (TARGET_MACHO && darwin_local_data_pic (disp))
6286 return 1;
6287
6288 if (GET_CODE (disp) != UNSPEC)
6289 return 0;
6290
6291 switch (XINT (disp, 1))
6292 {
6293 case UNSPEC_GOT:
6294 if (saw_plus)
6295 return false;
6296 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6297 case UNSPEC_GOTOFF:
6298 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6299 While the ABI also specifies a 32bit relocation, we don't produce it in
6300 the small PIC model at all. */
6301 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6302 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6303 && !TARGET_64BIT)
6304 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6305 return false;
6306 case UNSPEC_GOTTPOFF:
6307 case UNSPEC_GOTNTPOFF:
6308 case UNSPEC_INDNTPOFF:
6309 if (saw_plus)
6310 return false;
6311 disp = XVECEXP (disp, 0, 0);
6312 return (GET_CODE (disp) == SYMBOL_REF
6313 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6314 case UNSPEC_NTPOFF:
6315 disp = XVECEXP (disp, 0, 0);
6316 return (GET_CODE (disp) == SYMBOL_REF
6317 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6318 case UNSPEC_DTPOFF:
6319 disp = XVECEXP (disp, 0, 0);
6320 return (GET_CODE (disp) == SYMBOL_REF
6321 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6322 }
6323
6324 return 0;
6325 }
6326
6327 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6328 memory address for an instruction. The MODE argument is the machine mode
6329 for the MEM expression that wants to use this address.
6330
6331 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6332 convert common non-canonical forms to canonical form so that they will
6333 be recognized. */
6334
6335 int
6336 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6337 {
6338 struct ix86_address parts;
6339 rtx base, index, disp;
6340 HOST_WIDE_INT scale;
6341 const char *reason = NULL;
6342 rtx reason_rtx = NULL_RTX;
6343
6344 if (TARGET_DEBUG_ADDR)
6345 {
6346 fprintf (stderr,
6347 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6348 GET_MODE_NAME (mode), strict);
6349 debug_rtx (addr);
6350 }
6351
6352 if (ix86_decompose_address (addr, &parts) <= 0)
6353 {
6354 reason = "decomposition failed";
6355 goto report_error;
6356 }
6357
6358 base = parts.base;
6359 index = parts.index;
6360 disp = parts.disp;
6361 scale = parts.scale;
6362
6363 /* Validate base register.
6364
6365 Don't allow SUBREG's that span more than a word here. It can lead to spill
6366 failures when the base is one word out of a two word structure, which is
6367 represented internally as a DImode int. */
6368
6369 if (base)
6370 {
6371 rtx reg;
6372 reason_rtx = base;
6373
6374 if (REG_P (base))
6375 reg = base;
6376 else if (GET_CODE (base) == SUBREG
6377 && REG_P (SUBREG_REG (base))
6378 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6379 <= UNITS_PER_WORD)
6380 reg = SUBREG_REG (base);
6381 else
6382 {
6383 reason = "base is not a register";
6384 goto report_error;
6385 }
6386
6387 if (GET_MODE (base) != Pmode)
6388 {
6389 reason = "base is not in Pmode";
6390 goto report_error;
6391 }
6392
6393 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6394 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6395 {
6396 reason = "base is not valid";
6397 goto report_error;
6398 }
6399 }
6400
6401 /* Validate index register.
6402
6403 Don't allow SUBREG's that span more than a word here -- same as above. */
6404
6405 if (index)
6406 {
6407 rtx reg;
6408 reason_rtx = index;
6409
6410 if (REG_P (index))
6411 reg = index;
6412 else if (GET_CODE (index) == SUBREG
6413 && REG_P (SUBREG_REG (index))
6414 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6415 <= UNITS_PER_WORD)
6416 reg = SUBREG_REG (index);
6417 else
6418 {
6419 reason = "index is not a register";
6420 goto report_error;
6421 }
6422
6423 if (GET_MODE (index) != Pmode)
6424 {
6425 reason = "index is not in Pmode";
6426 goto report_error;
6427 }
6428
6429 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6430 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6431 {
6432 reason = "index is not valid";
6433 goto report_error;
6434 }
6435 }
6436
6437 /* Validate scale factor. */
6438 if (scale != 1)
6439 {
6440 reason_rtx = GEN_INT (scale);
6441 if (!index)
6442 {
6443 reason = "scale without index";
6444 goto report_error;
6445 }
6446
6447 if (scale != 2 && scale != 4 && scale != 8)
6448 {
6449 reason = "scale is not a valid multiplier";
6450 goto report_error;
6451 }
6452 }
6453
6454 /* Validate displacement. */
6455 if (disp)
6456 {
6457 reason_rtx = disp;
6458
6459 if (GET_CODE (disp) == CONST
6460 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6461 switch (XINT (XEXP (disp, 0), 1))
6462 {
6463 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6464 used. While the ABI also specifies 32bit relocations, we don't produce
6465 them at all and use IP relative addressing instead. */
6466 case UNSPEC_GOT:
6467 case UNSPEC_GOTOFF:
6468 gcc_assert (flag_pic);
6469 if (!TARGET_64BIT)
6470 goto is_legitimate_pic;
6471 reason = "64bit address unspec";
6472 goto report_error;
6473
6474 case UNSPEC_GOTPCREL:
6475 gcc_assert (flag_pic);
6476 goto is_legitimate_pic;
6477
6478 case UNSPEC_GOTTPOFF:
6479 case UNSPEC_GOTNTPOFF:
6480 case UNSPEC_INDNTPOFF:
6481 case UNSPEC_NTPOFF:
6482 case UNSPEC_DTPOFF:
6483 break;
6484
6485 default:
6486 reason = "invalid address unspec";
6487 goto report_error;
6488 }
6489
6490 else if (SYMBOLIC_CONST (disp)
6491 && (flag_pic
6492 || (TARGET_MACHO
6493 #if TARGET_MACHO
6494 && MACHOPIC_INDIRECT
6495 && !machopic_operand_p (disp)
6496 #endif
6497 )))
6498 {
6499
6500 is_legitimate_pic:
6501 if (TARGET_64BIT && (index || base))
6502 {
6503 /* foo@dtpoff(%rX) is ok. */
6504 if (GET_CODE (disp) != CONST
6505 || GET_CODE (XEXP (disp, 0)) != PLUS
6506 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6507 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6508 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6509 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6510 {
6511 reason = "non-constant pic memory reference";
6512 goto report_error;
6513 }
6514 }
6515 else if (! legitimate_pic_address_disp_p (disp))
6516 {
6517 reason = "displacement is an invalid pic construct";
6518 goto report_error;
6519 }
6520
6521 /* This code used to verify that a symbolic pic displacement
6522 includes the pic_offset_table_rtx register.
6523
6524 While this is a good idea, unfortunately these constructs may
6525 be created by the "adds using lea" optimization for incorrect
6526 code like:
6527
6528 int a;
6529 int foo(int i)
6530 {
6531 return *(&a+i);
6532 }
6533
6534 This code is nonsensical, but results in addressing the
6535 GOT table with a pic_offset_table_rtx base. We can't
6536 just refuse it easily, since it gets matched by the
6537 "addsi3" pattern, which later gets split to lea in the
6538 case the output register differs from the input. While this
6539 could be handled by a separate addsi pattern for this case
6540 that never results in lea, disabling this test seems to be
6541 the easier and correct fix for the crash. */
6542 }
6543 else if (GET_CODE (disp) != LABEL_REF
6544 && GET_CODE (disp) != CONST_INT
6545 && (GET_CODE (disp) != CONST
6546 || !legitimate_constant_p (disp))
6547 && (GET_CODE (disp) != SYMBOL_REF
6548 || !legitimate_constant_p (disp)))
6549 {
6550 reason = "displacement is not constant";
6551 goto report_error;
6552 }
6553 else if (TARGET_64BIT
6554 && !x86_64_immediate_operand (disp, VOIDmode))
6555 {
6556 reason = "displacement is out of range";
6557 goto report_error;
6558 }
6559 }
6560
6561 /* Everything looks valid. */
6562 if (TARGET_DEBUG_ADDR)
6563 fprintf (stderr, "Success.\n");
6564 return TRUE;
6565
6566 report_error:
6567 if (TARGET_DEBUG_ADDR)
6568 {
6569 fprintf (stderr, "Error: %s\n", reason);
6570 debug_rtx (reason_rtx);
6571 }
6572 return FALSE;
6573 }
6574 \f
6575 /* Return a unique alias set for the GOT. */
6576
6577 static HOST_WIDE_INT
6578 ix86_GOT_alias_set (void)
6579 {
6580 static HOST_WIDE_INT set = -1;
6581 if (set == -1)
6582 set = new_alias_set ();
6583 return set;
6584 }
6585
6586 /* Return a legitimate reference for ORIG (an address) using the
6587 register REG. If REG is 0, a new pseudo is generated.
6588
6589 There are two types of references that must be handled:
6590
6591 1. Global data references must load the address from the GOT, via
6592 the PIC reg. An insn is emitted to do this load, and the reg is
6593 returned.
6594
6595 2. Static data references, constant pool addresses, and code labels
6596 compute the address as an offset from the GOT, whose base is in
6597 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6598 differentiate them from global data objects. The returned
6599 address is the PIC reg + an unspec constant.
6600
6601 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6602 reg also appears in the address. */
6603
6604 static rtx
6605 legitimize_pic_address (rtx orig, rtx reg)
6606 {
6607 rtx addr = orig;
6608 rtx new = orig;
6609 rtx base;
6610
6611 #if TARGET_MACHO
6612 if (TARGET_MACHO && !TARGET_64BIT)
6613 {
6614 if (reg == 0)
6615 reg = gen_reg_rtx (Pmode);
6616 /* Use the generic Mach-O PIC machinery. */
6617 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6618 }
6619 #endif
6620
6621 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6622 new = addr;
6623 else if (TARGET_64BIT
6624 && ix86_cmodel != CM_SMALL_PIC
6625 && local_symbolic_operand (addr, Pmode))
6626 {
6627 rtx tmpreg;
6628 /* This symbol may be referenced via a displacement from the PIC
6629 base address (@GOTOFF). */
6630
6631 if (reload_in_progress)
6632 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6633 if (GET_CODE (addr) == CONST)
6634 addr = XEXP (addr, 0);
6635 if (GET_CODE (addr) == PLUS)
6636 {
6637 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6638 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6639 }
6640 else
6641 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6642 new = gen_rtx_CONST (Pmode, new);
6643 if (!reg)
6644 tmpreg = gen_reg_rtx (Pmode);
6645 else
6646 tmpreg = reg;
6647 emit_move_insn (tmpreg, new);
6648
6649 if (reg != 0)
6650 {
6651 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6652 tmpreg, 1, OPTAB_DIRECT);
6653 new = reg;
6654 }
6655 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6656 }
6657 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6658 {
6659 /* This symbol may be referenced via a displacement from the PIC
6660 base address (@GOTOFF). */
6661
6662 if (reload_in_progress)
6663 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6664 if (GET_CODE (addr) == CONST)
6665 addr = XEXP (addr, 0);
6666 if (GET_CODE (addr) == PLUS)
6667 {
6668 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6669 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6670 }
6671 else
6672 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6673 new = gen_rtx_CONST (Pmode, new);
6674 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6675
6676 if (reg != 0)
6677 {
6678 emit_move_insn (reg, new);
6679 new = reg;
6680 }
6681 }
6682 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6683 {
6684 if (TARGET_64BIT)
6685 {
6686 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6687 new = gen_rtx_CONST (Pmode, new);
6688 new = gen_const_mem (Pmode, new);
6689 set_mem_alias_set (new, ix86_GOT_alias_set ());
6690
6691 if (reg == 0)
6692 reg = gen_reg_rtx (Pmode);
6693 /* Use gen_movsi directly, otherwise the address is loaded
6694 into a register for CSE. We don't want to CSE these addresses;
6695 instead we CSE addresses from the GOT table, so skip this. */
6696 emit_insn (gen_movsi (reg, new));
6697 new = reg;
6698 }
6699 else
6700 {
6701 /* This symbol must be referenced via a load from the
6702 Global Offset Table (@GOT). */
6703
6704 if (reload_in_progress)
6705 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6706 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6707 new = gen_rtx_CONST (Pmode, new);
6708 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6709 new = gen_const_mem (Pmode, new);
6710 set_mem_alias_set (new, ix86_GOT_alias_set ());
6711
6712 if (reg == 0)
6713 reg = gen_reg_rtx (Pmode);
6714 emit_move_insn (reg, new);
6715 new = reg;
6716 }
6717 }
6718 else
6719 {
6720 if (GET_CODE (addr) == CONST_INT
6721 && !x86_64_immediate_operand (addr, VOIDmode))
6722 {
6723 if (reg)
6724 {
6725 emit_move_insn (reg, addr);
6726 new = reg;
6727 }
6728 else
6729 new = force_reg (Pmode, addr);
6730 }
6731 else if (GET_CODE (addr) == CONST)
6732 {
6733 addr = XEXP (addr, 0);
6734
6735 /* We must match stuff we generate before. Assume the only
6736 unspecs that can get here are ours. Not that we could do
6737 anything with them anyway.... */
6738 if (GET_CODE (addr) == UNSPEC
6739 || (GET_CODE (addr) == PLUS
6740 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6741 return orig;
6742 gcc_assert (GET_CODE (addr) == PLUS);
6743 }
6744 if (GET_CODE (addr) == PLUS)
6745 {
6746 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6747
6748 /* Check first to see if this is a constant offset from a @GOTOFF
6749 symbol reference. */
6750 if (local_symbolic_operand (op0, Pmode)
6751 && GET_CODE (op1) == CONST_INT)
6752 {
6753 if (!TARGET_64BIT)
6754 {
6755 if (reload_in_progress)
6756 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6757 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6758 UNSPEC_GOTOFF);
6759 new = gen_rtx_PLUS (Pmode, new, op1);
6760 new = gen_rtx_CONST (Pmode, new);
6761 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6762
6763 if (reg != 0)
6764 {
6765 emit_move_insn (reg, new);
6766 new = reg;
6767 }
6768 }
6769 else
6770 {
6771 if (INTVAL (op1) < -16*1024*1024
6772 || INTVAL (op1) >= 16*1024*1024)
6773 {
6774 if (!x86_64_immediate_operand (op1, Pmode))
6775 op1 = force_reg (Pmode, op1);
6776 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6777 }
6778 }
6779 }
6780 else
6781 {
6782 base = legitimize_pic_address (XEXP (addr, 0), reg);
6783 new = legitimize_pic_address (XEXP (addr, 1),
6784 base == reg ? NULL_RTX : reg);
6785
6786 if (GET_CODE (new) == CONST_INT)
6787 new = plus_constant (base, INTVAL (new));
6788 else
6789 {
6790 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6791 {
6792 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6793 new = XEXP (new, 1);
6794 }
6795 new = gen_rtx_PLUS (Pmode, base, new);
6796 }
6797 }
6798 }
6799 }
6800 return new;
6801 }
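/* Sketch of the main shapes returned above (schematic):

   local symbol, 32-bit PIC:
	(plus:SI (reg:SI %ebx)
		 (const:SI (unspec:SI [(symbol_ref "sym")] UNSPEC_GOTOFF)))
   i.e. sym@GOTOFF(%ebx);

   global symbol, 32-bit PIC:
	(mem:SI (plus:SI (reg:SI %ebx)
			 (const:SI (unspec:SI [(symbol_ref "sym")] UNSPEC_GOT))))
   i.e. a load from sym@GOT(%ebx);

   global symbol, 64-bit PIC:
	(mem:DI (const:DI (unspec:DI [(symbol_ref "sym")] UNSPEC_GOTPCREL)))
   i.e. a load from sym@GOTPCREL(%rip).  */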
6802 \f
6803 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6804
6805 static rtx
6806 get_thread_pointer (int to_reg)
6807 {
6808 rtx tp, reg, insn;
6809
6810 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6811 if (!to_reg)
6812 return tp;
6813
6814 reg = gen_reg_rtx (Pmode);
6815 insn = gen_rtx_SET (VOIDmode, reg, tp);
6816 insn = emit_insn (insn);
6817
6818 return reg;
6819 }
6820
6821 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6822 false if we expect this to be used for a memory address and true if
6823 we expect to load the address into a register. */
6824
6825 static rtx
6826 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6827 {
6828 rtx dest, base, off, pic, tp;
6829 int type;
6830
6831 switch (model)
6832 {
6833 case TLS_MODEL_GLOBAL_DYNAMIC:
6834 dest = gen_reg_rtx (Pmode);
6835 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6836
6837 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6838 {
6839 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6840
6841 start_sequence ();
6842 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6843 insns = get_insns ();
6844 end_sequence ();
6845
6846 emit_libcall_block (insns, dest, rax, x);
6847 }
6848 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6849 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6850 else
6851 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6852
6853 if (TARGET_GNU2_TLS)
6854 {
6855 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6856
6857 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6858 }
6859 break;
6860
6861 case TLS_MODEL_LOCAL_DYNAMIC:
6862 base = gen_reg_rtx (Pmode);
6863 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6864
6865 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6866 {
6867 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6868
6869 start_sequence ();
6870 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6871 insns = get_insns ();
6872 end_sequence ();
6873
6874 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6875 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6876 emit_libcall_block (insns, base, rax, note);
6877 }
6878 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6879 emit_insn (gen_tls_local_dynamic_base_64 (base));
6880 else
6881 emit_insn (gen_tls_local_dynamic_base_32 (base));
6882
6883 if (TARGET_GNU2_TLS)
6884 {
6885 rtx x = ix86_tls_module_base ();
6886
6887 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6888 gen_rtx_MINUS (Pmode, x, tp));
6889 }
6890
6891 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6892 off = gen_rtx_CONST (Pmode, off);
6893
6894 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6895
6896 if (TARGET_GNU2_TLS)
6897 {
6898 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6899
6900 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6901 }
6902
6903 break;
6904
6905 case TLS_MODEL_INITIAL_EXEC:
6906 if (TARGET_64BIT)
6907 {
6908 pic = NULL;
6909 type = UNSPEC_GOTNTPOFF;
6910 }
6911 else if (flag_pic)
6912 {
6913 if (reload_in_progress)
6914 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6915 pic = pic_offset_table_rtx;
6916 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6917 }
6918 else if (!TARGET_ANY_GNU_TLS)
6919 {
6920 pic = gen_reg_rtx (Pmode);
6921 emit_insn (gen_set_got (pic));
6922 type = UNSPEC_GOTTPOFF;
6923 }
6924 else
6925 {
6926 pic = NULL;
6927 type = UNSPEC_INDNTPOFF;
6928 }
6929
6930 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6931 off = gen_rtx_CONST (Pmode, off);
6932 if (pic)
6933 off = gen_rtx_PLUS (Pmode, pic, off);
6934 off = gen_const_mem (Pmode, off);
6935 set_mem_alias_set (off, ix86_GOT_alias_set ());
6936
6937 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6938 {
6939 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6940 off = force_reg (Pmode, off);
6941 return gen_rtx_PLUS (Pmode, base, off);
6942 }
6943 else
6944 {
6945 base = get_thread_pointer (true);
6946 dest = gen_reg_rtx (Pmode);
6947 emit_insn (gen_subsi3 (dest, base, off));
6948 }
6949 break;
6950
6951 case TLS_MODEL_LOCAL_EXEC:
6952 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6953 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6954 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6955 off = gen_rtx_CONST (Pmode, off);
6956
6957 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6958 {
6959 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6960 return gen_rtx_PLUS (Pmode, base, off);
6961 }
6962 else
6963 {
6964 base = get_thread_pointer (true);
6965 dest = gen_reg_rtx (Pmode);
6966 emit_insn (gen_subsi3 (dest, base, off));
6967 }
6968 break;
6969
6970 default:
6971 gcc_unreachable ();
6972 }
6973
6974 return dest;
6975 }
6976
6977 /* Try machine-dependent ways of modifying an illegitimate address
6978 to be legitimate. If we find one, return the new, valid address.
6979 This macro is used in only one place: `memory_address' in explow.c.
6980
6981 OLDX is the address as it was before break_out_memory_refs was called.
6982 In some cases it is useful to look at this to decide what needs to be done.
6983
6984 MODE and WIN are passed so that this macro can use
6985 GO_IF_LEGITIMATE_ADDRESS.
6986
6987 It is always safe for this macro to do nothing. It exists to recognize
6988 opportunities to optimize the output.
6989
6990 For the 80386, we handle X+REG by loading X into a register R and
6991 using R+REG. R will go in a general reg and indexing will be used.
6992 However, if REG is a broken-out memory address or multiplication,
6993 nothing needs to be done because REG can certainly go in a general reg.
6994
6995 When -fpic is used, special handling is needed for symbolic references.
6996 See comments by legitimize_pic_address in i386.c for details. */
6997
6998 rtx
6999 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7000 {
7001 int changed = 0;
7002 unsigned log;
7003
7004 if (TARGET_DEBUG_ADDR)
7005 {
7006 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7007 GET_MODE_NAME (mode));
7008 debug_rtx (x);
7009 }
7010
7011 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7012 if (log)
7013 return legitimize_tls_address (x, log, false);
7014 if (GET_CODE (x) == CONST
7015 && GET_CODE (XEXP (x, 0)) == PLUS
7016 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7017 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7018 {
7019 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7020 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7021 }
7022
7023 if (flag_pic && SYMBOLIC_CONST (x))
7024 return legitimize_pic_address (x, 0);
7025
7026 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7027 if (GET_CODE (x) == ASHIFT
7028 && GET_CODE (XEXP (x, 1)) == CONST_INT
7029 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7030 {
7031 changed = 1;
7032 log = INTVAL (XEXP (x, 1));
7033 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7034 GEN_INT (1 << log));
7035 }
7036
7037 if (GET_CODE (x) == PLUS)
7038 {
7039 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7040
7041 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7042 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7043 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7044 {
7045 changed = 1;
7046 log = INTVAL (XEXP (XEXP (x, 0), 1));
7047 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7048 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7049 GEN_INT (1 << log));
7050 }
7051
7052 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7053 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7054 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7055 {
7056 changed = 1;
7057 log = INTVAL (XEXP (XEXP (x, 1), 1));
7058 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7059 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7060 GEN_INT (1 << log));
7061 }
7062
7063 /* Put multiply first if it isn't already. */
7064 if (GET_CODE (XEXP (x, 1)) == MULT)
7065 {
7066 rtx tmp = XEXP (x, 0);
7067 XEXP (x, 0) = XEXP (x, 1);
7068 XEXP (x, 1) = tmp;
7069 changed = 1;
7070 }
7071
7072 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7073 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7074 created by virtual register instantiation, register elimination, and
7075 similar optimizations. */
7076 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7077 {
7078 changed = 1;
7079 x = gen_rtx_PLUS (Pmode,
7080 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7081 XEXP (XEXP (x, 1), 0)),
7082 XEXP (XEXP (x, 1), 1));
7083 }
7084
7085 /* Canonicalize
7086 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7087 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7088 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7089 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7090 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7091 && CONSTANT_P (XEXP (x, 1)))
7092 {
7093 rtx constant;
7094 rtx other = NULL_RTX;
7095
7096 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7097 {
7098 constant = XEXP (x, 1);
7099 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7100 }
7101 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7102 {
7103 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7104 other = XEXP (x, 1);
7105 }
7106 else
7107 constant = 0;
7108
7109 if (constant)
7110 {
7111 changed = 1;
7112 x = gen_rtx_PLUS (Pmode,
7113 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7114 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7115 plus_constant (other, INTVAL (constant)));
7116 }
7117 }
7118
7119 if (changed && legitimate_address_p (mode, x, FALSE))
7120 return x;
7121
7122 if (GET_CODE (XEXP (x, 0)) == MULT)
7123 {
7124 changed = 1;
7125 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7126 }
7127
7128 if (GET_CODE (XEXP (x, 1)) == MULT)
7129 {
7130 changed = 1;
7131 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7132 }
7133
7134 if (changed
7135 && GET_CODE (XEXP (x, 1)) == REG
7136 && GET_CODE (XEXP (x, 0)) == REG)
7137 return x;
7138
7139 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7140 {
7141 changed = 1;
7142 x = legitimize_pic_address (x, 0);
7143 }
7144
7145 if (changed && legitimate_address_p (mode, x, FALSE))
7146 return x;
7147
7148 if (GET_CODE (XEXP (x, 0)) == REG)
7149 {
7150 rtx temp = gen_reg_rtx (Pmode);
7151 rtx val = force_operand (XEXP (x, 1), temp);
7152 if (val != temp)
7153 emit_move_insn (temp, val);
7154
7155 XEXP (x, 1) = temp;
7156 return x;
7157 }
7158
7159 else if (GET_CODE (XEXP (x, 1)) == REG)
7160 {
7161 rtx temp = gen_reg_rtx (Pmode);
7162 rtx val = force_operand (XEXP (x, 0), temp);
7163 if (val != temp)
7164 emit_move_insn (temp, val);
7165
7166 XEXP (x, 0) = temp;
7167 return x;
7168 }
7169 }
7170
7171 return x;
7172 }
7173 \f
7174 /* Print an integer constant expression in assembler syntax. Addition
7175 and subtraction are the only arithmetic that may appear in these
7176 expressions. FILE is the stdio stream to write to, X is the rtx, and
7177 CODE is the operand print code from the output string. */
7178
7179 static void
7180 output_pic_addr_const (FILE *file, rtx x, int code)
7181 {
7182 char buf[256];
7183
7184 switch (GET_CODE (x))
7185 {
7186 case PC:
7187 gcc_assert (flag_pic);
7188 putc ('.', file);
7189 break;
7190
7191 case SYMBOL_REF:
7192 output_addr_const (file, x);
7193 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7194 fputs ("@PLT", file);
7195 break;
7196
7197 case LABEL_REF:
7198 x = XEXP (x, 0);
7199 /* FALLTHRU */
7200 case CODE_LABEL:
7201 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7202 assemble_name (asm_out_file, buf);
7203 break;
7204
7205 case CONST_INT:
7206 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7207 break;
7208
7209 case CONST:
7210 /* This used to output parentheses around the expression,
7211 but that does not work on the 386 (either ATT or BSD assembler). */
7212 output_pic_addr_const (file, XEXP (x, 0), code);
7213 break;
7214
7215 case CONST_DOUBLE:
7216 if (GET_MODE (x) == VOIDmode)
7217 {
7218 /* We can use %d if the number is <32 bits and positive. */
7219 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7220 fprintf (file, "0x%lx%08lx",
7221 (unsigned long) CONST_DOUBLE_HIGH (x),
7222 (unsigned long) CONST_DOUBLE_LOW (x));
7223 else
7224 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7225 }
7226 else
7227 /* We can't handle floating point constants;
7228 PRINT_OPERAND must handle them. */
7229 output_operand_lossage ("floating constant misused");
7230 break;
7231
7232 case PLUS:
7233 /* Some assemblers need integer constants to appear first. */
7234 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7235 {
7236 output_pic_addr_const (file, XEXP (x, 0), code);
7237 putc ('+', file);
7238 output_pic_addr_const (file, XEXP (x, 1), code);
7239 }
7240 else
7241 {
7242 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7243 output_pic_addr_const (file, XEXP (x, 1), code);
7244 putc ('+', file);
7245 output_pic_addr_const (file, XEXP (x, 0), code);
7246 }
7247 break;
7248
7249 case MINUS:
7250 if (!TARGET_MACHO)
7251 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7252 output_pic_addr_const (file, XEXP (x, 0), code);
7253 putc ('-', file);
7254 output_pic_addr_const (file, XEXP (x, 1), code);
7255 if (!TARGET_MACHO)
7256 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7257 break;
7258
7259 case UNSPEC:
7260 gcc_assert (XVECLEN (x, 0) == 1);
7261 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7262 switch (XINT (x, 1))
7263 {
7264 case UNSPEC_GOT:
7265 fputs ("@GOT", file);
7266 break;
7267 case UNSPEC_GOTOFF:
7268 fputs ("@GOTOFF", file);
7269 break;
7270 case UNSPEC_GOTPCREL:
7271 fputs ("@GOTPCREL(%rip)", file);
7272 break;
7273 case UNSPEC_GOTTPOFF:
7274 /* FIXME: This might be @TPOFF in Sun ld too. */
7275 fputs ("@GOTTPOFF", file);
7276 break;
7277 case UNSPEC_TPOFF:
7278 fputs ("@TPOFF", file);
7279 break;
7280 case UNSPEC_NTPOFF:
7281 if (TARGET_64BIT)
7282 fputs ("@TPOFF", file);
7283 else
7284 fputs ("@NTPOFF", file);
7285 break;
7286 case UNSPEC_DTPOFF:
7287 fputs ("@DTPOFF", file);
7288 break;
7289 case UNSPEC_GOTNTPOFF:
7290 if (TARGET_64BIT)
7291 fputs ("@GOTTPOFF(%rip)", file);
7292 else
7293 fputs ("@GOTNTPOFF", file);
7294 break;
7295 case UNSPEC_INDNTPOFF:
7296 fputs ("@INDNTPOFF", file);
7297 break;
7298 default:
7299 output_operand_lossage ("invalid UNSPEC as operand");
7300 break;
7301 }
7302 break;
7303
7304 default:
7305 output_operand_lossage ("invalid expression as operand");
7306 }
7307 }
7308
7309 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7310 We need to emit DTP-relative relocations. */
7311
7312 static void
7313 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7314 {
7315 fputs (ASM_LONG, file);
7316 output_addr_const (file, x);
7317 fputs ("@DTPOFF", file);
7318 switch (size)
7319 {
7320 case 4:
7321 break;
7322 case 8:
7323 fputs (", 0", file);
7324 break;
7325 default:
7326 gcc_unreachable ();
7327 }
7328 }
7329
7330 /* In the name of slightly smaller debug output, and to cater to
7331 general assembler lossage, recognize PIC+GOTOFF and turn it back
7332 into a direct symbol reference.
7333
7334 On Darwin, this is necessary to avoid a crash, because Darwin
7335 has a different PIC label for each routine but the DWARF debugging
7336 information is not associated with any particular routine, so it's
7337 necessary to remove references to the PIC label from RTL stored by
7338 the DWARF output code. */
7339
7340 static rtx
7341 ix86_delegitimize_address (rtx orig_x)
7342 {
7343 rtx x = orig_x;
7344 /* reg_addend is NULL or a multiple of some register. */
7345 rtx reg_addend = NULL_RTX;
7346 /* const_addend is NULL or a const_int. */
7347 rtx const_addend = NULL_RTX;
7348 /* This is the result, or NULL. */
7349 rtx result = NULL_RTX;
7350
7351 if (GET_CODE (x) == MEM)
7352 x = XEXP (x, 0);
7353
7354 if (TARGET_64BIT)
7355 {
7356 if (GET_CODE (x) != CONST
7357 || GET_CODE (XEXP (x, 0)) != UNSPEC
7358 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7359 || GET_CODE (orig_x) != MEM)
7360 return orig_x;
7361 return XVECEXP (XEXP (x, 0), 0, 0);
7362 }
7363
7364 if (GET_CODE (x) != PLUS
7365 || GET_CODE (XEXP (x, 1)) != CONST)
7366 return orig_x;
7367
7368 if (GET_CODE (XEXP (x, 0)) == REG
7369 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7370 /* %ebx + GOT/GOTOFF */
7371 ;
7372 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7373 {
7374 /* %ebx + %reg * scale + GOT/GOTOFF */
7375 reg_addend = XEXP (x, 0);
7376 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7377 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7378 reg_addend = XEXP (reg_addend, 1);
7379 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7380 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7381 reg_addend = XEXP (reg_addend, 0);
7382 else
7383 return orig_x;
7384 if (GET_CODE (reg_addend) != REG
7385 && GET_CODE (reg_addend) != MULT
7386 && GET_CODE (reg_addend) != ASHIFT)
7387 return orig_x;
7388 }
7389 else
7390 return orig_x;
7391
7392 x = XEXP (XEXP (x, 1), 0);
7393 if (GET_CODE (x) == PLUS
7394 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7395 {
7396 const_addend = XEXP (x, 1);
7397 x = XEXP (x, 0);
7398 }
7399
7400 if (GET_CODE (x) == UNSPEC
7401 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7402 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7403 result = XVECEXP (x, 0, 0);
7404
7405 if (TARGET_MACHO && darwin_local_data_pic (x)
7406 && GET_CODE (orig_x) != MEM)
7407 result = XEXP (x, 0);
7408
7409 if (! result)
7410 return orig_x;
7411
7412 if (const_addend)
7413 result = gen_rtx_PLUS (Pmode, result, const_addend);
7414 if (reg_addend)
7415 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7416 return result;
7417 }
7418 \f
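/* Print to FILE the assembler suffix for the condition code CODE of mode
   MODE.  If REVERSE is nonzero, print the suffix for the reversed
   condition.  FP is nonzero when the suffix is for a floating-point
   conditional move and selects the alternate spellings (e.g. "nbe"
   instead of "a") that some assemblers require there.  */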
7419 static void
7420 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7421 int fp, FILE *file)
7422 {
7423 const char *suffix;
7424
7425 if (mode == CCFPmode || mode == CCFPUmode)
7426 {
7427 enum rtx_code second_code, bypass_code;
7428 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7429 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7430 code = ix86_fp_compare_code_to_integer (code);
7431 mode = CCmode;
7432 }
7433 if (reverse)
7434 code = reverse_condition (code);
7435
7436 switch (code)
7437 {
7438 case EQ:
7439 suffix = "e";
7440 break;
7441 case NE:
7442 suffix = "ne";
7443 break;
7444 case GT:
7445 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7446 suffix = "g";
7447 break;
7448 case GTU:
7449 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7450 Those same assemblers have the same but opposite lossage on cmov. */
7451 gcc_assert (mode == CCmode);
7452 suffix = fp ? "nbe" : "a";
7453 break;
7454 case LT:
7455 switch (mode)
7456 {
7457 case CCNOmode:
7458 case CCGOCmode:
7459 suffix = "s";
7460 break;
7461
7462 case CCmode:
7463 case CCGCmode:
7464 suffix = "l";
7465 break;
7466
7467 default:
7468 gcc_unreachable ();
7469 }
7470 break;
7471 case LTU:
7472 gcc_assert (mode == CCmode);
7473 suffix = "b";
7474 break;
7475 case GE:
7476 switch (mode)
7477 {
7478 case CCNOmode:
7479 case CCGOCmode:
7480 suffix = "ns";
7481 break;
7482
7483 case CCmode:
7484 case CCGCmode:
7485 suffix = "ge";
7486 break;
7487
7488 default:
7489 gcc_unreachable ();
7490 }
7491 break;
7492 case GEU:
7493 /* ??? As above. */
7494 gcc_assert (mode == CCmode);
7495 suffix = fp ? "nb" : "ae";
7496 break;
7497 case LE:
7498 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7499 suffix = "le";
7500 break;
7501 case LEU:
7502 gcc_assert (mode == CCmode);
7503 suffix = "be";
7504 break;
7505 case UNORDERED:
7506 suffix = fp ? "u" : "p";
7507 break;
7508 case ORDERED:
7509 suffix = fp ? "nu" : "np";
7510 break;
7511 default:
7512 gcc_unreachable ();
7513 }
7514 fputs (suffix, file);
7515 }
7516
7517 /* Print the name of register X to FILE based on its machine mode and number.
7518 If CODE is 'w', pretend the mode is HImode.
7519 If CODE is 'b', pretend the mode is QImode.
7520 If CODE is 'k', pretend the mode is SImode.
7521 If CODE is 'q', pretend the mode is DImode.
7522 If CODE is 'h', pretend the reg is the 'high' byte register.
7523 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7524
7525 void
7526 print_reg (rtx x, int code, FILE *file)
7527 {
7528 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7529 && REGNO (x) != FRAME_POINTER_REGNUM
7530 && REGNO (x) != FLAGS_REG
7531 && REGNO (x) != FPSR_REG
7532 && REGNO (x) != FPCR_REG);
7533
7534 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7535 putc ('%', file);
7536
7537 if (code == 'w' || MMX_REG_P (x))
7538 code = 2;
7539 else if (code == 'b')
7540 code = 1;
7541 else if (code == 'k')
7542 code = 4;
7543 else if (code == 'q')
7544 code = 8;
7545 else if (code == 'y')
7546 code = 3;
7547 else if (code == 'h')
7548 code = 0;
7549 else
7550 code = GET_MODE_SIZE (GET_MODE (x));
7551
7552 /* Irritatingly, AMD extended registers use a different naming convention
7553 from the normal registers. */
7554 if (REX_INT_REG_P (x))
7555 {
7556 gcc_assert (TARGET_64BIT);
7557 switch (code)
7558 {
7559 case 0:
7560 error ("extended registers have no high halves");
7561 break;
7562 case 1:
7563 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7564 break;
7565 case 2:
7566 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7567 break;
7568 case 4:
7569 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7570 break;
7571 case 8:
7572 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7573 break;
7574 default:
7575 error ("unsupported operand size for extended register");
7576 break;
7577 }
7578 return;
7579 }
7580 switch (code)
7581 {
7582 case 3:
7583 if (STACK_TOP_P (x))
7584 {
7585 fputs ("st(0)", file);
7586 break;
7587 }
7588 /* FALLTHRU */
7589 case 8:
7590 case 4:
7591 case 12:
7592 if (! ANY_FP_REG_P (x))
7593 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7594 /* FALLTHRU */
7595 case 16:
7596 case 2:
7597 normal:
7598 fputs (hi_reg_name[REGNO (x)], file);
7599 break;
7600 case 1:
7601 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7602 goto normal;
7603 fputs (qi_reg_name[REGNO (x)], file);
7604 break;
7605 case 0:
7606 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7607 goto normal;
7608 fputs (qi_high_reg_name[REGNO (x)], file);
7609 break;
7610 default:
7611 gcc_unreachable ();
7612 }
7613 }
7614
7615 /* Locate some local-dynamic symbol still in use by this function
7616 so that we can print its name in some tls_local_dynamic_base
7617 pattern. */
7618
7619 static const char *
7620 get_some_local_dynamic_name (void)
7621 {
7622 rtx insn;
7623
7624 if (cfun->machine->some_ld_name)
7625 return cfun->machine->some_ld_name;
7626
7627 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7628 if (INSN_P (insn)
7629 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7630 return cfun->machine->some_ld_name;
7631
7632 gcc_unreachable ();
7633 }
7634
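/* Callback for for_each_rtx, used by get_some_local_dynamic_name above.
   If *PX is a local-dynamic TLS symbol, record its name in
   cfun->machine->some_ld_name and return 1 to stop the walk.  */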
7635 static int
7636 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7637 {
7638 rtx x = *px;
7639
7640 if (GET_CODE (x) == SYMBOL_REF
7641 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7642 {
7643 cfun->machine->some_ld_name = XSTR (x, 0);
7644 return 1;
7645 }
7646
7647 return 0;
7648 }
7649
7650 /* Meaning of CODE:
7651 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7652 C -- print opcode suffix for set/cmov insn.
7653 c -- like C, but print reversed condition
7654 F,f -- likewise, but for floating-point.
7655 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7656 otherwise nothing
7657 R -- print the prefix for register names.
7658 z -- print the opcode suffix for the size of the current operand.
7659 * -- print a star (in certain assembler syntax)
7660 A -- print an absolute memory reference.
7661 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7662 s -- print a shift double count, followed by the assembler's argument
7663 delimiter.
7664 b -- print the QImode name of the register for the indicated operand.
7665 %b0 would print %al if operands[0] is reg 0.
7666 w -- likewise, print the HImode name of the register.
7667 k -- likewise, print the SImode name of the register.
7668 q -- likewise, print the DImode name of the register.
7669 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7670 y -- print "st(0)" instead of "st" as a register.
7671 D -- print condition for SSE cmp instruction.
7672 P -- if PIC, print an @PLT suffix.
7673 X -- don't print any sort of PIC '@' suffix for a symbol.
7674 & -- print some in-use local-dynamic symbol name.
7675 H -- print a memory address offset by 8; used for sse high-parts
7676 */
7677
7678 void
7679 print_operand (FILE *file, rtx x, int code)
7680 {
7681 if (code)
7682 {
7683 switch (code)
7684 {
7685 case '*':
7686 if (ASSEMBLER_DIALECT == ASM_ATT)
7687 putc ('*', file);
7688 return;
7689
7690 case '&':
7691 assemble_name (file, get_some_local_dynamic_name ());
7692 return;
7693
7694 case 'A':
7695 switch (ASSEMBLER_DIALECT)
7696 {
7697 case ASM_ATT:
7698 putc ('*', file);
7699 break;
7700
7701 case ASM_INTEL:
7702 /* Intel syntax. For absolute addresses, registers should not
7703 be surrounded by braces. */
7704 if (GET_CODE (x) != REG)
7705 {
7706 putc ('[', file);
7707 PRINT_OPERAND (file, x, 0);
7708 putc (']', file);
7709 return;
7710 }
7711 break;
7712
7713 default:
7714 gcc_unreachable ();
7715 }
7716
7717 PRINT_OPERAND (file, x, 0);
7718 return;
7719
7720
7721 case 'L':
7722 if (ASSEMBLER_DIALECT == ASM_ATT)
7723 putc ('l', file);
7724 return;
7725
7726 case 'W':
7727 if (ASSEMBLER_DIALECT == ASM_ATT)
7728 putc ('w', file);
7729 return;
7730
7731 case 'B':
7732 if (ASSEMBLER_DIALECT == ASM_ATT)
7733 putc ('b', file);
7734 return;
7735
7736 case 'Q':
7737 if (ASSEMBLER_DIALECT == ASM_ATT)
7738 putc ('l', file);
7739 return;
7740
7741 case 'S':
7742 if (ASSEMBLER_DIALECT == ASM_ATT)
7743 putc ('s', file);
7744 return;
7745
7746 case 'T':
7747 if (ASSEMBLER_DIALECT == ASM_ATT)
7748 putc ('t', file);
7749 return;
7750
7751 case 'z':
7752 /* 387 opcodes don't get size suffixes if the operands are
7753 registers. */
7754 if (STACK_REG_P (x))
7755 return;
7756
7757 /* Likewise if using Intel opcodes. */
7758 if (ASSEMBLER_DIALECT == ASM_INTEL)
7759 return;
7760
7761 /* Derive the opcode size suffix from the size of the operand. */
7762 switch (GET_MODE_SIZE (GET_MODE (x)))
7763 {
7764 case 2:
7765 #ifdef HAVE_GAS_FILDS_FISTS
7766 putc ('s', file);
7767 #endif
7768 return;
7769
7770 case 4:
7771 if (GET_MODE (x) == SFmode)
7772 {
7773 putc ('s', file);
7774 return;
7775 }
7776 else
7777 putc ('l', file);
7778 return;
7779
7780 case 12:
7781 case 16:
7782 putc ('t', file);
7783 return;
7784
7785 case 8:
7786 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7787 {
7788 #ifdef GAS_MNEMONICS
7789 putc ('q', file);
7790 #else
7791 putc ('l', file);
7792 putc ('l', file);
7793 #endif
7794 }
7795 else
7796 putc ('l', file);
7797 return;
7798
7799 default:
7800 gcc_unreachable ();
7801 }
7802
7803 case 'b':
7804 case 'w':
7805 case 'k':
7806 case 'q':
7807 case 'h':
7808 case 'y':
7809 case 'X':
7810 case 'P':
7811 break;
7812
7813 case 's':
7814 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7815 {
7816 PRINT_OPERAND (file, x, 0);
7817 putc (',', file);
7818 }
7819 return;
7820
7821 case 'D':
7822 /* A little bit of brain damage here. The SSE compare instructions
7823 use completely different names for the comparisons than the
7824 fp conditional moves do. */
7825 switch (GET_CODE (x))
7826 {
7827 case EQ:
7828 case UNEQ:
7829 fputs ("eq", file);
7830 break;
7831 case LT:
7832 case UNLT:
7833 fputs ("lt", file);
7834 break;
7835 case LE:
7836 case UNLE:
7837 fputs ("le", file);
7838 break;
7839 case UNORDERED:
7840 fputs ("unord", file);
7841 break;
7842 case NE:
7843 case LTGT:
7844 fputs ("neq", file);
7845 break;
7846 case UNGE:
7847 case GE:
7848 fputs ("nlt", file);
7849 break;
7850 case UNGT:
7851 case GT:
7852 fputs ("nle", file);
7853 break;
7854 case ORDERED:
7855 fputs ("ord", file);
7856 break;
7857 default:
7858 gcc_unreachable ();
7859 }
7860 return;
7861 case 'O':
7862 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7863 if (ASSEMBLER_DIALECT == ASM_ATT)
7864 {
7865 switch (GET_MODE (x))
7866 {
7867 case HImode: putc ('w', file); break;
7868 case SImode:
7869 case SFmode: putc ('l', file); break;
7870 case DImode:
7871 case DFmode: putc ('q', file); break;
7872 default: gcc_unreachable ();
7873 }
7874 putc ('.', file);
7875 }
7876 #endif
7877 return;
7878 case 'C':
7879 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7880 return;
7881 case 'F':
7882 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7883 if (ASSEMBLER_DIALECT == ASM_ATT)
7884 putc ('.', file);
7885 #endif
7886 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7887 return;
7888
7889 /* Like above, but reverse condition */
7890 case 'c':
7891 /* Check to see if argument to %c is really a constant
7892 and not a condition code which needs to be reversed. */
7893 if (!COMPARISON_P (x))
7894 {
7895 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7896 return;
7897 }
7898 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7899 return;
7900 case 'f':
7901 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7902 if (ASSEMBLER_DIALECT == ASM_ATT)
7903 putc ('.', file);
7904 #endif
7905 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7906 return;
7907
7908 case 'H':
7909 /* It doesn't actually matter what mode we use here, as we're
7910 only going to use this for printing. */
7911 x = adjust_address_nv (x, DImode, 8);
7912 break;
7913
7914 case '+':
7915 {
7916 rtx x;
7917
7918 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7919 return;
7920
7921 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7922 if (x)
7923 {
7924 int pred_val = INTVAL (XEXP (x, 0));
7925
7926 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7927 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7928 {
7929 int taken = pred_val > REG_BR_PROB_BASE / 2;
7930 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7931
7932 /* Emit hints only in cases where the default branch prediction
7933 heuristics would fail. */
7934 if (taken != cputaken)
7935 {
7936 /* We use 3e (DS) prefix for taken branches and
7937 2e (CS) prefix for not taken branches. */
7938 if (taken)
7939 fputs ("ds ; ", file);
7940 else
7941 fputs ("cs ; ", file);
7942 }
7943 }
7944 }
7945 return;
7946 }
7947 default:
7948 output_operand_lossage ("invalid operand code '%c'", code);
7949 }
7950 }
7951
7952 if (GET_CODE (x) == REG)
7953 print_reg (x, code, file);
7954
7955 else if (GET_CODE (x) == MEM)
7956 {
7957 /* No `byte ptr' prefix for call instructions. */
7958 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7959 {
7960 const char * size;
7961 switch (GET_MODE_SIZE (GET_MODE (x)))
7962 {
7963 case 1: size = "BYTE"; break;
7964 case 2: size = "WORD"; break;
7965 case 4: size = "DWORD"; break;
7966 case 8: size = "QWORD"; break;
7967 case 12: size = "XWORD"; break;
7968 case 16: size = "XMMWORD"; break;
7969 default:
7970 gcc_unreachable ();
7971 }
7972
7973 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7974 if (code == 'b')
7975 size = "BYTE";
7976 else if (code == 'w')
7977 size = "WORD";
7978 else if (code == 'k')
7979 size = "DWORD";
7980
7981 fputs (size, file);
7982 fputs (" PTR ", file);
7983 }
7984
7985 x = XEXP (x, 0);
7986 /* Avoid (%rip) for call operands. */
7987 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7988 && GET_CODE (x) != CONST_INT)
7989 output_addr_const (file, x);
7990 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7991 output_operand_lossage ("invalid constraints for operand");
7992 else
7993 output_address (x);
7994 }
7995
7996 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7997 {
7998 REAL_VALUE_TYPE r;
7999 long l;
8000
8001 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8002 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8003
8004 if (ASSEMBLER_DIALECT == ASM_ATT)
8005 putc ('$', file);
8006 fprintf (file, "0x%08lx", l);
8007 }
8008
8009 /* These float cases don't actually occur as immediate operands. */
8010 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8011 {
8012 char dstr[30];
8013
8014 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8015 fprintf (file, "%s", dstr);
8016 }
8017
8018 else if (GET_CODE (x) == CONST_DOUBLE
8019 && GET_MODE (x) == XFmode)
8020 {
8021 char dstr[30];
8022
8023 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8024 fprintf (file, "%s", dstr);
8025 }
8026
8027 else
8028 {
8029 /* We have patterns that allow zero sets of memory, for instance.
8030 In 64-bit mode, we should probably support all 8-byte vectors,
8031 since we can in fact encode that into an immediate. */
8032 if (GET_CODE (x) == CONST_VECTOR)
8033 {
8034 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8035 x = const0_rtx;
8036 }
8037
8038 if (code != 'P')
8039 {
8040 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8041 {
8042 if (ASSEMBLER_DIALECT == ASM_ATT)
8043 putc ('$', file);
8044 }
8045 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8046 || GET_CODE (x) == LABEL_REF)
8047 {
8048 if (ASSEMBLER_DIALECT == ASM_ATT)
8049 putc ('$', file);
8050 else
8051 fputs ("OFFSET FLAT:", file);
8052 }
8053 }
8054 if (GET_CODE (x) == CONST_INT)
8055 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8056 else if (flag_pic)
8057 output_pic_addr_const (file, x, code);
8058 else
8059 output_addr_const (file, x);
8060 }
8061 }
8062 \f
8063 /* Print a memory operand whose address is ADDR. */
8064
8065 void
8066 print_operand_address (FILE *file, rtx addr)
8067 {
8068 struct ix86_address parts;
8069 rtx base, index, disp;
8070 int scale;
8071 int ok = ix86_decompose_address (addr, &parts);
8072
8073 gcc_assert (ok);
8074
8075 base = parts.base;
8076 index = parts.index;
8077 disp = parts.disp;
8078 scale = parts.scale;
8079
8080 switch (parts.seg)
8081 {
8082 case SEG_DEFAULT:
8083 break;
8084 case SEG_FS:
8085 case SEG_GS:
8086 if (USER_LABEL_PREFIX[0] == 0)
8087 putc ('%', file);
8088 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8089 break;
8090 default:
8091 gcc_unreachable ();
8092 }
8093
8094 if (!base && !index)
8095 {
8096 /* A displacement-only address requires special attention. */
8097
8098 if (GET_CODE (disp) == CONST_INT)
8099 {
8100 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8101 {
8102 if (USER_LABEL_PREFIX[0] == 0)
8103 putc ('%', file);
8104 fputs ("ds:", file);
8105 }
8106 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8107 }
8108 else if (flag_pic)
8109 output_pic_addr_const (file, disp, 0);
8110 else
8111 output_addr_const (file, disp);
8112
8113 /* Use RIP-relative addressing, which is one byte shorter, in 64-bit mode. */
8114 if (TARGET_64BIT)
8115 {
8116 if (GET_CODE (disp) == CONST
8117 && GET_CODE (XEXP (disp, 0)) == PLUS
8118 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8119 disp = XEXP (XEXP (disp, 0), 0);
8120 if (GET_CODE (disp) == LABEL_REF
8121 || (GET_CODE (disp) == SYMBOL_REF
8122 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8123 fputs ("(%rip)", file);
8124 }
8125 }
8126 else
8127 {
8128 if (ASSEMBLER_DIALECT == ASM_ATT)
8129 {
8130 if (disp)
8131 {
8132 if (flag_pic)
8133 output_pic_addr_const (file, disp, 0);
8134 else if (GET_CODE (disp) == LABEL_REF)
8135 output_asm_label (disp);
8136 else
8137 output_addr_const (file, disp);
8138 }
8139
8140 putc ('(', file);
8141 if (base)
8142 print_reg (base, 0, file);
8143 if (index)
8144 {
8145 putc (',', file);
8146 print_reg (index, 0, file);
8147 if (scale != 1)
8148 fprintf (file, ",%d", scale);
8149 }
8150 putc (')', file);
8151 }
8152 else
8153 {
8154 rtx offset = NULL_RTX;
8155
8156 if (disp)
8157 {
8158 /* Pull out the offset of a symbol; print any symbol itself. */
8159 if (GET_CODE (disp) == CONST
8160 && GET_CODE (XEXP (disp, 0)) == PLUS
8161 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8162 {
8163 offset = XEXP (XEXP (disp, 0), 1);
8164 disp = gen_rtx_CONST (VOIDmode,
8165 XEXP (XEXP (disp, 0), 0));
8166 }
8167
8168 if (flag_pic)
8169 output_pic_addr_const (file, disp, 0);
8170 else if (GET_CODE (disp) == LABEL_REF)
8171 output_asm_label (disp);
8172 else if (GET_CODE (disp) == CONST_INT)
8173 offset = disp;
8174 else
8175 output_addr_const (file, disp);
8176 }
8177
8178 putc ('[', file);
8179 if (base)
8180 {
8181 print_reg (base, 0, file);
8182 if (offset)
8183 {
8184 if (INTVAL (offset) >= 0)
8185 putc ('+', file);
8186 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8187 }
8188 }
8189 else if (offset)
8190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8191 else
8192 putc ('0', file);
8193
8194 if (index)
8195 {
8196 putc ('+', file);
8197 print_reg (index, 0, file);
8198 if (scale != 1)
8199 fprintf (file, "*%d", scale);
8200 }
8201 putc (']', file);
8202 }
8203 }
8204 }
8205
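/* Print to FILE the TLS-related UNSPECs that output_addr_const cannot
   handle itself: the operand followed by the appropriate relocation
   suffix (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @GOTNTPOFF or
   @INDNTPOFF).  Return true if X was handled, false otherwise.  */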
8206 bool
8207 output_addr_const_extra (FILE *file, rtx x)
8208 {
8209 rtx op;
8210
8211 if (GET_CODE (x) != UNSPEC)
8212 return false;
8213
8214 op = XVECEXP (x, 0, 0);
8215 switch (XINT (x, 1))
8216 {
8217 case UNSPEC_GOTTPOFF:
8218 output_addr_const (file, op);
8219 /* FIXME: This might be @TPOFF in Sun ld. */
8220 fputs ("@GOTTPOFF", file);
8221 break;
8222 case UNSPEC_TPOFF:
8223 output_addr_const (file, op);
8224 fputs ("@TPOFF", file);
8225 break;
8226 case UNSPEC_NTPOFF:
8227 output_addr_const (file, op);
8228 if (TARGET_64BIT)
8229 fputs ("@TPOFF", file);
8230 else
8231 fputs ("@NTPOFF", file);
8232 break;
8233 case UNSPEC_DTPOFF:
8234 output_addr_const (file, op);
8235 fputs ("@DTPOFF", file);
8236 break;
8237 case UNSPEC_GOTNTPOFF:
8238 output_addr_const (file, op);
8239 if (TARGET_64BIT)
8240 fputs ("@GOTTPOFF(%rip)", file);
8241 else
8242 fputs ("@GOTNTPOFF", file);
8243 break;
8244 case UNSPEC_INDNTPOFF:
8245 output_addr_const (file, op);
8246 fputs ("@INDNTPOFF", file);
8247 break;
8248
8249 default:
8250 return false;
8251 }
8252
8253 return true;
8254 }
8255 \f
8256 /* Split one or more DImode RTL references into pairs of SImode
8257 references. The RTL can be REG, offsettable MEM, integer constant, or
8258 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8259 split and "num" is its length. lo_half and hi_half are output arrays
8260 that parallel "operands". */
8261
8262 void
8263 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8264 {
8265 while (num--)
8266 {
8267 rtx op = operands[num];
8268
8269 /* simplify_subreg refuses to split volatile memory addresses,
8270 but we still have to handle them. */
8271 if (GET_CODE (op) == MEM)
8272 {
8273 lo_half[num] = adjust_address (op, SImode, 0);
8274 hi_half[num] = adjust_address (op, SImode, 4);
8275 }
8276 else
8277 {
8278 lo_half[num] = simplify_gen_subreg (SImode, op,
8279 GET_MODE (op) == VOIDmode
8280 ? DImode : GET_MODE (op), 0);
8281 hi_half[num] = simplify_gen_subreg (SImode, op,
8282 GET_MODE (op) == VOIDmode
8283 ? DImode : GET_MODE (op), 4);
8284 }
8285 }
8286 }
8287 /* Split one or more TImode RTL references into pairs of DImode
8288 references. The RTL can be REG, offsettable MEM, integer constant, or
8289 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8290 split and "num" is its length. lo_half and hi_half are output arrays
8291 that parallel "operands". */
8292
8293 void
8294 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8295 {
8296 while (num--)
8297 {
8298 rtx op = operands[num];
8299
8300 /* simplify_subreg refuses to split volatile memory addresses, but we
8301 still have to handle them. */
8302 if (GET_CODE (op) == MEM)
8303 {
8304 lo_half[num] = adjust_address (op, DImode, 0);
8305 hi_half[num] = adjust_address (op, DImode, 8);
8306 }
8307 else
8308 {
8309 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8310 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8311 }
8312 }
8313 }
8314 \f
8315 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8316 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8317 is the expression of the binary operation. The output may either be
8318 emitted here, or returned to the caller, like all output_* functions.
8319
8320 There is no guarantee that the operands are the same mode, as they
8321 might be within FLOAT or FLOAT_EXTEND expressions. */
8322
8323 #ifndef SYSV386_COMPAT
8324 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8325 wants to fix the assemblers because that causes incompatibility
8326 with gcc. No-one wants to fix gcc because that causes
8327 incompatibility with assemblers... You can use the option of
8328 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8329 #define SYSV386_COMPAT 1
8330 #endif
8331
8332 const char *
8333 output_387_binary_op (rtx insn, rtx *operands)
8334 {
8335 static char buf[30];
8336 const char *p;
8337 const char *ssep;
8338 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8339
8340 #ifdef ENABLE_CHECKING
8341 /* Even if we do not want to check the inputs, this documents the input
8342 constraints, which helps in understanding the following code. */
8343 if (STACK_REG_P (operands[0])
8344 && ((REG_P (operands[1])
8345 && REGNO (operands[0]) == REGNO (operands[1])
8346 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8347 || (REG_P (operands[2])
8348 && REGNO (operands[0]) == REGNO (operands[2])
8349 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8350 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8351 ; /* ok */
8352 else
8353 gcc_assert (is_sse);
8354 #endif
8355
8356 switch (GET_CODE (operands[3]))
8357 {
8358 case PLUS:
8359 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8360 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8361 p = "fiadd";
8362 else
8363 p = "fadd";
8364 ssep = "add";
8365 break;
8366
8367 case MINUS:
8368 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8369 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8370 p = "fisub";
8371 else
8372 p = "fsub";
8373 ssep = "sub";
8374 break;
8375
8376 case MULT:
8377 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8378 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8379 p = "fimul";
8380 else
8381 p = "fmul";
8382 ssep = "mul";
8383 break;
8384
8385 case DIV:
8386 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8387 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8388 p = "fidiv";
8389 else
8390 p = "fdiv";
8391 ssep = "div";
8392 break;
8393
8394 default:
8395 gcc_unreachable ();
8396 }
8397
8398 if (is_sse)
8399 {
8400 strcpy (buf, ssep);
8401 if (GET_MODE (operands[0]) == SFmode)
8402 strcat (buf, "ss\t{%2, %0|%0, %2}");
8403 else
8404 strcat (buf, "sd\t{%2, %0|%0, %2}");
8405 return buf;
8406 }
8407 strcpy (buf, p);
8408
8409 switch (GET_CODE (operands[3]))
8410 {
8411 case MULT:
8412 case PLUS:
8413 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8414 {
8415 rtx temp = operands[2];
8416 operands[2] = operands[1];
8417 operands[1] = temp;
8418 }
8419
8420 /* We know operands[0] == operands[1]. */
8421
8422 if (GET_CODE (operands[2]) == MEM)
8423 {
8424 p = "%z2\t%2";
8425 break;
8426 }
8427
8428 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8429 {
8430 if (STACK_TOP_P (operands[0]))
8431 /* How is it that we are storing to a dead operand[2]?
8432 Well, presumably operands[1] is dead too. We can't
8433 store the result to st(0) as st(0) gets popped on this
8434 instruction. Instead store to operands[2] (which I
8435 think has to be st(1)). st(1) will be popped later.
8436 gcc <= 2.8.1 didn't have this check and generated
8437 assembly code that the Unixware assembler rejected. */
8438 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8439 else
8440 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8441 break;
8442 }
8443
8444 if (STACK_TOP_P (operands[0]))
8445 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8446 else
8447 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8448 break;
8449
8450 case MINUS:
8451 case DIV:
8452 if (GET_CODE (operands[1]) == MEM)
8453 {
8454 p = "r%z1\t%1";
8455 break;
8456 }
8457
8458 if (GET_CODE (operands[2]) == MEM)
8459 {
8460 p = "%z2\t%2";
8461 break;
8462 }
8463
8464 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8465 {
8466 #if SYSV386_COMPAT
8467 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8468 derived assemblers, confusingly reverse the direction of
8469 the operation for fsub{r} and fdiv{r} when the
8470 destination register is not st(0). The Intel assembler
8471 doesn't have this brain damage. Read !SYSV386_COMPAT to
8472 figure out what the hardware really does. */
8473 if (STACK_TOP_P (operands[0]))
8474 p = "{p\t%0, %2|rp\t%2, %0}";
8475 else
8476 p = "{rp\t%2, %0|p\t%0, %2}";
8477 #else
8478 if (STACK_TOP_P (operands[0]))
8479 /* As above for fmul/fadd, we can't store to st(0). */
8480 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8481 else
8482 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8483 #endif
8484 break;
8485 }
8486
8487 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8488 {
8489 #if SYSV386_COMPAT
8490 if (STACK_TOP_P (operands[0]))
8491 p = "{rp\t%0, %1|p\t%1, %0}";
8492 else
8493 p = "{p\t%1, %0|rp\t%0, %1}";
8494 #else
8495 if (STACK_TOP_P (operands[0]))
8496 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8497 else
8498 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8499 #endif
8500 break;
8501 }
8502
8503 if (STACK_TOP_P (operands[0]))
8504 {
8505 if (STACK_TOP_P (operands[1]))
8506 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8507 else
8508 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8509 break;
8510 }
8511 else if (STACK_TOP_P (operands[1]))
8512 {
8513 #if SYSV386_COMPAT
8514 p = "{\t%1, %0|r\t%0, %1}";
8515 #else
8516 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8517 #endif
8518 }
8519 else
8520 {
8521 #if SYSV386_COMPAT
8522 p = "{r\t%2, %0|\t%0, %2}";
8523 #else
8524 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8525 #endif
8526 }
8527 break;
8528
8529 default:
8530 gcc_unreachable ();
8531 }
8532
8533 strcat (buf, p);
8534 return buf;
8535 }
8536
8537 /* Return needed mode for entity in optimize_mode_switching pass. */
8538
8539 int
8540 ix86_mode_needed (int entity, rtx insn)
8541 {
8542 enum attr_i387_cw mode;
8543
8544 /* The mode UNINITIALIZED is used to store the control word after a
8545 function call or ASM pattern. The mode ANY specifies that the function
8546 has no requirements on the control word and makes no changes to the
8547 bits we are interested in. */
8548
8549 if (CALL_P (insn)
8550 || (NONJUMP_INSN_P (insn)
8551 && (asm_noperands (PATTERN (insn)) >= 0
8552 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8553 return I387_CW_UNINITIALIZED;
8554
8555 if (recog_memoized (insn) < 0)
8556 return I387_CW_ANY;
8557
8558 mode = get_attr_i387_cw (insn);
8559
8560 switch (entity)
8561 {
8562 case I387_TRUNC:
8563 if (mode == I387_CW_TRUNC)
8564 return mode;
8565 break;
8566
8567 case I387_FLOOR:
8568 if (mode == I387_CW_FLOOR)
8569 return mode;
8570 break;
8571
8572 case I387_CEIL:
8573 if (mode == I387_CW_CEIL)
8574 return mode;
8575 break;
8576
8577 case I387_MASK_PM:
8578 if (mode == I387_CW_MASK_PM)
8579 return mode;
8580 break;
8581
8582 default:
8583 gcc_unreachable ();
8584 }
8585
8586 return I387_CW_ANY;
8587 }
8588
8589 /* Output code to initialize the control word copies used by the trunc?f?i
8590 and rounding patterns. MODE selects the rounding/masking variant whose
8591 control-word copy should be stored in its stack slot. */
8592
8593 void
8594 emit_i387_cw_initialization (int mode)
8595 {
8596 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8597 rtx new_mode;
8598
8599 int slot;
8600
8601 rtx reg = gen_reg_rtx (HImode);
8602
8603 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8604 emit_move_insn (reg, copy_rtx (stored_mode));
8605
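/* Depending on the target, either modify the whole HImode copy with
   and/or operations, or (for the rounding-control cases below) simply
   rewrite the high byte of the control word with an insv pattern.  */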
8606 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8607 {
8608 switch (mode)
8609 {
8610 case I387_CW_TRUNC:
8611 /* round toward zero (truncate) */
8612 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8613 slot = SLOT_CW_TRUNC;
8614 break;
8615
8616 case I387_CW_FLOOR:
8617 /* round down toward -oo */
8618 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8619 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8620 slot = SLOT_CW_FLOOR;
8621 break;
8622
8623 case I387_CW_CEIL:
8624 /* round up toward +oo */
8625 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8626 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8627 slot = SLOT_CW_CEIL;
8628 break;
8629
8630 case I387_CW_MASK_PM:
8631 /* mask precision exception for nearbyint() */
8632 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8633 slot = SLOT_CW_MASK_PM;
8634 break;
8635
8636 default:
8637 gcc_unreachable ();
8638 }
8639 }
8640 else
8641 {
8642 switch (mode)
8643 {
8644 case I387_CW_TRUNC:
8645 /* round toward zero (truncate) */
8646 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8647 slot = SLOT_CW_TRUNC;
8648 break;
8649
8650 case I387_CW_FLOOR:
8651 /* round down toward -oo */
8652 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8653 slot = SLOT_CW_FLOOR;
8654 break;
8655
8656 case I387_CW_CEIL:
8657 /* round up toward +oo */
8658 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8659 slot = SLOT_CW_CEIL;
8660 break;
8661
8662 case I387_CW_MASK_PM:
8663 /* mask precision exception for nearbyint() */
8664 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8665 slot = SLOT_CW_MASK_PM;
8666 break;
8667
8668 default:
8669 gcc_unreachable ();
8670 }
8671 }
8672
8673 gcc_assert (slot < MAX_386_STACK_LOCALS);
8674
8675 new_mode = assign_386_stack_local (HImode, slot);
8676 emit_move_insn (new_mode, reg);
8677 }
8678
8679 /* Output code for INSN to convert a float to a signed int. OPERANDS
8680 are the insn operands. The output may be [HSD]Imode and the input
8681 operand may be [SDX]Fmode. */
8682
8683 const char *
8684 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8685 {
8686 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8687 int dimode_p = GET_MODE (operands[0]) == DImode;
8688 int round_mode = get_attr_i387_cw (insn);
8689
8690 /* Jump through a hoop or two for DImode, since the hardware has no
8691 non-popping instruction. We used to do this a different way, but
8692 that was somewhat fragile and broke with post-reload splitters. */
8693 if ((dimode_p || fisttp) && !stack_top_dies)
8694 output_asm_insn ("fld\t%y1", operands);
8695
8696 gcc_assert (STACK_TOP_P (operands[1]));
8697 gcc_assert (GET_CODE (operands[0]) == MEM);
8698
8699 if (fisttp)
8700 output_asm_insn ("fisttp%z0\t%0", operands);
8701 else
8702 {
8703 if (round_mode != I387_CW_ANY)
8704 output_asm_insn ("fldcw\t%3", operands);
8705 if (stack_top_dies || dimode_p)
8706 output_asm_insn ("fistp%z0\t%0", operands);
8707 else
8708 output_asm_insn ("fist%z0\t%0", operands);
8709 if (round_mode != I387_CW_ANY)
8710 output_asm_insn ("fldcw\t%2", operands);
8711 }
8712
8713 return "";
8714 }
8715
8716 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8717 have the values zero or one, indicates the ffreep insn's operand
8718 from the OPERANDS array. */
8719
8720 static const char *
8721 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8722 {
8723 if (TARGET_USE_FFREEP)
8724 #if HAVE_AS_IX86_FFREEP
8725 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8726 #else
8727 {
8728 static char retval[] = ".word\t0xc_df";
8729 int regno = REGNO (operands[opno]);
8730
8731 gcc_assert (FP_REGNO_P (regno));
8732
8733 retval[9] = '0' + (regno - FIRST_STACK_REG);
8734 return retval;
8735 }
8736 #endif
8737
8738 return opno ? "fstp\t%y1" : "fstp\t%y0";
8739 }
8740
8741
8742 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8743 should be used. UNORDERED_P is true when fucom should be used. */
8744
8745 const char *
8746 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8747 {
8748 int stack_top_dies;
8749 rtx cmp_op0, cmp_op1;
8750 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8751
8752 if (eflags_p)
8753 {
8754 cmp_op0 = operands[0];
8755 cmp_op1 = operands[1];
8756 }
8757 else
8758 {
8759 cmp_op0 = operands[1];
8760 cmp_op1 = operands[2];
8761 }
8762
8763 if (is_sse)
8764 {
8765 if (GET_MODE (operands[0]) == SFmode)
8766 if (unordered_p)
8767 return "ucomiss\t{%1, %0|%0, %1}";
8768 else
8769 return "comiss\t{%1, %0|%0, %1}";
8770 else
8771 if (unordered_p)
8772 return "ucomisd\t{%1, %0|%0, %1}";
8773 else
8774 return "comisd\t{%1, %0|%0, %1}";
8775 }
8776
8777 gcc_assert (STACK_TOP_P (cmp_op0));
8778
8779 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8780
8781 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8782 {
8783 if (stack_top_dies)
8784 {
8785 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8786 return output_387_ffreep (operands, 1);
8787 }
8788 else
8789 return "ftst\n\tfnstsw\t%0";
8790 }
8791
8792 if (STACK_REG_P (cmp_op1)
8793 && stack_top_dies
8794 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8795 && REGNO (cmp_op1) != FIRST_STACK_REG)
8796 {
8797 /* If the top of the 387 stack dies, and the other operand
8798 is also a stack register that dies, then this must be a
8799 `fcompp' float compare. */
8800
8801 if (eflags_p)
8802 {
8803 /* There is no double-popping fcomi variant. Fortunately,
8804 eflags is immune to the fstp's cc clobbering. */
8805 if (unordered_p)
8806 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8807 else
8808 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8809 return output_387_ffreep (operands, 0);
8810 }
8811 else
8812 {
8813 if (unordered_p)
8814 return "fucompp\n\tfnstsw\t%0";
8815 else
8816 return "fcompp\n\tfnstsw\t%0";
8817 }
8818 }
8819 else
8820 {
8821 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8822
8823 static const char * const alt[16] =
8824 {
8825 "fcom%z2\t%y2\n\tfnstsw\t%0",
8826 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8827 "fucom%z2\t%y2\n\tfnstsw\t%0",
8828 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8829
8830 "ficom%z2\t%y2\n\tfnstsw\t%0",
8831 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8832 NULL,
8833 NULL,
8834
8835 "fcomi\t{%y1, %0|%0, %y1}",
8836 "fcomip\t{%y1, %0|%0, %y1}",
8837 "fucomi\t{%y1, %0|%0, %y1}",
8838 "fucomip\t{%y1, %0|%0, %y1}",
8839
8840 NULL,
8841 NULL,
8842 NULL,
8843 NULL
8844 };
8845
8846 int mask;
8847 const char *ret;
8848
8849 mask = eflags_p << 3;
8850 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8851 mask |= unordered_p << 1;
8852 mask |= stack_top_dies;
8853
8854 gcc_assert (mask < 16);
8855 ret = alt[mask];
8856 gcc_assert (ret);
8857
8858 return ret;
8859 }
8860 }
8861
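/* Output to FILE one element of a jump table referring to local label
   VALUE, as a longword (or a quadword on 64-bit targets).  */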
8862 void
8863 ix86_output_addr_vec_elt (FILE *file, int value)
8864 {
8865 const char *directive = ASM_LONG;
8866
8867 #ifdef ASM_QUAD
8868 if (TARGET_64BIT)
8869 directive = ASM_QUAD;
8870 #else
8871 gcc_assert (!TARGET_64BIT);
8872 #endif
8873
8874 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8875 }
8876
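/* Output to FILE one element of a relative jump table: the difference
   between local label VALUE and either local label REL (64-bit), the
   GOT (via @GOTOFF), or the PIC base, depending on the target.  */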
8877 void
8878 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8879 {
8880 if (TARGET_64BIT)
8881 fprintf (file, "%s%s%d-%s%d\n",
8882 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8883 else if (HAVE_AS_GOTOFF_IN_DATA)
8884 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8885 #if TARGET_MACHO
8886 else if (TARGET_MACHO)
8887 {
8888 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8889 machopic_output_function_base_name (file);
8890 fprintf(file, "\n");
8891 }
8892 #endif
8893 else
8894 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8895 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8896 }
8897 \f
8898 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8899 for the target. */
8900
8901 void
8902 ix86_expand_clear (rtx dest)
8903 {
8904 rtx tmp;
8905
8906 /* We play register width games, which are only valid after reload. */
8907 gcc_assert (reload_completed);
8908
8909 /* Avoid HImode and its attendant prefix byte. */
8910 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8911 dest = gen_rtx_REG (SImode, REGNO (dest));
8912
8913 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8914
8915 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8916 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8917 {
8918 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8919 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8920 }
8921
8922 emit_insn (tmp);
8923 }
8924
8925 /* X is an unchanging MEM. If it is a constant pool reference, return
8926 the constant pool rtx, else NULL. */
8927
8928 rtx
8929 maybe_get_pool_constant (rtx x)
8930 {
8931 x = ix86_delegitimize_address (XEXP (x, 0));
8932
8933 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8934 return get_pool_constant (x);
8935
8936 return NULL_RTX;
8937 }
8938
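/* Expand a move of mode MODE from operands[1] to operands[0],
   legitimizing TLS and PIC symbol references and forcing operands the
   movMM patterns cannot handle directly into registers or the constant
   pool as needed.  */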
8939 void
8940 ix86_expand_move (enum machine_mode mode, rtx operands[])
8941 {
8942 int strict = (reload_in_progress || reload_completed);
8943 rtx op0, op1;
8944 enum tls_model model;
8945
8946 op0 = operands[0];
8947 op1 = operands[1];
8948
8949 if (GET_CODE (op1) == SYMBOL_REF)
8950 {
8951 model = SYMBOL_REF_TLS_MODEL (op1);
8952 if (model)
8953 {
8954 op1 = legitimize_tls_address (op1, model, true);
8955 op1 = force_operand (op1, op0);
8956 if (op1 == op0)
8957 return;
8958 }
8959 }
8960 else if (GET_CODE (op1) == CONST
8961 && GET_CODE (XEXP (op1, 0)) == PLUS
8962 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8963 {
8964 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8965 if (model)
8966 {
8967 rtx addend = XEXP (XEXP (op1, 0), 1);
8968 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8969 op1 = force_operand (op1, NULL);
8970 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8971 op0, 1, OPTAB_DIRECT);
8972 if (op1 == op0)
8973 return;
8974 }
8975 }
8976
8977 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8978 {
8979 if (TARGET_MACHO && !TARGET_64BIT)
8980 {
8981 #if TARGET_MACHO
8982 if (MACHOPIC_PURE)
8983 {
8984 rtx temp = ((reload_in_progress
8985 || ((op0 && GET_CODE (op0) == REG)
8986 && mode == Pmode))
8987 ? op0 : gen_reg_rtx (Pmode));
8988 op1 = machopic_indirect_data_reference (op1, temp);
8989 op1 = machopic_legitimize_pic_address (op1, mode,
8990 temp == op1 ? 0 : temp);
8991 }
8992 else if (MACHOPIC_INDIRECT)
8993 op1 = machopic_indirect_data_reference (op1, 0);
8994 if (op0 == op1)
8995 return;
8996 #endif
8997 }
8998 else
8999 {
9000 if (GET_CODE (op0) == MEM)
9001 op1 = force_reg (Pmode, op1);
9002 else
9003 op1 = legitimize_address (op1, op1, Pmode);
9004 }
9005 }
9006 else
9007 {
9008 if (GET_CODE (op0) == MEM
9009 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9010 || !push_operand (op0, mode))
9011 && GET_CODE (op1) == MEM)
9012 op1 = force_reg (mode, op1);
9013
9014 if (push_operand (op0, mode)
9015 && ! general_no_elim_operand (op1, mode))
9016 op1 = copy_to_mode_reg (mode, op1);
9017
9018 /* Force large constants in 64-bit compilation into a register
9019 so that they get CSEed. */
9020 if (TARGET_64BIT && mode == DImode
9021 && immediate_operand (op1, mode)
9022 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9023 && !register_operand (op0, mode)
9024 && optimize && !reload_completed && !reload_in_progress)
9025 op1 = copy_to_mode_reg (mode, op1);
9026
9027 if (FLOAT_MODE_P (mode))
9028 {
9029 /* If we are loading a floating point constant to a register,
9030 force the value to memory now, since we'll get better code
9031 out of the back end. */
9032
9033 if (strict)
9034 ;
9035 else if (GET_CODE (op1) == CONST_DOUBLE)
9036 {
9037 op1 = validize_mem (force_const_mem (mode, op1));
9038 if (!register_operand (op0, mode))
9039 {
9040 rtx temp = gen_reg_rtx (mode);
9041 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9042 emit_move_insn (op0, temp);
9043 return;
9044 }
9045 }
9046 }
9047 }
9048
9049 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9050 }
9051
9052 void
9053 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9054 {
9055 rtx op0 = operands[0], op1 = operands[1];
9056
9057 /* Force constants other than zero into memory. We do not know how
9058 the instructions used to build constants modify the upper 64 bits
9059 of the register; once we have that information, we may be able
9060 to handle some of them more efficiently. */
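  /* standard_sse_constant_p is positive only for constants (such as all
     zeros) that the insn patterns can generate directly in a register;
     anything else is spilled to the constant pool here.  */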
9061 if ((reload_in_progress | reload_completed) == 0
9062 && register_operand (op0, mode)
9063 && CONSTANT_P (op1)
9064 && standard_sse_constant_p (op1) <= 0)
9065 op1 = validize_mem (force_const_mem (mode, op1));
9066
9067 /* Make operand1 a register if it isn't already. */
9068 if (!no_new_pseudos
9069 && !register_operand (op0, mode)
9070 && !register_operand (op1, mode))
9071 {
9072 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9073 return;
9074 }
9075
9076 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9077 }
9078
9079 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9080 straight to ix86_expand_vector_move. */
9081
9082 void
9083 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9084 {
9085 rtx op0, op1, m;
9086
9087 op0 = operands[0];
9088 op1 = operands[1];
9089
9090 if (MEM_P (op1))
9091 {
9092 /* If we're optimizing for size, movups is the smallest. */
9093 if (optimize_size)
9094 {
9095 op0 = gen_lowpart (V4SFmode, op0);
9096 op1 = gen_lowpart (V4SFmode, op1);
9097 emit_insn (gen_sse_movups (op0, op1));
9098 return;
9099 }
9100
9101 /* ??? If we have typed data, then it would appear that using
9102 movdqu is the only way to get unaligned data loaded with
9103 integer type. */
9104 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9105 {
9106 op0 = gen_lowpart (V16QImode, op0);
9107 op1 = gen_lowpart (V16QImode, op1);
9108 emit_insn (gen_sse2_movdqu (op0, op1));
9109 return;
9110 }
9111
9112 if (TARGET_SSE2 && mode == V2DFmode)
9113 {
9114 rtx zero;
9115
9116 /* When SSE registers are split into halves, we can avoid
9117 writing to the top half twice. */
9118 if (TARGET_SSE_SPLIT_REGS)
9119 {
9120 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9121 zero = op0;
9122 }
9123 else
9124 {
9125 /* ??? Not sure about the best option for the Intel chips.
9126 The following would seem to satisfy; the register is
9127 entirely cleared, breaking the dependency chain. We
9128 then store to the upper half, with a dependency depth
9129 of one. A rumor has it that Intel recommends two movsd
9130 followed by an unpacklpd, but this is unconfirmed. And
9131 given that the dependency depth of the unpacklpd would
9132 still be one, I'm not sure why this would be better. */
9133 zero = CONST0_RTX (V2DFmode);
9134 }
9135
9136 m = adjust_address (op1, DFmode, 0);
9137 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9138 m = adjust_address (op1, DFmode, 8);
9139 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9140 }
9141 else
9142 {
9143 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9144 emit_move_insn (op0, CONST0_RTX (mode));
9145 else
9146 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9147
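	  /* Load the two 64-bit halves separately (movlps/movhps) rather
	     than doing a single unaligned 128-bit load.  */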
9148 if (mode != V4SFmode)
9149 op0 = gen_lowpart (V4SFmode, op0);
9150 m = adjust_address (op1, V2SFmode, 0);
9151 emit_insn (gen_sse_loadlps (op0, op0, m));
9152 m = adjust_address (op1, V2SFmode, 8);
9153 emit_insn (gen_sse_loadhps (op0, op0, m));
9154 }
9155 }
9156 else if (MEM_P (op0))
9157 {
9158 /* If we're optimizing for size, movups is the smallest. */
9159 if (optimize_size)
9160 {
9161 op0 = gen_lowpart (V4SFmode, op0);
9162 op1 = gen_lowpart (V4SFmode, op1);
9163 emit_insn (gen_sse_movups (op0, op1));
9164 return;
9165 }
9166
9167 /* ??? Similar to the load case above, only less clear because of
9168 "typeless stores". */
9169 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9170 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9171 {
9172 op0 = gen_lowpart (V16QImode, op0);
9173 op1 = gen_lowpart (V16QImode, op1);
9174 emit_insn (gen_sse2_movdqu (op0, op1));
9175 return;
9176 }
9177
9178 if (TARGET_SSE2 && mode == V2DFmode)
9179 {
9180 m = adjust_address (op0, DFmode, 0);
9181 emit_insn (gen_sse2_storelpd (m, op1));
9182 m = adjust_address (op0, DFmode, 8);
9183 emit_insn (gen_sse2_storehpd (m, op1));
9184 }
9185 else
9186 {
9187 if (mode != V4SFmode)
9188 op1 = gen_lowpart (V4SFmode, op1);
9189 m = adjust_address (op0, V2SFmode, 0);
9190 emit_insn (gen_sse_storelps (m, op1));
9191 m = adjust_address (op0, V2SFmode, 8);
9192 emit_insn (gen_sse_storehps (m, op1));
9193 }
9194 }
9195 else
9196 gcc_unreachable ();
9197 }
9198
9199 /* Expand a push in MODE. This is some mode for which we do not support
9200 proper push instructions, at least from the registers that we expect
9201 the value to live in. */
9202
9203 void
9204 ix86_expand_push (enum machine_mode mode, rtx x)
9205 {
9206 rtx tmp;
9207
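  /* Adjust the stack pointer by hand and then store X at the new top of
     stack, since there is no push instruction for this mode.  */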
9208 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9209 GEN_INT (-GET_MODE_SIZE (mode)),
9210 stack_pointer_rtx, 1, OPTAB_DIRECT);
9211 if (tmp != stack_pointer_rtx)
9212 emit_move_insn (stack_pointer_rtx, tmp);
9213
9214 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9215 emit_move_insn (tmp, x);
9216 }
9217
9218 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9219 destination to use for the operation. If different from the true
9220 destination in operands[0], a copy operation will be required. */
9221
9222 rtx
9223 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9224 rtx operands[])
9225 {
9226 int matching_memory;
9227 rtx src1, src2, dst;
9228
9229 dst = operands[0];
9230 src1 = operands[1];
9231 src2 = operands[2];
9232
9233 /* Recognize <var1> = <value> <op> <var1> for commutative operators. */
9234 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9235 && (rtx_equal_p (dst, src2)
9236 || immediate_operand (src1, mode)))
9237 {
9238 rtx temp = src1;
9239 src1 = src2;
9240 src2 = temp;
9241 }
9242
9243 /* If the destination is memory, and we do not have matching source
9244 operands, do things in registers. */
9245 matching_memory = 0;
9246 if (GET_CODE (dst) == MEM)
9247 {
9248 if (rtx_equal_p (dst, src1))
9249 matching_memory = 1;
9250 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9251 && rtx_equal_p (dst, src2))
9252 matching_memory = 2;
9253 else
9254 dst = gen_reg_rtx (mode);
9255 }
9256
9257 /* Both source operands cannot be in memory. */
9258 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9259 {
9260 if (matching_memory != 2)
9261 src2 = force_reg (mode, src2);
9262 else
9263 src1 = force_reg (mode, src1);
9264 }
9265
9266 /* If the operation is not commutative, source 1 cannot be a constant
9267 or non-matching memory. */
9268 if ((CONSTANT_P (src1)
9269 || (!matching_memory && GET_CODE (src1) == MEM))
9270 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9271 src1 = force_reg (mode, src1);
9272
9273 src1 = operands[1] = src1;
9274 src2 = operands[2] = src2;
9275 return dst;
9276 }
9277
9278 /* Similarly, but assume that the destination has already been
9279 set up properly. */
9280
9281 void
9282 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9283 enum machine_mode mode, rtx operands[])
9284 {
9285 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9286 gcc_assert (dst == operands[0]);
9287 }
9288
9289 /* Attempt to expand a binary operator. Make the expansion closer to the
9290 actual machine, than just general_operand, which will allow 3 separate
9291 memory references (one output, two input) in a single insn. */
9292
9293 void
9294 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9295 rtx operands[])
9296 {
9297 rtx src1, src2, dst, op, clob;
9298
9299 dst = ix86_fixup_binary_operands (code, mode, operands);
9300 src1 = operands[1];
9301 src2 = operands[2];
9302
9303 /* Emit the instruction. */
9304
9305 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9306 if (reload_in_progress)
9307 {
9308 /* Reload doesn't know about the flags register, and doesn't know that
9309 it doesn't want to clobber it. We can only do this with PLUS. */
9310 gcc_assert (code == PLUS);
9311 emit_insn (op);
9312 }
9313 else
9314 {
9315 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9316 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9317 }
9318
9319 /* Fix up the destination if needed. */
9320 if (dst != operands[0])
9321 emit_move_insn (operands[0], dst);
9322 }
9323
9324 /* Return TRUE or FALSE depending on whether the binary operator meets the
9325 appropriate constraints. */
9326
9327 int
9328 ix86_binary_operator_ok (enum rtx_code code,
9329 enum machine_mode mode ATTRIBUTE_UNUSED,
9330 rtx operands[3])
9331 {
9332 /* Both source operands cannot be in memory. */
9333 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9334 return 0;
9335 /* If the operation is not commutative, source 1 cannot be a constant. */
9336 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9337 return 0;
9338 /* If the destination is memory, we must have a matching source operand. */
9339 if (GET_CODE (operands[0]) == MEM
9340 && ! (rtx_equal_p (operands[0], operands[1])
9341 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9342 && rtx_equal_p (operands[0], operands[2]))))
9343 return 0;
9344 /* If the operation is not commutative and source 1 is memory, we must
9345 have a matching destination. */
9346 if (GET_CODE (operands[1]) == MEM
9347 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9348 && ! rtx_equal_p (operands[0], operands[1]))
9349 return 0;
9350 return 1;
9351 }
9352
9353 /* Attempt to expand a unary operator. Make the expansion closer to the
9354 actual machine, than just general_operand, which will allow 2 separate
9355 memory references (one output, one input) in a single insn. */
9356
9357 void
9358 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9359 rtx operands[])
9360 {
9361 int matching_memory;
9362 rtx src, dst, op, clob;
9363
9364 dst = operands[0];
9365 src = operands[1];
9366
9367 /* If the destination is memory, and we do not have matching source
9368 operands, do things in registers. */
9369 matching_memory = 0;
9370 if (MEM_P (dst))
9371 {
9372 if (rtx_equal_p (dst, src))
9373 matching_memory = 1;
9374 else
9375 dst = gen_reg_rtx (mode);
9376 }
9377
9378 /* When the source operand is memory, the destination must match. */
9379 if (MEM_P (src) && !matching_memory)
9380 src = force_reg (mode, src);
9381
9382 /* Emit the instruction. */
9383
9384 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9385 if (reload_in_progress || code == NOT)
9386 {
9387 /* Reload doesn't know about the flags register, and doesn't know that
9388 it doesn't want to clobber it. */
9389 gcc_assert (code == NOT);
9390 emit_insn (op);
9391 }
9392 else
9393 {
9394 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9395 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9396 }
9397
9398 /* Fix up the destination if needed. */
9399 if (dst != operands[0])
9400 emit_move_insn (operands[0], dst);
9401 }
9402
9403 /* Return TRUE or FALSE depending on whether the unary operator meets the
9404 appropriate constraints. */
9405
9406 int
9407 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9408 enum machine_mode mode ATTRIBUTE_UNUSED,
9409 rtx operands[2] ATTRIBUTE_UNUSED)
9410 {
9411 /* If one of the operands is memory, source and destination must match. */
9412 if ((GET_CODE (operands[0]) == MEM
9413 || GET_CODE (operands[1]) == MEM)
9414 && ! rtx_equal_p (operands[0], operands[1]))
9415 return FALSE;
9416 return TRUE;
9417 }
9418
9419 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9420 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9421 true, then replicate the mask for all elements of the vector register.
9422 If INVERT is true, then create a mask excluding the sign bit. */
9423
9424 rtx
9425 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9426 {
9427 enum machine_mode vec_mode;
9428 HOST_WIDE_INT hi, lo;
9429 int shift = 63;
9430 rtvec v;
9431 rtx mask;
9432
9433 /* Find the sign bit, sign extended to 2*HWI. */
9434 if (mode == SFmode)
9435 lo = 0x80000000, hi = lo < 0;
9436 else if (HOST_BITS_PER_WIDE_INT >= 64)
9437 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9438 else
9439 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9440
9441 if (invert)
9442 lo = ~lo, hi = ~hi;
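  /* LO/HI now hold the sign bit of the element mode (bit 31 for SFmode,
     bit 63 for DFmode), or its complement when INVERT.  */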
9443
9444 /* Force this value into the low part of a fp vector constant. */
9445 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9446 mask = gen_lowpart (mode, mask);
9447
9448 if (mode == SFmode)
9449 {
9450 if (vect)
9451 v = gen_rtvec (4, mask, mask, mask, mask);
9452 else
9453 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9454 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9455 vec_mode = V4SFmode;
9456 }
9457 else
9458 {
9459 if (vect)
9460 v = gen_rtvec (2, mask, mask);
9461 else
9462 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9463 vec_mode = V2DFmode;
9464 }
9465
9466 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9467 }
9468
9469 /* Generate code for floating point ABS or NEG. */
9470
9471 void
9472 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9473 rtx operands[])
9474 {
9475 rtx mask, set, use, clob, dst, src;
9476 bool matching_memory;
9477 bool use_sse = false;
9478 bool vector_mode = VECTOR_MODE_P (mode);
9479 enum machine_mode elt_mode = mode;
9480
9481 if (vector_mode)
9482 {
9483 elt_mode = GET_MODE_INNER (mode);
9484 use_sse = true;
9485 }
9486 else if (TARGET_SSE_MATH)
9487 use_sse = SSE_FLOAT_MODE_P (mode);
9488
9489 /* NEG and ABS performed with SSE use bitwise mask operations.
9490 Create the appropriate mask now. */
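  /* NEG uses XOR with the sign-bit mask to flip the sign bit; ABS uses
     AND with the complemented mask to clear it.  */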
9491 if (use_sse)
9492 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9493 else
9494 mask = NULL_RTX;
9495
9496 dst = operands[0];
9497 src = operands[1];
9498
9499 /* If the destination is memory, and we don't have matching source
9500 operands or we're using the x87, do things in registers. */
9501 matching_memory = false;
9502 if (MEM_P (dst))
9503 {
9504 if (use_sse && rtx_equal_p (dst, src))
9505 matching_memory = true;
9506 else
9507 dst = gen_reg_rtx (mode);
9508 }
9509 if (MEM_P (src) && !matching_memory)
9510 src = force_reg (mode, src);
9511
9512 if (vector_mode)
9513 {
9514 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9515 set = gen_rtx_SET (VOIDmode, dst, set);
9516 emit_insn (set);
9517 }
9518 else
9519 {
9520 set = gen_rtx_fmt_e (code, mode, src);
9521 set = gen_rtx_SET (VOIDmode, dst, set);
9522 if (mask)
9523 {
9524 use = gen_rtx_USE (VOIDmode, mask);
9525 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9526 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9527 gen_rtvec (3, set, use, clob)));
9528 }
9529 else
9530 emit_insn (set);
9531 }
9532
9533 if (dst != operands[0])
9534 emit_move_insn (operands[0], dst);
9535 }
9536
9537 /* Expand a copysign operation. Special case operand 0 being a constant. */
9538
9539 void
9540 ix86_expand_copysign (rtx operands[])
9541 {
9542 enum machine_mode mode, vmode;
9543 rtx dest, op0, op1, mask, nmask;
9544
9545 dest = operands[0];
9546 op0 = operands[1];
9547 op1 = operands[2];
9548
9549 mode = GET_MODE (dest);
9550 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9551
9552 if (GET_CODE (op0) == CONST_DOUBLE)
9553 {
9554 rtvec v;
9555
9556 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9557 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9558
9559 if (op0 == CONST0_RTX (mode))
9560 op0 = CONST0_RTX (vmode);
9561 else
9562 {
9563 if (mode == SFmode)
9564 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9565 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9566 else
9567 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9568 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9569 }
9570
9571 mask = ix86_build_signbit_mask (mode, 0, 0);
9572
9573 if (mode == SFmode)
9574 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9575 else
9576 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9577 }
9578 else
9579 {
9580 nmask = ix86_build_signbit_mask (mode, 0, 1);
9581 mask = ix86_build_signbit_mask (mode, 0, 0);
9582
9583 if (mode == SFmode)
9584 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9585 else
9586 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9587 }
9588 }
9589
9590 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9591 be a constant, and so has already been expanded into a vector constant. */
9592
9593 void
9594 ix86_split_copysign_const (rtx operands[])
9595 {
9596 enum machine_mode mode, vmode;
9597 rtx dest, op0, op1, mask, x;
9598
9599 dest = operands[0];
9600 op0 = operands[1];
9601 op1 = operands[2];
9602 mask = operands[3];
9603
9604 mode = GET_MODE (dest);
9605 vmode = GET_MODE (mask);
9606
9607 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9608 x = gen_rtx_AND (vmode, dest, mask);
9609 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9610
9611 if (op0 != CONST0_RTX (vmode))
9612 {
9613 x = gen_rtx_IOR (vmode, dest, op0);
9614 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9615 }
9616 }
9617
9618 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9619 so we have to do two masks. */
9620
9621 void
9622 ix86_split_copysign_var (rtx operands[])
9623 {
9624 enum machine_mode mode, vmode;
9625 rtx dest, scratch, op0, op1, mask, nmask, x;
9626
9627 dest = operands[0];
9628 scratch = operands[1];
9629 op0 = operands[2];
9630 op1 = operands[3];
9631 nmask = operands[4];
9632 mask = operands[5];
9633
9634 mode = GET_MODE (dest);
9635 vmode = GET_MODE (mask);
9636
9637 if (rtx_equal_p (op0, op1))
9638 {
9639 /* Shouldn't happen often (it's useless, obviously), but when it does
9640 we'd generate incorrect code if we continue below. */
9641 emit_move_insn (dest, op0);
9642 return;
9643 }
9644
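  /* The net effect below is DEST = (OP0 & NMASK) | (OP1 & MASK), i.e. the
     magnitude of OP0 combined with the sign bit of OP1.  */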
9645 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9646 {
9647 gcc_assert (REGNO (op1) == REGNO (scratch));
9648
9649 x = gen_rtx_AND (vmode, scratch, mask);
9650 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9651
9652 dest = mask;
9653 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9654 x = gen_rtx_NOT (vmode, dest);
9655 x = gen_rtx_AND (vmode, x, op0);
9656 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9657 }
9658 else
9659 {
9660 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9661 {
9662 x = gen_rtx_AND (vmode, scratch, mask);
9663 }
9664 else /* alternative 2,4 */
9665 {
9666 gcc_assert (REGNO (mask) == REGNO (scratch));
9667 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9668 x = gen_rtx_AND (vmode, scratch, op1);
9669 }
9670 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9671
9672 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9673 {
9674 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9675 x = gen_rtx_AND (vmode, dest, nmask);
9676 }
9677 else /* alternative 3,4 */
9678 {
9679 gcc_assert (REGNO (nmask) == REGNO (dest));
9680 dest = nmask;
9681 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9682 x = gen_rtx_AND (vmode, dest, op0);
9683 }
9684 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9685 }
9686
9687 x = gen_rtx_IOR (vmode, dest, scratch);
9688 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9689 }
9690
9691 /* Return TRUE or FALSE depending on whether the first SET in INSN
9692 has source and destination with matching CC modes, and that the
9693 CC mode is at least as constrained as REQ_MODE. */
9694
9695 int
9696 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9697 {
9698 rtx set;
9699 enum machine_mode set_mode;
9700
9701 set = PATTERN (insn);
9702 if (GET_CODE (set) == PARALLEL)
9703 set = XVECEXP (set, 0, 0);
9704 gcc_assert (GET_CODE (set) == SET);
9705 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9706
9707 set_mode = GET_MODE (SET_DEST (set));
9708 switch (set_mode)
9709 {
9710 case CCNOmode:
9711 if (req_mode != CCNOmode
9712 && (req_mode != CCmode
9713 || XEXP (SET_SRC (set), 1) != const0_rtx))
9714 return 0;
9715 break;
9716 case CCmode:
9717 if (req_mode == CCGCmode)
9718 return 0;
9719 /* FALLTHRU */
9720 case CCGCmode:
9721 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9722 return 0;
9723 /* FALLTHRU */
9724 case CCGOCmode:
9725 if (req_mode == CCZmode)
9726 return 0;
9727 /* FALLTHRU */
9728 case CCZmode:
9729 break;
9730
9731 default:
9732 gcc_unreachable ();
9733 }
9734
9735 return (GET_MODE (SET_SRC (set)) == set_mode);
9736 }
9737
9738 /* Generate insn patterns to do an integer compare of OPERANDS. */
9739
9740 static rtx
9741 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9742 {
9743 enum machine_mode cmpmode;
9744 rtx tmp, flags;
9745
9746 cmpmode = SELECT_CC_MODE (code, op0, op1);
9747 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9748
9749 /* This is very simple, but making the interface the same as in the
9750 FP case makes the rest of the code easier. */
9751 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9752 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9753
9754 /* Return the test that should be put into the flags user, i.e.
9755 the bcc, scc, or cmov instruction. */
9756 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9757 }
9758
9759 /* Figure out whether to use ordered or unordered fp comparisons.
9760 Return the appropriate mode to use. */
9761
9762 enum machine_mode
9763 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9764 {
9765 /* ??? In order to make all comparisons reversible, we do all comparisons
9766 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9767 between all forms of trapping and nontrapping comparisons, we can make inequality
9768 comparisons trapping again, since it results in better code when using
9769 FCOM based compares. */
9770 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9771 }
9772
9773 enum machine_mode
9774 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9775 {
9776 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9777 return ix86_fp_compare_mode (code);
9778 switch (code)
9779 {
9780 /* Only zero flag is needed. */
9781 case EQ: /* ZF=0 */
9782 case NE: /* ZF!=0 */
9783 return CCZmode;
9784 /* Codes needing carry flag. */
9785 case GEU: /* CF=0 */
9786 case GTU: /* CF=0 & ZF=0 */
9787 case LTU: /* CF=1 */
9788 case LEU: /* CF=1 | ZF=1 */
9789 return CCmode;
9790 /* Codes possibly doable only with sign flag when
9791 comparing against zero. */
9792 case GE: /* SF=OF or SF=0 */
9793 case LT: /* SF<>OF or SF=1 */
9794 if (op1 == const0_rtx)
9795 return CCGOCmode;
9796 else
9797 /* For other cases Carry flag is not required. */
9798 return CCGCmode;
9799 /* Codes doable only with the sign flag when comparing
9800 against zero, but for which there is no matching jump
9801 instruction, so we have to use relational tests that
9802 rely on the overflow flag being zero. */
9803 case GT: /* ZF=0 & SF=OF */
9804 case LE: /* ZF=1 | SF<>OF */
9805 if (op1 == const0_rtx)
9806 return CCNOmode;
9807 else
9808 return CCGCmode;
9809 /* The strcmp pattern does (use flags), and combine may ask us for the
9810 proper mode. */
9811 case USE:
9812 return CCmode;
9813 default:
9814 gcc_unreachable ();
9815 }
9816 }
9817
9818 /* Return the fixed registers used for condition codes. */
9819
9820 static bool
9821 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9822 {
9823 *p1 = FLAGS_REG;
9824 *p2 = FPSR_REG;
9825 return true;
9826 }
9827
9828 /* If two condition code modes are compatible, return a condition code
9829 mode which is compatible with both. Otherwise, return
9830 VOIDmode. */
9831
9832 static enum machine_mode
9833 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9834 {
9835 if (m1 == m2)
9836 return m1;
9837
9838 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9839 return VOIDmode;
9840
9841 if ((m1 == CCGCmode && m2 == CCGOCmode)
9842 || (m1 == CCGOCmode && m2 == CCGCmode))
9843 return CCGCmode;
9844
9845 switch (m1)
9846 {
9847 default:
9848 gcc_unreachable ();
9849
9850 case CCmode:
9851 case CCGCmode:
9852 case CCGOCmode:
9853 case CCNOmode:
9854 case CCZmode:
9855 switch (m2)
9856 {
9857 default:
9858 return VOIDmode;
9859
9860 case CCmode:
9861 case CCGCmode:
9862 case CCGOCmode:
9863 case CCNOmode:
9864 case CCZmode:
9865 return CCmode;
9866 }
9867
9868 case CCFPmode:
9869 case CCFPUmode:
9870 /* These are only compatible with themselves, which we already
9871 checked above. */
9872 return VOIDmode;
9873 }
9874 }
9875
9876 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9877
9878 int
9879 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9880 {
9881 enum rtx_code swapped_code = swap_condition (code);
9882 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9883 || (ix86_fp_comparison_cost (swapped_code)
9884 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9885 }
9886
9887 /* Swap, force into registers, or otherwise massage the two operands
9888 to a fp comparison. The operands are updated in place; the new
9889 comparison code is returned. */
9890
9891 static enum rtx_code
9892 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9893 {
9894 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9895 rtx op0 = *pop0, op1 = *pop1;
9896 enum machine_mode op_mode = GET_MODE (op0);
9897 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9898
9899 /* All of the unordered compare instructions only work on registers.
9900 The same is true of the fcomi compare instructions. The XFmode
9901 compare instructions require registers except when comparing
9902 against zero or when converting operand 1 from fixed point to
9903 floating point. */
9904
9905 if (!is_sse
9906 && (fpcmp_mode == CCFPUmode
9907 || (op_mode == XFmode
9908 && ! (standard_80387_constant_p (op0) == 1
9909 || standard_80387_constant_p (op1) == 1)
9910 && GET_CODE (op1) != FLOAT)
9911 || ix86_use_fcomi_compare (code)))
9912 {
9913 op0 = force_reg (op_mode, op0);
9914 op1 = force_reg (op_mode, op1);
9915 }
9916 else
9917 {
9918 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9919 things around if they appear profitable, otherwise force op0
9920 into a register. */
9921
9922 if (standard_80387_constant_p (op0) == 0
9923 || (GET_CODE (op0) == MEM
9924 && ! (standard_80387_constant_p (op1) == 0
9925 || GET_CODE (op1) == MEM)))
9926 {
9927 rtx tmp;
9928 tmp = op0, op0 = op1, op1 = tmp;
9929 code = swap_condition (code);
9930 }
9931
9932 if (GET_CODE (op0) != REG)
9933 op0 = force_reg (op_mode, op0);
9934
9935 if (CONSTANT_P (op1))
9936 {
9937 int tmp = standard_80387_constant_p (op1);
9938 if (tmp == 0)
9939 op1 = validize_mem (force_const_mem (op_mode, op1));
9940 else if (tmp == 1)
9941 {
9942 if (TARGET_CMOVE)
9943 op1 = force_reg (op_mode, op1);
9944 }
9945 else
9946 op1 = force_reg (op_mode, op1);
9947 }
9948 }
9949
9950 /* Try to rearrange the comparison to make it cheaper. */
9951 if (ix86_fp_comparison_cost (code)
9952 > ix86_fp_comparison_cost (swap_condition (code))
9953 && (GET_CODE (op1) == REG || !no_new_pseudos))
9954 {
9955 rtx tmp;
9956 tmp = op0, op0 = op1, op1 = tmp;
9957 code = swap_condition (code);
9958 if (GET_CODE (op0) != REG)
9959 op0 = force_reg (op_mode, op0);
9960 }
9961
9962 *pop0 = op0;
9963 *pop1 = op1;
9964 return code;
9965 }
9966
9967 /* Convert comparison codes we use to represent FP comparison to integer
9968 code that will result in proper branch. Return UNKNOWN if no such code
9969 is available. */
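/* After fcomi or fnstsw+sahf the FP relation is reflected in CF/ZF/PF just
   as an unsigned integer comparison would set them, hence the mapping to
   the unsigned condition codes below.  */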
9970
9971 enum rtx_code
9972 ix86_fp_compare_code_to_integer (enum rtx_code code)
9973 {
9974 switch (code)
9975 {
9976 case GT:
9977 return GTU;
9978 case GE:
9979 return GEU;
9980 case ORDERED:
9981 case UNORDERED:
9982 return code;
9984 case UNEQ:
9985 return EQ;
9987 case UNLT:
9988 return LTU;
9990 case UNLE:
9991 return LEU;
9993 case LTGT:
9994 return NE;
9996 default:
9997 return UNKNOWN;
9998 }
9999 }
10000
10001 /* Split comparison code CODE into comparisons we can do using branch
10002 instructions. BYPASS_CODE is the comparison code for the branch that
10003 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10004 is not required, its value is set to UNKNOWN.
10005 We never require more than two branches. */
10006
10007 void
10008 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10009 enum rtx_code *first_code,
10010 enum rtx_code *second_code)
10011 {
10012 *first_code = code;
10013 *bypass_code = UNKNOWN;
10014 *second_code = UNKNOWN;
10015
10016 /* The fcomi comparison sets flags as follows:
10017
10018 cmp ZF PF CF
10019 > 0 0 0
10020 < 0 0 1
10021 = 1 0 0
10022 un 1 1 1 */
10023
10024 switch (code)
10025 {
10026 case GT: /* GTU - CF=0 & ZF=0 */
10027 case GE: /* GEU - CF=0 */
10028 case ORDERED: /* PF=0 */
10029 case UNORDERED: /* PF=1 */
10030 case UNEQ: /* EQ - ZF=1 */
10031 case UNLT: /* LTU - CF=1 */
10032 case UNLE: /* LEU - CF=1 | ZF=1 */
10033 case LTGT: /* EQ - ZF=0 */
10034 break;
10035 case LT: /* LTU - CF=1 - fails on unordered */
10036 *first_code = UNLT;
10037 *bypass_code = UNORDERED;
10038 break;
10039 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10040 *first_code = UNLE;
10041 *bypass_code = UNORDERED;
10042 break;
10043 case EQ: /* EQ - ZF=1 - fails on unordered */
10044 *first_code = UNEQ;
10045 *bypass_code = UNORDERED;
10046 break;
10047 case NE: /* NE - ZF=0 - fails on unordered */
10048 *first_code = LTGT;
10049 *second_code = UNORDERED;
10050 break;
10051 case UNGE: /* GEU - CF=0 - fails on unordered */
10052 *first_code = GE;
10053 *second_code = UNORDERED;
10054 break;
10055 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10056 *first_code = GT;
10057 *second_code = UNORDERED;
10058 break;
10059 default:
10060 gcc_unreachable ();
10061 }
10062 if (!TARGET_IEEE_FP)
10063 {
10064 *second_code = UNKNOWN;
10065 *bypass_code = UNKNOWN;
10066 }
10067 }
10068
10069 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10070 All of the following functions use the number of instructions as the cost metric.
10071 In the future this should be tweaked to compute bytes for optimize_size and
10072 take into account the performance of various instructions on various CPUs. */
10073 static int
10074 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10075 {
10076 if (!TARGET_IEEE_FP)
10077 return 4;
10078 /* The cost of code output by ix86_expand_fp_compare. */
10079 switch (code)
10080 {
10081 case UNLE:
10082 case UNLT:
10083 case LTGT:
10084 case GT:
10085 case GE:
10086 case UNORDERED:
10087 case ORDERED:
10088 case UNEQ:
10089 return 4;
10091 case LT:
10092 case NE:
10093 case EQ:
10094 case UNGE:
10095 return 5;
10097 case LE:
10098 case UNGT:
10099 return 6;
10101 default:
10102 gcc_unreachable ();
10103 }
10104 }
10105
10106 /* Return cost of comparison done using fcomi operation.
10107 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10108 static int
10109 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10110 {
10111 enum rtx_code bypass_code, first_code, second_code;
10112 /* Return an arbitrarily high cost when the instruction is not supported -
10113 this prevents gcc from using it. */
10114 if (!TARGET_CMOVE)
10115 return 1024;
10116 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
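  /* A base cost of 2 covers the fcomi and its conditional jump; one more is
     added when a bypass or second branch is needed.  */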
10117 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10118 }
10119
10120 /* Return cost of comparison done using sahf operation.
10121 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10122 static int
10123 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10124 {
10125 enum rtx_code bypass_code, first_code, second_code;
10126 /* Return an arbitrarily high cost when the instruction is not preferred -
10127 this keeps gcc from using it. */
10128 if (!TARGET_USE_SAHF && !optimize_size)
10129 return 1024;
10130 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10131 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10132 }
10133
10134 /* Compute cost of the comparison done using any method.
10135 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10136 static int
10137 ix86_fp_comparison_cost (enum rtx_code code)
10138 {
10139 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10140 int min;
10141
10142 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10143 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10144
10145 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10146 if (min > sahf_cost)
10147 min = sahf_cost;
10148 if (min > fcomi_cost)
10149 min = fcomi_cost;
10150 return min;
10151 }
10152
10153 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10154
10155 static rtx
10156 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10157 rtx *second_test, rtx *bypass_test)
10158 {
10159 enum machine_mode fpcmp_mode, intcmp_mode;
10160 rtx tmp, tmp2;
10161 int cost = ix86_fp_comparison_cost (code);
10162 enum rtx_code bypass_code, first_code, second_code;
10163
10164 fpcmp_mode = ix86_fp_compare_mode (code);
10165 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10166
10167 if (second_test)
10168 *second_test = NULL_RTX;
10169 if (bypass_test)
10170 *bypass_test = NULL_RTX;
10171
10172 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10173
10174 /* Do fcomi/sahf based test when profitable. */
10175 if ((bypass_code == UNKNOWN || bypass_test)
10176 && (second_code == UNKNOWN || second_test)
10177 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10178 {
10179 if (TARGET_CMOVE)
10180 {
10181 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10182 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10183 tmp);
10184 emit_insn (tmp);
10185 }
10186 else
10187 {
10188 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10189 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10190 if (!scratch)
10191 scratch = gen_reg_rtx (HImode);
10192 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10193 emit_insn (gen_x86_sahf_1 (scratch));
10194 }
10195
10196 /* The FP codes work out to act like unsigned. */
10197 intcmp_mode = fpcmp_mode;
10198 code = first_code;
10199 if (bypass_code != UNKNOWN)
10200 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10201 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10202 const0_rtx);
10203 if (second_code != UNKNOWN)
10204 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10205 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10206 const0_rtx);
10207 }
10208 else
10209 {
10210 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10211 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10212 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10213 if (!scratch)
10214 scratch = gen_reg_rtx (HImode);
10215 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10216
10217 /* In the unordered case, we have to check C2 for NaN's, which
10218 doesn't happen to work out to anything nice combination-wise.
10219 So do some bit twiddling on the value we've got in AH to come
10220 up with an appropriate set of condition codes. */
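	  /* In AH (after fnstsw) the FPU condition bits are C0 = 0x01,
	     C2 = 0x04 and C3 = 0x40, so 0x45 tests C0|C2|C3.  */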
10221
10222 intcmp_mode = CCNOmode;
10223 switch (code)
10224 {
10225 case GT:
10226 case UNGT:
10227 if (code == GT || !TARGET_IEEE_FP)
10228 {
10229 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10230 code = EQ;
10231 }
10232 else
10233 {
10234 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10235 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10236 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10237 intcmp_mode = CCmode;
10238 code = GEU;
10239 }
10240 break;
10241 case LT:
10242 case UNLT:
10243 if (code == LT && TARGET_IEEE_FP)
10244 {
10245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10247 intcmp_mode = CCmode;
10248 code = EQ;
10249 }
10250 else
10251 {
10252 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10253 code = NE;
10254 }
10255 break;
10256 case GE:
10257 case UNGE:
10258 if (code == GE || !TARGET_IEEE_FP)
10259 {
10260 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10261 code = EQ;
10262 }
10263 else
10264 {
10265 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10266 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10267 GEN_INT (0x01)));
10268 code = NE;
10269 }
10270 break;
10271 case LE:
10272 case UNLE:
10273 if (code == LE && TARGET_IEEE_FP)
10274 {
10275 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10276 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10277 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10278 intcmp_mode = CCmode;
10279 code = LTU;
10280 }
10281 else
10282 {
10283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10284 code = NE;
10285 }
10286 break;
10287 case EQ:
10288 case UNEQ:
10289 if (code == EQ && TARGET_IEEE_FP)
10290 {
10291 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10292 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10293 intcmp_mode = CCmode;
10294 code = EQ;
10295 }
10296 else
10297 {
10298 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10299 code = NE;
10300 break;
10301 }
10302 break;
10303 case NE:
10304 case LTGT:
10305 if (code == NE && TARGET_IEEE_FP)
10306 {
10307 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10308 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10309 GEN_INT (0x40)));
10310 code = NE;
10311 }
10312 else
10313 {
10314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10315 code = EQ;
10316 }
10317 break;
10318
10319 case UNORDERED:
10320 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10321 code = NE;
10322 break;
10323 case ORDERED:
10324 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10325 code = EQ;
10326 break;
10327
10328 default:
10329 gcc_unreachable ();
10330 }
10331 }
10332
10333 /* Return the test that should be put into the flags user, i.e.
10334 the bcc, scc, or cmov instruction. */
10335 return gen_rtx_fmt_ee (code, VOIDmode,
10336 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10337 const0_rtx);
10338 }
10339
10340 rtx
10341 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10342 {
10343 rtx op0, op1, ret;
10344 op0 = ix86_compare_op0;
10345 op1 = ix86_compare_op1;
10346
10347 if (second_test)
10348 *second_test = NULL_RTX;
10349 if (bypass_test)
10350 *bypass_test = NULL_RTX;
10351
10352 if (ix86_compare_emitted)
10353 {
10354 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10355 ix86_compare_emitted = NULL_RTX;
10356 }
10357 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10358 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10359 second_test, bypass_test);
10360 else
10361 ret = ix86_expand_int_compare (code, op0, op1);
10362
10363 return ret;
10364 }
10365
10366 /* Return true if the CODE will result in nontrivial jump sequence. */
10367 bool
10368 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10369 {
10370 enum rtx_code bypass_code, first_code, second_code;
10371 if (!TARGET_CMOVE)
10372 return true;
10373 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10374 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10375 }
10376
10377 void
10378 ix86_expand_branch (enum rtx_code code, rtx label)
10379 {
10380 rtx tmp;
10381
10382 /* If we have emitted a compare insn, go straight to simple.
10383 ix86_expand_compare won't emit anything if ix86_compare_emitted
10384 is non NULL. */
10385 if (ix86_compare_emitted)
10386 goto simple;
10387
10388 switch (GET_MODE (ix86_compare_op0))
10389 {
10390 case QImode:
10391 case HImode:
10392 case SImode:
10393 simple:
10394 tmp = ix86_expand_compare (code, NULL, NULL);
10395 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10396 gen_rtx_LABEL_REF (VOIDmode, label),
10397 pc_rtx);
10398 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10399 return;
10400
10401 case SFmode:
10402 case DFmode:
10403 case XFmode:
10404 {
10405 rtvec vec;
10406 int use_fcomi;
10407 enum rtx_code bypass_code, first_code, second_code;
10408
10409 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10410 &ix86_compare_op1);
10411
10412 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10413
10414 /* Check whether we will use the natural sequence with one jump. If
10415 so, we can expand the jump early. Otherwise delay expansion by
10416 creating a compound insn so as not to confuse the optimizers. */
10417 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10418 && TARGET_CMOVE)
10419 {
10420 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10421 gen_rtx_LABEL_REF (VOIDmode, label),
10422 pc_rtx, NULL_RTX, NULL_RTX);
10423 }
10424 else
10425 {
10426 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10427 ix86_compare_op0, ix86_compare_op1);
10428 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10429 gen_rtx_LABEL_REF (VOIDmode, label),
10430 pc_rtx);
10431 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10432
10433 use_fcomi = ix86_use_fcomi_compare (code);
10434 vec = rtvec_alloc (3 + !use_fcomi);
10435 RTVEC_ELT (vec, 0) = tmp;
10436 RTVEC_ELT (vec, 1)
10437 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10438 RTVEC_ELT (vec, 2)
10439 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10440 if (! use_fcomi)
10441 RTVEC_ELT (vec, 3)
10442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10443
10444 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10445 }
10446 return;
10447 }
10448
10449 case DImode:
10450 if (TARGET_64BIT)
10451 goto simple;
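	/* FALLTHRU */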
10452 case TImode:
10453 /* Expand DImode branch into multiple compare+branch. */
10454 {
10455 rtx lo[2], hi[2], label2;
10456 enum rtx_code code1, code2, code3;
10457 enum machine_mode submode;
10458
10459 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10460 {
10461 tmp = ix86_compare_op0;
10462 ix86_compare_op0 = ix86_compare_op1;
10463 ix86_compare_op1 = tmp;
10464 code = swap_condition (code);
10465 }
10466 if (GET_MODE (ix86_compare_op0) == DImode)
10467 {
10468 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10469 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10470 submode = SImode;
10471 }
10472 else
10473 {
10474 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10475 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10476 submode = DImode;
10477 }
10478
10479 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10480 avoid two branches. This costs one extra insn, so disable when
10481 optimizing for size. */
10482
10483 if ((code == EQ || code == NE)
10484 && (!optimize_size
10485 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10486 {
10487 rtx xor0, xor1;
10488
10489 xor1 = hi[0];
10490 if (hi[1] != const0_rtx)
10491 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10492 NULL_RTX, 0, OPTAB_WIDEN);
10493
10494 xor0 = lo[0];
10495 if (lo[1] != const0_rtx)
10496 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10497 NULL_RTX, 0, OPTAB_WIDEN);
10498
10499 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10500 NULL_RTX, 0, OPTAB_WIDEN);
10501
10502 ix86_compare_op0 = tmp;
10503 ix86_compare_op1 = const0_rtx;
10504 ix86_expand_branch (code, label);
10505 return;
10506 }
10507
10508 /* Otherwise, if we are doing a less-than or greater-than-or-equal comparison,
10509 op1 is a constant and the low word is zero, then we can just
10510 examine the high word. */
10511
10512 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10513 switch (code)
10514 {
10515 case LT: case LTU: case GE: case GEU:
10516 ix86_compare_op0 = hi[0];
10517 ix86_compare_op1 = hi[1];
10518 ix86_expand_branch (code, label);
10519 return;
10520 default:
10521 break;
10522 }
10523
10524 /* Otherwise, we need two or three jumps. */
10525
10526 label2 = gen_label_rtx ();
10527
10528 code1 = code;
10529 code2 = swap_condition (code);
10530 code3 = unsigned_condition (code);
10531
10532 switch (code)
10533 {
10534 case LT: case GT: case LTU: case GTU:
10535 break;
10536
10537 case LE: code1 = LT; code2 = GT; break;
10538 case GE: code1 = GT; code2 = LT; break;
10539 case LEU: code1 = LTU; code2 = GTU; break;
10540 case GEU: code1 = GTU; code2 = LTU; break;
10541
10542 case EQ: code1 = UNKNOWN; code2 = NE; break;
10543 case NE: code2 = UNKNOWN; break;
10544
10545 default:
10546 gcc_unreachable ();
10547 }
10548
10549 /*
10550 * a < b =>
10551 * if (hi(a) < hi(b)) goto true;
10552 * if (hi(a) > hi(b)) goto false;
10553 * if (lo(a) < lo(b)) goto true;
10554 * false:
10555 */
10556
10557 ix86_compare_op0 = hi[0];
10558 ix86_compare_op1 = hi[1];
10559
10560 if (code1 != UNKNOWN)
10561 ix86_expand_branch (code1, label);
10562 if (code2 != UNKNOWN)
10563 ix86_expand_branch (code2, label2);
10564
10565 ix86_compare_op0 = lo[0];
10566 ix86_compare_op1 = lo[1];
10567 ix86_expand_branch (code3, label);
10568
10569 if (code2 != UNKNOWN)
10570 emit_label (label2);
10571 return;
10572 }
10573
10574 default:
10575 gcc_unreachable ();
10576 }
10577 }
10578
10579 /* Split branch based on floating point condition. */
10580 void
10581 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10582 rtx target1, rtx target2, rtx tmp, rtx pushed)
10583 {
10584 rtx second, bypass;
10585 rtx label = NULL_RTX;
10586 rtx condition;
10587 int bypass_probability = -1, second_probability = -1, probability = -1;
10588 rtx i;
10589
10590 if (target2 != pc_rtx)
10591 {
10592 rtx tmp = target2;
10593 code = reverse_condition_maybe_unordered (code);
10594 target2 = target1;
10595 target1 = tmp;
10596 }
10597
10598 condition = ix86_expand_fp_compare (code, op1, op2,
10599 tmp, &second, &bypass);
10600
10601 /* Remove pushed operand from stack. */
10602 if (pushed)
10603 ix86_free_from_memory (GET_MODE (pushed));
10604
10605 if (split_branch_probability >= 0)
10606 {
10607 /* Distribute the probabilities across the jumps.
10608 Assume that BYPASS and SECOND always test
10609 for UNORDERED. */
10610 probability = split_branch_probability;
10611
10612 /* A value of 1 is low enough that there is no need for the probability
10613 to be updated. Later we may run some experiments and see
10614 if unordered values are more frequent in practice. */
10615 if (bypass)
10616 bypass_probability = 1;
10617 if (second)
10618 second_probability = 1;
10619 }
10620 if (bypass != NULL_RTX)
10621 {
10622 label = gen_label_rtx ();
10623 i = emit_jump_insn (gen_rtx_SET
10624 (VOIDmode, pc_rtx,
10625 gen_rtx_IF_THEN_ELSE (VOIDmode,
10626 bypass,
10627 gen_rtx_LABEL_REF (VOIDmode,
10628 label),
10629 pc_rtx)));
10630 if (bypass_probability >= 0)
10631 REG_NOTES (i)
10632 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10633 GEN_INT (bypass_probability),
10634 REG_NOTES (i));
10635 }
10636 i = emit_jump_insn (gen_rtx_SET
10637 (VOIDmode, pc_rtx,
10638 gen_rtx_IF_THEN_ELSE (VOIDmode,
10639 condition, target1, target2)));
10640 if (probability >= 0)
10641 REG_NOTES (i)
10642 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10643 GEN_INT (probability),
10644 REG_NOTES (i));
10645 if (second != NULL_RTX)
10646 {
10647 i = emit_jump_insn (gen_rtx_SET
10648 (VOIDmode, pc_rtx,
10649 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10650 target2)));
10651 if (second_probability >= 0)
10652 REG_NOTES (i)
10653 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10654 GEN_INT (second_probability),
10655 REG_NOTES (i));
10656 }
10657 if (label != NULL_RTX)
10658 emit_label (label);
10659 }
10660
10661 int
10662 ix86_expand_setcc (enum rtx_code code, rtx dest)
10663 {
10664 rtx ret, tmp, tmpreg, equiv;
10665 rtx second_test, bypass_test;
10666
10667 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10668 return 0; /* FAIL */
10669
10670 gcc_assert (GET_MODE (dest) == QImode);
10671
10672 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10673 PUT_MODE (ret, QImode);
10674
10675 tmp = dest;
10676 tmpreg = dest;
10677
10678 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
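  /* An IEEE FP comparison may have produced a second test (ORed into the
     result) or a bypass test (reversed and ANDed in) to handle the
     unordered case.  */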
10679 if (bypass_test || second_test)
10680 {
10681 rtx test = second_test;
10682 int bypass = 0;
10683 rtx tmp2 = gen_reg_rtx (QImode);
10684 if (bypass_test)
10685 {
10686 gcc_assert (!second_test);
10687 test = bypass_test;
10688 bypass = 1;
10689 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10690 }
10691 PUT_MODE (test, QImode);
10692 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10693
10694 if (bypass)
10695 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10696 else
10697 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10698 }
10699
10700 /* Attach a REG_EQUAL note describing the comparison result. */
10701 if (ix86_compare_op0 && ix86_compare_op1)
10702 {
10703 equiv = simplify_gen_relational (code, QImode,
10704 GET_MODE (ix86_compare_op0),
10705 ix86_compare_op0, ix86_compare_op1);
10706 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10707 }
10708
10709 return 1; /* DONE */
10710 }
10711
10712 /* Expand a comparison setting or clearing the carry flag. Return true when
10713 successful and set *POP to the operation. */
10714 static bool
10715 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10716 {
10717 enum machine_mode mode =
10718 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10719
10720 /* Do not handle DImode compares that go through a special path. Also we can't
10721 deal with FP compares yet. This is possible to add. */
10722 if (mode == (TARGET_64BIT ? TImode : DImode))
10723 return false;
10724 if (FLOAT_MODE_P (mode))
10725 {
10726 rtx second_test = NULL, bypass_test = NULL;
10727 rtx compare_op, compare_seq;
10728
10729 /* Shortcut: the following common codes never translate into carry flag compares. */
10730 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10731 || code == ORDERED || code == UNORDERED)
10732 return false;
10733
10734 /* These comparisons require zero flag; swap operands so they won't. */
10735 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10736 && !TARGET_IEEE_FP)
10737 {
10738 rtx tmp = op0;
10739 op0 = op1;
10740 op1 = tmp;
10741 code = swap_condition (code);
10742 }
10743
10744 /* Try to expand the comparison and verify that we end up with carry flag
10745 based comparison. This fails to be true only when we decide to expand the
10746 comparison using arithmetic, which is not a common scenario. */
10747 start_sequence ();
10748 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10749 &second_test, &bypass_test);
10750 compare_seq = get_insns ();
10751 end_sequence ();
10752
10753 if (second_test || bypass_test)
10754 return false;
10755 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10756 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10757 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10758 else
10759 code = GET_CODE (compare_op);
10760 if (code != LTU && code != GEU)
10761 return false;
10762 emit_insn (compare_seq);
10763 *pop = compare_op;
10764 return true;
10765 }
10766 if (!INTEGRAL_MODE_P (mode))
10767 return false;
10768 switch (code)
10769 {
10770 case LTU:
10771 case GEU:
10772 break;
10773
10774 /* Convert a==0 into (unsigned)a<1. */
10775 case EQ:
10776 case NE:
10777 if (op1 != const0_rtx)
10778 return false;
10779 op1 = const1_rtx;
10780 code = (code == EQ ? LTU : GEU);
10781 break;
10782
10783 /* Convert a>b into b<a or a>=b+1. */
10784 case GTU:
10785 case LEU:
10786 if (GET_CODE (op1) == CONST_INT)
10787 {
10788 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10789 /* Bail out on overflow. We still can swap operands but that
10790 would force loading of the constant into a register. */
10791 if (op1 == const0_rtx
10792 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10793 return false;
10794 code = (code == GTU ? GEU : LTU);
10795 }
10796 else
10797 {
10798 rtx tmp = op1;
10799 op1 = op0;
10800 op0 = tmp;
10801 code = (code == GTU ? LTU : GEU);
10802 }
10803 break;
10804
10805 /* Convert a>=0 into (unsigned)a<0x80000000. */
10806 case LT:
10807 case GE:
10808 if (mode == DImode || op1 != const0_rtx)
10809 return false;
10810 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10811 code = (code == LT ? GEU : LTU);
10812 break;
10813 case LE:
10814 case GT:
10815 if (mode == DImode || op1 != constm1_rtx)
10816 return false;
10817 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10818 code = (code == LE ? GEU : LTU);
10819 break;
10820
10821 default:
10822 return false;
10823 }
10824 /* Swapping operands may cause a constant to appear as the first operand. */
10825 if (!nonimmediate_operand (op0, VOIDmode))
10826 {
10827 if (no_new_pseudos)
10828 return false;
10829 op0 = force_reg (mode, op0);
10830 }
10831 ix86_compare_op0 = op0;
10832 ix86_compare_op1 = op1;
10833 *pop = ix86_expand_compare (code, NULL, NULL);
10834 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10835 return true;
10836 }
10837
10838 int
10839 ix86_expand_int_movcc (rtx operands[])
10840 {
10841 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10842 rtx compare_seq, compare_op;
10843 rtx second_test, bypass_test;
10844 enum machine_mode mode = GET_MODE (operands[0]);
10845 bool sign_bit_compare_p = false;
10846
10847 start_sequence ();
10848 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10849 compare_seq = get_insns ();
10850 end_sequence ();
10851
10852 compare_code = GET_CODE (compare_op);
10853
10854 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10855 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10856 sign_bit_compare_p = true;
10857
10858 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10859 HImode insns, we'd be swallowed in word prefix ops. */
10860
10861 if ((mode != HImode || TARGET_FAST_PREFIX)
10862 && (mode != (TARGET_64BIT ? TImode : DImode))
10863 && GET_CODE (operands[2]) == CONST_INT
10864 && GET_CODE (operands[3]) == CONST_INT)
10865 {
10866 rtx out = operands[0];
10867 HOST_WIDE_INT ct = INTVAL (operands[2]);
10868 HOST_WIDE_INT cf = INTVAL (operands[3]);
10869 HOST_WIDE_INT diff;
10870
10871 diff = ct - cf;
10872 /* Sign bit compares are better done using shifts than by using
10873 sbb. */
10874 if (sign_bit_compare_p
10875 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10876 ix86_compare_op1, &compare_op))
10877 {
10878 /* Detect overlap between destination and compare sources. */
10879 rtx tmp = out;
10880
10881 if (!sign_bit_compare_p)
10882 {
10883 bool fpcmp = false;
10884
10885 compare_code = GET_CODE (compare_op);
10886
10887 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10888 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10889 {
10890 fpcmp = true;
10891 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10892 }
10893
10894 /* To simplify rest of code, restrict to the GEU case. */
10895 if (compare_code == LTU)
10896 {
10897 HOST_WIDE_INT tmp = ct;
10898 ct = cf;
10899 cf = tmp;
10900 compare_code = reverse_condition (compare_code);
10901 code = reverse_condition (code);
10902 }
10903 else
10904 {
10905 if (fpcmp)
10906 PUT_CODE (compare_op,
10907 reverse_condition_maybe_unordered
10908 (GET_CODE (compare_op)));
10909 else
10910 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10911 }
10912 diff = ct - cf;
10913
10914 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10915 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10916 tmp = gen_reg_rtx (mode);
10917
10918 if (mode == DImode)
10919 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10920 else
10921 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10922 }
10923 else
10924 {
10925 if (code == GT || code == GE)
10926 code = reverse_condition (code);
10927 else
10928 {
10929 HOST_WIDE_INT tmp = ct;
10930 ct = cf;
10931 cf = tmp;
10932 diff = ct - cf;
10933 }
10934 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10935 ix86_compare_op1, VOIDmode, 0, -1);
10936 }
10937
10938 if (diff == 1)
10939 {
10940 /*
10941 * cmpl op0,op1
10942 * sbbl dest,dest
10943 * [addl dest, ct]
10944 *
10945 * Size 5 - 8.
10946 */
10947 if (ct)
10948 tmp = expand_simple_binop (mode, PLUS,
10949 tmp, GEN_INT (ct),
10950 copy_rtx (tmp), 1, OPTAB_DIRECT);
10951 }
10952 else if (cf == -1)
10953 {
10954 /*
10955 * cmpl op0,op1
10956 * sbbl dest,dest
10957 * orl $ct, dest
10958 *
10959 * Size 8.
10960 */
10961 tmp = expand_simple_binop (mode, IOR,
10962 tmp, GEN_INT (ct),
10963 copy_rtx (tmp), 1, OPTAB_DIRECT);
10964 }
10965 else if (diff == -1 && ct)
10966 {
10967 /*
10968 * cmpl op0,op1
10969 * sbbl dest,dest
10970 * notl dest
10971 * [addl dest, cf]
10972 *
10973 * Size 8 - 11.
10974 */
10975 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10976 if (cf)
10977 tmp = expand_simple_binop (mode, PLUS,
10978 copy_rtx (tmp), GEN_INT (cf),
10979 copy_rtx (tmp), 1, OPTAB_DIRECT);
10980 }
10981 else
10982 {
10983 /*
10984 * cmpl op0,op1
10985 * sbbl dest,dest
10986 * [notl dest]
10987 * andl cf - ct, dest
10988 * [addl dest, ct]
10989 *
10990 * Size 8 - 11.
10991 */
10992
10993 if (cf == 0)
10994 {
10995 cf = ct;
10996 ct = 0;
10997 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10998 }
10999
11000 tmp = expand_simple_binop (mode, AND,
11001 copy_rtx (tmp),
11002 gen_int_mode (cf - ct, mode),
11003 copy_rtx (tmp), 1, OPTAB_DIRECT);
11004 if (ct)
11005 tmp = expand_simple_binop (mode, PLUS,
11006 copy_rtx (tmp), GEN_INT (ct),
11007 copy_rtx (tmp), 1, OPTAB_DIRECT);
11008 }
11009
11010 if (!rtx_equal_p (tmp, out))
11011 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11012
11013 return 1; /* DONE */
11014 }
11015
11016 if (diff < 0)
11017 {
11018 HOST_WIDE_INT tmp;
11019 tmp = ct, ct = cf, cf = tmp;
11020 diff = -diff;
11021 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11022 {
11023 /* We may be reversing an unordered compare to a normal compare, which
11024 is not valid in general (we may convert a non-trapping condition
11025 to a trapping one); however, on i386 we currently emit all
11026 comparisons unordered. */
11027 compare_code = reverse_condition_maybe_unordered (compare_code);
11028 code = reverse_condition_maybe_unordered (code);
11029 }
11030 else
11031 {
11032 compare_code = reverse_condition (compare_code);
11033 code = reverse_condition (code);
11034 }
11035 }
11036
11037 compare_code = UNKNOWN;
11038 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11039 && GET_CODE (ix86_compare_op1) == CONST_INT)
11040 {
11041 if (ix86_compare_op1 == const0_rtx
11042 && (code == LT || code == GE))
11043 compare_code = code;
11044 else if (ix86_compare_op1 == constm1_rtx)
11045 {
11046 if (code == LE)
11047 compare_code = LT;
11048 else if (code == GT)
11049 compare_code = GE;
11050 }
11051 }
11052
11053 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11054 if (compare_code != UNKNOWN
11055 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11056 && (cf == -1 || ct == -1))
11057 {
11058 /* If lea code below could be used, only optimize
11059 if it results in a 2 insn sequence. */
11060
11061 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11062 || diff == 3 || diff == 5 || diff == 9)
11063 || (compare_code == LT && ct == -1)
11064 || (compare_code == GE && cf == -1))
11065 {
11066 /*
11067 * notl op1 (if necessary)
11068 * sarl $31, op1
11069 * orl cf, op1
11070 */
11071 if (ct != -1)
11072 {
11073 cf = ct;
11074 ct = -1;
11075 code = reverse_condition (code);
11076 }
11077
11078 out = emit_store_flag (out, code, ix86_compare_op0,
11079 ix86_compare_op1, VOIDmode, 0, -1);
11080
11081 out = expand_simple_binop (mode, IOR,
11082 out, GEN_INT (cf),
11083 out, 1, OPTAB_DIRECT);
11084 if (out != operands[0])
11085 emit_move_insn (operands[0], out);
11086
11087 return 1; /* DONE */
11088 }
11089 }
11090
11091
11092 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11093 || diff == 3 || diff == 5 || diff == 9)
11094 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11095 && (mode != DImode
11096 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11097 {
11098 /*
11099 * xorl dest,dest
11100 * cmpl op1,op2
11101 * setcc dest
11102 * lea cf(dest*(ct-cf)),dest
11103 *
11104 * Size 14.
11105 *
11106 * This also catches the degenerate setcc-only case.
11107 */
11108
11109 rtx tmp;
11110 int nops;
11111
11112 out = emit_store_flag (out, code, ix86_compare_op0,
11113 ix86_compare_op1, VOIDmode, 0, 1);
11114
11115 nops = 0;
11116 /* On x86_64 the lea instruction operates on Pmode, so we need
11117 to do the arithmetic in the proper mode to match. */
11118 if (diff == 1)
11119 tmp = copy_rtx (out);
11120 else
11121 {
11122 rtx out1;
11123 out1 = copy_rtx (out);
11124 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11125 nops++;
11126 if (diff & 1)
11127 {
11128 tmp = gen_rtx_PLUS (mode, tmp, out1);
11129 nops++;
11130 }
11131 }
11132 if (cf != 0)
11133 {
11134 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11135 nops++;
11136 }
11137 if (!rtx_equal_p (tmp, out))
11138 {
11139 if (nops == 1)
11140 out = force_operand (tmp, copy_rtx (out));
11141 else
11142 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11143 }
11144 if (!rtx_equal_p (out, operands[0]))
11145 emit_move_insn (operands[0], copy_rtx (out));
11146
11147 return 1; /* DONE */
11148 }
11149
11150 /*
11151 * General case: Jumpful:
11152 * xorl dest,dest cmpl op1, op2
11153 * cmpl op1, op2 movl ct, dest
11154 * setcc dest jcc 1f
11155 * decl dest movl cf, dest
11156 * andl (cf-ct),dest 1:
11157 * addl ct,dest
11158 *
11159 * Size 20. Size 14.
11160 *
11161 * This is reasonably steep, but branch mispredict costs are
11162 * high on modern cpus, so consider failing only if optimizing
11163 * for space.
11164 */
11165
11166 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11167 && BRANCH_COST >= 2)
11168 {
11169 if (cf == 0)
11170 {
11171 cf = ct;
11172 ct = 0;
11173 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11174 /* We may be reversing an unordered compare to a normal compare,
11175 which is not valid in general (we may convert a non-trapping
11176 condition to a trapping one); however, on i386 we currently
11177 emit all comparisons unordered. */
11178 code = reverse_condition_maybe_unordered (code);
11179 else
11180 {
11181 code = reverse_condition (code);
11182 if (compare_code != UNKNOWN)
11183 compare_code = reverse_condition (compare_code);
11184 }
11185 }
11186
11187 if (compare_code != UNKNOWN)
11188 {
11189 /* notl op1 (if needed)
11190 sarl $31, op1
11191 andl (cf-ct), op1
11192 addl ct, op1
11193
11194 For x < 0 (resp. x <= -1) there will be no notl,
11195 so if possible swap the constants to get rid of the
11196 complement.
11197 True/false will be -1/0 while code below (store flag
11198 followed by decrement) is 0/-1, so the constants need
11199 to be exchanged once more. */
11200
11201 if (compare_code == GE || !cf)
11202 {
11203 code = reverse_condition (code);
11204 compare_code = LT;
11205 }
11206 else
11207 {
11208 HOST_WIDE_INT tmp = cf;
11209 cf = ct;
11210 ct = tmp;
11211 }
11212
11213 out = emit_store_flag (out, code, ix86_compare_op0,
11214 ix86_compare_op1, VOIDmode, 0, -1);
11215 }
11216 else
11217 {
11218 out = emit_store_flag (out, code, ix86_compare_op0,
11219 ix86_compare_op1, VOIDmode, 0, 1);
11220
11221 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11222 copy_rtx (out), 1, OPTAB_DIRECT);
11223 }
11224
11225 out = expand_simple_binop (mode, AND, copy_rtx (out),
11226 gen_int_mode (cf - ct, mode),
11227 copy_rtx (out), 1, OPTAB_DIRECT);
11228 if (ct)
11229 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11230 copy_rtx (out), 1, OPTAB_DIRECT);
11231 if (!rtx_equal_p (out, operands[0]))
11232 emit_move_insn (operands[0], copy_rtx (out));
11233
11234 return 1; /* DONE */
11235 }
11236 }
11237
11238 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11239 {
11240 /* Try a few things more with specific constants and a variable. */
11241
11242 optab op;
11243 rtx var, orig_out, out, tmp;
11244
11245 if (BRANCH_COST <= 2)
11246 return 0; /* FAIL */
11247
11248 /* If one of the two operands is an interesting constant, load a
11249 constant with the above and mask it in with a logical operation. */
11250
11251 if (GET_CODE (operands[2]) == CONST_INT)
11252 {
11253 var = operands[3];
11254 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11255 operands[3] = constm1_rtx, op = and_optab;
11256 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11257 operands[3] = const0_rtx, op = ior_optab;
11258 else
11259 return 0; /* FAIL */
11260 }
11261 else if (GET_CODE (operands[3]) == CONST_INT)
11262 {
11263 var = operands[2];
11264 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11265 operands[2] = constm1_rtx, op = and_optab;
11266 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11267 operands[2] = const0_rtx, op = ior_optab;
11268 else
11269 return 0; /* FAIL */
11270 }
11271 else
11272 return 0; /* FAIL */
11273
11274 orig_out = operands[0];
11275 tmp = gen_reg_rtx (mode);
11276 operands[0] = tmp;
11277
11278 /* Recurse to get the constant loaded. */
11279 if (ix86_expand_int_movcc (operands) == 0)
11280 return 0; /* FAIL */
11281
11282 /* Mask in the interesting variable. */
11283 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11284 OPTAB_WIDEN);
11285 if (!rtx_equal_p (out, orig_out))
11286 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11287
11288 return 1; /* DONE */
11289 }
11290
11291 /*
11292 * For comparison with above,
11293 *
11294 * movl cf,dest
11295 * movl ct,tmp
11296 * cmpl op1,op2
11297 * cmovcc tmp,dest
11298 *
11299 * Size 15.
11300 */
11301
11302 if (! nonimmediate_operand (operands[2], mode))
11303 operands[2] = force_reg (mode, operands[2]);
11304 if (! nonimmediate_operand (operands[3], mode))
11305 operands[3] = force_reg (mode, operands[3]);
11306
11307 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11308 {
11309 rtx tmp = gen_reg_rtx (mode);
11310 emit_move_insn (tmp, operands[3]);
11311 operands[3] = tmp;
11312 }
11313 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11314 {
11315 rtx tmp = gen_reg_rtx (mode);
11316 emit_move_insn (tmp, operands[2]);
11317 operands[2] = tmp;
11318 }
11319
11320 if (! register_operand (operands[2], VOIDmode)
11321 && (mode == QImode
11322 || ! register_operand (operands[3], VOIDmode)))
11323 operands[2] = force_reg (mode, operands[2]);
11324
11325 if (mode == QImode
11326 && ! register_operand (operands[3], VOIDmode))
11327 operands[3] = force_reg (mode, operands[3]);
11328
11329 emit_insn (compare_seq);
11330 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11331 gen_rtx_IF_THEN_ELSE (mode,
11332 compare_op, operands[2],
11333 operands[3])));
11334 if (bypass_test)
11335 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11336 gen_rtx_IF_THEN_ELSE (mode,
11337 bypass_test,
11338 copy_rtx (operands[3]),
11339 copy_rtx (operands[0]))));
11340 if (second_test)
11341 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11342 gen_rtx_IF_THEN_ELSE (mode,
11343 second_test,
11344 copy_rtx (operands[2]),
11345 copy_rtx (operands[0]))));
11346
11347 return 1; /* DONE */
11348 }
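
/* A rough sketch of the setcc+lea path above (hypothetical source,
   assuming a is in %edx and b is in %ecx, AT&T syntax):

       int f (int a, int b) { return a < b ? 7 : 3; }

   Here ct - cf == 4, one of the supported lea scales, so the expander
   can emit approximately

       xorl  %eax, %eax
       cmpl  %ecx, %edx
       setl  %al
       leal  3(,%eax,4), %eax

   i.e. the "lea cf(dest*(ct-cf)),dest" sequence from the comments above,
   with no branch and no cmov.  */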
11349
11350 /* Swap, force into registers, or otherwise massage the two operands
11351 to an sse comparison with a mask result. Thus we differ a bit from
11352 ix86_prepare_fp_compare_args which expects to produce a flags result.
11353
11354 The DEST operand exists to help determine whether to commute commutative
11355 operators. The POP0/POP1 operands are updated in place. The new
11356 comparison code is returned, or UNKNOWN if not implementable. */
11357
11358 static enum rtx_code
11359 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11360 rtx *pop0, rtx *pop1)
11361 {
11362 rtx tmp;
11363
11364 switch (code)
11365 {
11366 case LTGT:
11367 case UNEQ:
11368 /* We have no LTGT as an operator. We could implement it with
11369 NE & ORDERED, but this requires an extra temporary. It's
11370 not clear that it's worth it. */
11371 return UNKNOWN;
11372
11373 case LT:
11374 case LE:
11375 case UNGT:
11376 case UNGE:
11377 /* These are supported directly. */
11378 break;
11379
11380 case EQ:
11381 case NE:
11382 case UNORDERED:
11383 case ORDERED:
11384 /* For commutative operators, try to canonicalize the destination
11385 operand to be first in the comparison - this helps reload to
11386 avoid extra moves. */
11387 if (!dest || !rtx_equal_p (dest, *pop1))
11388 break;
11389 /* FALLTHRU */
11390
11391 case GE:
11392 case GT:
11393 case UNLE:
11394 case UNLT:
11395 /* These are not supported directly. Swap the comparison operands
11396 to transform into something that is supported. */
11397 tmp = *pop0;
11398 *pop0 = *pop1;
11399 *pop1 = tmp;
11400 code = swap_condition (code);
11401 break;
11402
11403 default:
11404 gcc_unreachable ();
11405 }
11406
11407 return code;
11408 }
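
/* Sketch of the effect of the swapping above (illustrative only): the SSE
   compare instructions provide LT/LE/EQ/NE/ORDERED/UNORDERED style
   predicates but no direct "greater than" form, so a request such as

       a > b        (GT, operands a, b)

   is rewritten as

       b < a        (LT, operands b, a)

   by exchanging *POP0/*POP1 and calling swap_condition, after which the
   compare expander can emit it directly.  */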
11409
11410 /* Detect conditional moves that exactly match min/max operational
11411 semantics. Note that this is IEEE safe, as long as we don't
11412 interchange the operands.
11413
11414 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11415 and TRUE if the operation is successful and instructions are emitted. */
11416
11417 static bool
11418 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11419 rtx cmp_op1, rtx if_true, rtx if_false)
11420 {
11421 enum machine_mode mode;
11422 bool is_min;
11423 rtx tmp;
11424
11425 if (code == LT)
11426 ;
11427 else if (code == UNGE)
11428 {
11429 tmp = if_true;
11430 if_true = if_false;
11431 if_false = tmp;
11432 }
11433 else
11434 return false;
11435
11436 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11437 is_min = true;
11438 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11439 is_min = false;
11440 else
11441 return false;
11442
11443 mode = GET_MODE (dest);
11444
11445 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11446 but MODE may be a vector mode and thus not appropriate. */
11447 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11448 {
11449 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11450 rtvec v;
11451
11452 if_true = force_reg (mode, if_true);
11453 v = gen_rtvec (2, if_true, if_false);
11454 tmp = gen_rtx_UNSPEC (mode, v, u);
11455 }
11456 else
11457 {
11458 code = is_min ? SMIN : SMAX;
11459 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11460 }
11461
11462 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11463 return true;
11464 }
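
/* Rough illustration (hypothetical source): a conditional move such as

       d = (a < b) ? a : b;

   matches the LT / if_true == cmp_op0 / if_false == cmp_op1 pattern above
   and becomes a single min operation -- expressed as SMIN when
   -ffinite-math-only and -funsafe-math-optimizations permit, or through
   the UNSPEC_IEEE_MIN pattern otherwise, which keeps the operand order
   fixed so the NaN and signed zero behaviour of the hardware instruction
   is preserved.  */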
11465
11466 /* Expand an sse vector comparison. Return the register with the result. */
11467
11468 static rtx
11469 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11470 rtx op_true, rtx op_false)
11471 {
11472 enum machine_mode mode = GET_MODE (dest);
11473 rtx x;
11474
11475 cmp_op0 = force_reg (mode, cmp_op0);
11476 if (!nonimmediate_operand (cmp_op1, mode))
11477 cmp_op1 = force_reg (mode, cmp_op1);
11478
11479 if (optimize
11480 || reg_overlap_mentioned_p (dest, op_true)
11481 || reg_overlap_mentioned_p (dest, op_false))
11482 dest = gen_reg_rtx (mode);
11483
11484 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11485 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11486
11487 return dest;
11488 }
11489
11490 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11491 operations. This is used for both scalar and vector conditional moves. */
11492
11493 static void
11494 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11495 {
11496 enum machine_mode mode = GET_MODE (dest);
11497 rtx t2, t3, x;
11498
11499 if (op_false == CONST0_RTX (mode))
11500 {
11501 op_true = force_reg (mode, op_true);
11502 x = gen_rtx_AND (mode, cmp, op_true);
11503 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11504 }
11505 else if (op_true == CONST0_RTX (mode))
11506 {
11507 op_false = force_reg (mode, op_false);
11508 x = gen_rtx_NOT (mode, cmp);
11509 x = gen_rtx_AND (mode, x, op_false);
11510 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11511 }
11512 else
11513 {
11514 op_true = force_reg (mode, op_true);
11515 op_false = force_reg (mode, op_false);
11516
11517 t2 = gen_reg_rtx (mode);
11518 if (optimize)
11519 t3 = gen_reg_rtx (mode);
11520 else
11521 t3 = dest;
11522
11523 x = gen_rtx_AND (mode, op_true, cmp);
11524 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11525
11526 x = gen_rtx_NOT (mode, cmp);
11527 x = gen_rtx_AND (mode, x, op_false);
11528 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11529
11530 x = gen_rtx_IOR (mode, t3, t2);
11531 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11532 }
11533 }
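
/* The general case above is the classic mask blend.  As a sketch in
   scalar C (illustrative only; think of one vector lane as an integer,
   with cmp all-ones where the condition held and all-zeros otherwise):

       dest = (cmp & op_true) | (~cmp & op_false);

   The two special cases simply drop the half of the expression whose
   operand is known to be zero.  */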
11534
11535 /* Expand a floating-point conditional move. Return true if successful. */
11536
11537 int
11538 ix86_expand_fp_movcc (rtx operands[])
11539 {
11540 enum machine_mode mode = GET_MODE (operands[0]);
11541 enum rtx_code code = GET_CODE (operands[1]);
11542 rtx tmp, compare_op, second_test, bypass_test;
11543
11544 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11545 {
11546 enum machine_mode cmode;
11547
11548 /* Since we've no cmove for sse registers, don't force bad register
11549 allocation just to gain access to it. Deny movcc when the
11550 comparison mode doesn't match the move mode. */
11551 cmode = GET_MODE (ix86_compare_op0);
11552 if (cmode == VOIDmode)
11553 cmode = GET_MODE (ix86_compare_op1);
11554 if (cmode != mode)
11555 return 0;
11556
11557 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11558 &ix86_compare_op0,
11559 &ix86_compare_op1);
11560 if (code == UNKNOWN)
11561 return 0;
11562
11563 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11564 ix86_compare_op1, operands[2],
11565 operands[3]))
11566 return 1;
11567
11568 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11569 ix86_compare_op1, operands[2], operands[3]);
11570 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11571 return 1;
11572 }
11573
11574 /* The floating point conditional move instructions don't directly
11575 support conditions resulting from a signed integer comparison. */
11576
11577 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11578
11582 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11583 {
11584 gcc_assert (!second_test && !bypass_test);
11585 tmp = gen_reg_rtx (QImode);
11586 ix86_expand_setcc (code, tmp);
11587 code = NE;
11588 ix86_compare_op0 = tmp;
11589 ix86_compare_op1 = const0_rtx;
11590 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11591 }
11592 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11593 {
11594 tmp = gen_reg_rtx (mode);
11595 emit_move_insn (tmp, operands[3]);
11596 operands[3] = tmp;
11597 }
11598 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11599 {
11600 tmp = gen_reg_rtx (mode);
11601 emit_move_insn (tmp, operands[2]);
11602 operands[2] = tmp;
11603 }
11604
11605 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11606 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11607 operands[2], operands[3])));
11608 if (bypass_test)
11609 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11610 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11611 operands[3], operands[0])));
11612 if (second_test)
11613 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11614 gen_rtx_IF_THEN_ELSE (mode, second_test,
11615 operands[2], operands[0])));
11616
11617 return 1;
11618 }
11619
11620 /* Expand a floating-point vector conditional move; a vcond operation
11621 rather than a movcc operation. */
11622
11623 bool
11624 ix86_expand_fp_vcond (rtx operands[])
11625 {
11626 enum rtx_code code = GET_CODE (operands[3]);
11627 rtx cmp;
11628
11629 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11630 &operands[4], &operands[5]);
11631 if (code == UNKNOWN)
11632 return false;
11633
11634 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11635 operands[5], operands[1], operands[2]))
11636 return true;
11637
11638 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11639 operands[1], operands[2]);
11640 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11641 return true;
11642 }
11643
11644 /* Expand a signed integral vector conditional move. */
11645
11646 bool
11647 ix86_expand_int_vcond (rtx operands[])
11648 {
11649 enum machine_mode mode = GET_MODE (operands[0]);
11650 enum rtx_code code = GET_CODE (operands[3]);
11651 bool negate = false;
11652 rtx x, cop0, cop1;
11653
11654 cop0 = operands[4];
11655 cop1 = operands[5];
11656
11657 /* Canonicalize the comparison to EQ, GT, GTU. */
11658 switch (code)
11659 {
11660 case EQ:
11661 case GT:
11662 case GTU:
11663 break;
11664
11665 case NE:
11666 case LE:
11667 case LEU:
11668 code = reverse_condition (code);
11669 negate = true;
11670 break;
11671
11672 case GE:
11673 case GEU:
11674 code = reverse_condition (code);
11675 negate = true;
11676 /* FALLTHRU */
11677
11678 case LT:
11679 case LTU:
11680 code = swap_condition (code);
11681 x = cop0, cop0 = cop1, cop1 = x;
11682 break;
11683
11684 default:
11685 gcc_unreachable ();
11686 }
11687
11688 /* Unsigned parallel compare is not supported by the hardware. Play some
11689 tricks to turn this into a signed comparison against 0. */
11690 if (code == GTU)
11691 {
11692 cop0 = force_reg (mode, cop0);
11693
11694 switch (mode)
11695 {
11696 case V4SImode:
11697 {
11698 rtx t1, t2, mask;
11699
11700 /* Perform a parallel modulo subtraction. */
11701 t1 = gen_reg_rtx (mode);
11702 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11703
11704 /* Extract the original sign bit of op0. */
11705 mask = GEN_INT (-0x80000000);
11706 mask = gen_rtx_CONST_VECTOR (mode,
11707 gen_rtvec (4, mask, mask, mask, mask));
11708 mask = force_reg (mode, mask);
11709 t2 = gen_reg_rtx (mode);
11710 emit_insn (gen_andv4si3 (t2, cop0, mask));
11711
11712 /* XOR it back into the result of the subtraction. This results
11713 in the sign bit set iff we saw unsigned underflow. */
11714 x = gen_reg_rtx (mode);
11715 emit_insn (gen_xorv4si3 (x, t1, t2));
11716
11717 code = GT;
11718 }
11719 break;
11720
11721 case V16QImode:
11722 case V8HImode:
11723 /* Perform a parallel unsigned saturating subtraction. */
11724 x = gen_reg_rtx (mode);
11725 emit_insn (gen_rtx_SET (VOIDmode, x,
11726 gen_rtx_US_MINUS (mode, cop0, cop1)));
11727
11728 code = EQ;
11729 negate = !negate;
11730 break;
11731
11732 default:
11733 gcc_unreachable ();
11734 }
11735
11736 cop0 = x;
11737 cop1 = CONST0_RTX (mode);
11738 }
11739
11740 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11741 operands[1+negate], operands[2-negate]);
11742
11743 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11744 operands[2-negate]);
11745 return true;
11746 }
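
/* Illustrative note on the V16QImode/V8HImode trick above (a sketch, not
   a formal proof): the unsigned saturating subtraction a -us b
   (psubusb/psubusw) is zero exactly when a <= b holds element-wise, so

       a >u b   <==>   (a -us b) != 0

   which is why that case switches to an EQ compare against zero and
   toggles NEGATE.  */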
11747
11748 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11749 true if we should do zero extension, else sign extension. HIGH_P is
11750 true if we want the N/2 high elements, else the low elements. */
11751
11752 void
11753 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
11754 {
11755 enum machine_mode imode = GET_MODE (operands[1]);
11756 rtx (*unpack)(rtx, rtx, rtx);
11757 rtx se, dest;
11758
11759 switch (imode)
11760 {
11761 case V16QImode:
11762 if (high_p)
11763 unpack = gen_vec_interleave_highv16qi;
11764 else
11765 unpack = gen_vec_interleave_lowv16qi;
11766 break;
11767 case V8HImode:
11768 if (high_p)
11769 unpack = gen_vec_interleave_highv8hi;
11770 else
11771 unpack = gen_vec_interleave_lowv8hi;
11772 break;
11773 case V4SImode:
11774 if (high_p)
11775 unpack = gen_vec_interleave_highv4si;
11776 else
11777 unpack = gen_vec_interleave_lowv4si;
11778 break;
11779 default:
11780 gcc_unreachable ();
11781 }
11782
11783 dest = gen_lowpart (imode, operands[0]);
11784
11785 if (unsigned_p)
11786 se = force_reg (imode, CONST0_RTX (imode));
11787 else
11788 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
11789 operands[1], pc_rtx, pc_rtx);
11790
11791 emit_insn (unpack (dest, operands[1], se));
11792 }
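
/* Rough sketch of the signed case above: SSE2 has no widening move, so
   the sign extension is synthesized by first computing

       se = (0 > op1)       -- pcmpgt against zero; each negative element
                               yields all-ones, the others all-zeros

   and then interleaving op1 with SE, so every narrow element is followed
   by its sign word.  For zero extension SE is simply a zeroed register.  */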
11793
11794 /* Expand conditional increment or decrement using adc/sbb instructions.
11795 The default case using setcc followed by the conditional move can be
11796 done by generic code. */
11797 int
11798 ix86_expand_int_addcc (rtx operands[])
11799 {
11800 enum rtx_code code = GET_CODE (operands[1]);
11801 rtx compare_op;
11802 rtx val = const0_rtx;
11803 bool fpcmp = false;
11804 enum machine_mode mode = GET_MODE (operands[0]);
11805
11806 if (operands[3] != const1_rtx
11807 && operands[3] != constm1_rtx)
11808 return 0;
11809 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11810 ix86_compare_op1, &compare_op))
11811 return 0;
11812 code = GET_CODE (compare_op);
11813
11814 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11815 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11816 {
11817 fpcmp = true;
11818 code = ix86_fp_compare_code_to_integer (code);
11819 }
11820
11821 if (code != LTU)
11822 {
11823 val = constm1_rtx;
11824 if (fpcmp)
11825 PUT_CODE (compare_op,
11826 reverse_condition_maybe_unordered
11827 (GET_CODE (compare_op)));
11828 else
11829 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11830 }
11831 PUT_MODE (compare_op, mode);
11832
11833 /* Construct either adc or sbb insn. */
11834 if ((code == LTU) == (operands[3] == constm1_rtx))
11835 {
11836 switch (GET_MODE (operands[0]))
11837 {
11838 case QImode:
11839 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11840 break;
11841 case HImode:
11842 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11843 break;
11844 case SImode:
11845 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11846 break;
11847 case DImode:
11848 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11849 break;
11850 default:
11851 gcc_unreachable ();
11852 }
11853 }
11854 else
11855 {
11856 switch (GET_MODE (operands[0]))
11857 {
11858 case QImode:
11859 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11860 break;
11861 case HImode:
11862 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11863 break;
11864 case SImode:
11865 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11866 break;
11867 case DImode:
11868 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11869 break;
11870 default:
11871 gcc_unreachable ();
11872 }
11873 }
11874 return 1; /* DONE */
11875 }
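
/* Rough illustration (hypothetical source, a and b unsigned, AT&T
   syntax): a conditional increment such as

       x = y + (a < b);

   fits the pattern above (operands[3] == const1_rtx, comparison reduced
   to LTU by ix86_expand_carry_flag_compare) and can be emitted roughly as

       cmpl  b, a
       adcl  $0, x

   with the symmetric sbb form used for conditional decrement.  */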
11876
11877
11878 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11879 works for floating point parameters and non-offsettable memories.
11880 For pushes, it returns just stack offsets; the values will be saved
11881 in the right order. Maximally three parts are generated. */
11882
11883 static int
11884 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11885 {
11886 int size;
11887
11888 if (!TARGET_64BIT)
11889 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11890 else
11891 size = (GET_MODE_SIZE (mode) + 4) / 8;
11892
11893 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11894 gcc_assert (size >= 2 && size <= 3);
11895
11896 /* Optimize constant pool reference to immediates. This is used by fp
11897 moves, which force all constants to memory to allow combining. */
11898 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11899 {
11900 rtx tmp = maybe_get_pool_constant (operand);
11901 if (tmp)
11902 operand = tmp;
11903 }
11904
11905 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11906 {
11907 /* The only non-offsettable memories we handle are pushes. */
11908 int ok = push_operand (operand, VOIDmode);
11909
11910 gcc_assert (ok);
11911
11912 operand = copy_rtx (operand);
11913 PUT_MODE (operand, Pmode);
11914 parts[0] = parts[1] = parts[2] = operand;
11915 return size;
11916 }
11917
11918 if (GET_CODE (operand) == CONST_VECTOR)
11919 {
11920 enum machine_mode imode = int_mode_for_mode (mode);
11921 /* Caution: if we looked through a constant pool memory above,
11922 the operand may actually have a different mode now. That's
11923 ok, since we want to pun this all the way back to an integer. */
11924 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11925 gcc_assert (operand != NULL);
11926 mode = imode;
11927 }
11928
11929 if (!TARGET_64BIT)
11930 {
11931 if (mode == DImode)
11932 split_di (&operand, 1, &parts[0], &parts[1]);
11933 else
11934 {
11935 if (REG_P (operand))
11936 {
11937 gcc_assert (reload_completed);
11938 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11939 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11940 if (size == 3)
11941 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11942 }
11943 else if (offsettable_memref_p (operand))
11944 {
11945 operand = adjust_address (operand, SImode, 0);
11946 parts[0] = operand;
11947 parts[1] = adjust_address (operand, SImode, 4);
11948 if (size == 3)
11949 parts[2] = adjust_address (operand, SImode, 8);
11950 }
11951 else if (GET_CODE (operand) == CONST_DOUBLE)
11952 {
11953 REAL_VALUE_TYPE r;
11954 long l[4];
11955
11956 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11957 switch (mode)
11958 {
11959 case XFmode:
11960 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11961 parts[2] = gen_int_mode (l[2], SImode);
11962 break;
11963 case DFmode:
11964 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11965 break;
11966 default:
11967 gcc_unreachable ();
11968 }
11969 parts[1] = gen_int_mode (l[1], SImode);
11970 parts[0] = gen_int_mode (l[0], SImode);
11971 }
11972 else
11973 gcc_unreachable ();
11974 }
11975 }
11976 else
11977 {
11978 if (mode == TImode)
11979 split_ti (&operand, 1, &parts[0], &parts[1]);
11980 if (mode == XFmode || mode == TFmode)
11981 {
11982 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11983 if (REG_P (operand))
11984 {
11985 gcc_assert (reload_completed);
11986 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11987 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11988 }
11989 else if (offsettable_memref_p (operand))
11990 {
11991 operand = adjust_address (operand, DImode, 0);
11992 parts[0] = operand;
11993 parts[1] = adjust_address (operand, upper_mode, 8);
11994 }
11995 else if (GET_CODE (operand) == CONST_DOUBLE)
11996 {
11997 REAL_VALUE_TYPE r;
11998 long l[4];
11999
12000 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12001 real_to_target (l, &r, mode);
12002
12003 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12004 if (HOST_BITS_PER_WIDE_INT >= 64)
12005 parts[0]
12006 = gen_int_mode
12007 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12008 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12009 DImode);
12010 else
12011 parts[0] = immed_double_const (l[0], l[1], DImode);
12012
12013 if (upper_mode == SImode)
12014 parts[1] = gen_int_mode (l[2], SImode);
12015 else if (HOST_BITS_PER_WIDE_INT >= 64)
12016 parts[1]
12017 = gen_int_mode
12018 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12019 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12020 DImode);
12021 else
12022 parts[1] = immed_double_const (l[2], l[3], DImode);
12023 }
12024 else
12025 gcc_unreachable ();
12026 }
12027 }
12028
12029 return size;
12030 }
12031
12032 /* Emit insns to perform a move or push of DI, DF, and XF values.
12033 Return false when normal moves are needed; true when all required
12034 insns have been emitted. Operands 2-4 contain the input values
12035 in the correct order; operands 5-7 contain the output values. */
12036
12037 void
12038 ix86_split_long_move (rtx operands[])
12039 {
12040 rtx part[2][3];
12041 int nparts;
12042 int push = 0;
12043 int collisions = 0;
12044 enum machine_mode mode = GET_MODE (operands[0]);
12045
12046 /* The DFmode expanders may ask us to move double.
12047 For a 64bit target this is a single move. By hiding the fact
12048 here we simplify i386.md splitters. */
12049 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12050 {
12051 /* Optimize constant pool reference to immediates. This is used by
12052 fp moves, which force all constants to memory to allow combining. */
12053
12054 if (GET_CODE (operands[1]) == MEM
12055 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12056 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12057 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12058 if (push_operand (operands[0], VOIDmode))
12059 {
12060 operands[0] = copy_rtx (operands[0]);
12061 PUT_MODE (operands[0], Pmode);
12062 }
12063 else
12064 operands[0] = gen_lowpart (DImode, operands[0]);
12065 operands[1] = gen_lowpart (DImode, operands[1]);
12066 emit_move_insn (operands[0], operands[1]);
12067 return;
12068 }
12069
12070 /* The only non-offsettable memory we handle is push. */
12071 if (push_operand (operands[0], VOIDmode))
12072 push = 1;
12073 else
12074 gcc_assert (GET_CODE (operands[0]) != MEM
12075 || offsettable_memref_p (operands[0]));
12076
12077 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12078 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12079
12080 /* When emitting push, take care for source operands on the stack. */
12081 if (push && GET_CODE (operands[1]) == MEM
12082 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12083 {
12084 if (nparts == 3)
12085 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12086 XEXP (part[1][2], 0));
12087 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12088 XEXP (part[1][1], 0));
12089 }
12090
12091 /* We need to do copy in the right order in case an address register
12092 of the source overlaps the destination. */
12093 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12094 {
12095 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12096 collisions++;
12097 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12098 collisions++;
12099 if (nparts == 3
12100 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12101 collisions++;
12102
12103 /* Collision in the middle part can be handled by reordering. */
12104 if (collisions == 1 && nparts == 3
12105 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12106 {
12107 rtx tmp;
12108 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12109 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12110 }
12111
12112 /* If there are more collisions, we can't handle it by reordering.
12113 Do an lea to the last part and use only one colliding move. */
12114 else if (collisions > 1)
12115 {
12116 rtx base;
12117
12118 collisions = 1;
12119
12120 base = part[0][nparts - 1];
12121
12122 /* Handle the case when the last part isn't valid for lea.
12123 Happens in 64-bit mode storing the 12-byte XFmode. */
12124 if (GET_MODE (base) != Pmode)
12125 base = gen_rtx_REG (Pmode, REGNO (base));
12126
12127 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12128 part[1][0] = replace_equiv_address (part[1][0], base);
12129 part[1][1] = replace_equiv_address (part[1][1],
12130 plus_constant (base, UNITS_PER_WORD));
12131 if (nparts == 3)
12132 part[1][2] = replace_equiv_address (part[1][2],
12133 plus_constant (base, 8));
12134 }
12135 }
12136
12137 if (push)
12138 {
12139 if (!TARGET_64BIT)
12140 {
12141 if (nparts == 3)
12142 {
12143 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12144 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12145 emit_move_insn (part[0][2], part[1][2]);
12146 }
12147 }
12148 else
12149 {
12150 /* In 64bit mode we don't have a 32bit push available. In case this is
12151 a register, it is OK - we will just use the larger counterpart. We also
12152 retype memory - this comes from an attempt to avoid the REX prefix on
12153 moving the second half of a TFmode value. */
12154 if (GET_MODE (part[1][1]) == SImode)
12155 {
12156 switch (GET_CODE (part[1][1]))
12157 {
12158 case MEM:
12159 part[1][1] = adjust_address (part[1][1], DImode, 0);
12160 break;
12161
12162 case REG:
12163 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12164 break;
12165
12166 default:
12167 gcc_unreachable ();
12168 }
12169
12170 if (GET_MODE (part[1][0]) == SImode)
12171 part[1][0] = part[1][1];
12172 }
12173 }
12174 emit_move_insn (part[0][1], part[1][1]);
12175 emit_move_insn (part[0][0], part[1][0]);
12176 return;
12177 }
12178
12179 /* Choose correct order to not overwrite the source before it is copied. */
12180 if ((REG_P (part[0][0])
12181 && REG_P (part[1][1])
12182 && (REGNO (part[0][0]) == REGNO (part[1][1])
12183 || (nparts == 3
12184 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12185 || (collisions > 0
12186 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12187 {
12188 if (nparts == 3)
12189 {
12190 operands[2] = part[0][2];
12191 operands[3] = part[0][1];
12192 operands[4] = part[0][0];
12193 operands[5] = part[1][2];
12194 operands[6] = part[1][1];
12195 operands[7] = part[1][0];
12196 }
12197 else
12198 {
12199 operands[2] = part[0][1];
12200 operands[3] = part[0][0];
12201 operands[5] = part[1][1];
12202 operands[6] = part[1][0];
12203 }
12204 }
12205 else
12206 {
12207 if (nparts == 3)
12208 {
12209 operands[2] = part[0][0];
12210 operands[3] = part[0][1];
12211 operands[4] = part[0][2];
12212 operands[5] = part[1][0];
12213 operands[6] = part[1][1];
12214 operands[7] = part[1][2];
12215 }
12216 else
12217 {
12218 operands[2] = part[0][0];
12219 operands[3] = part[0][1];
12220 operands[5] = part[1][0];
12221 operands[6] = part[1][1];
12222 }
12223 }
12224
12225 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12226 if (optimize_size)
12227 {
12228 if (GET_CODE (operands[5]) == CONST_INT
12229 && operands[5] != const0_rtx
12230 && REG_P (operands[2]))
12231 {
12232 if (GET_CODE (operands[6]) == CONST_INT
12233 && INTVAL (operands[6]) == INTVAL (operands[5]))
12234 operands[6] = operands[2];
12235
12236 if (nparts == 3
12237 && GET_CODE (operands[7]) == CONST_INT
12238 && INTVAL (operands[7]) == INTVAL (operands[5]))
12239 operands[7] = operands[2];
12240 }
12241
12242 if (nparts == 3
12243 && GET_CODE (operands[6]) == CONST_INT
12244 && operands[6] != const0_rtx
12245 && REG_P (operands[3])
12246 && GET_CODE (operands[7]) == CONST_INT
12247 && INTVAL (operands[7]) == INTVAL (operands[6]))
12248 operands[7] = operands[3];
12249 }
12250
12251 emit_move_insn (operands[2], operands[5]);
12252 emit_move_insn (operands[3], operands[6]);
12253 if (nparts == 3)
12254 emit_move_insn (operands[4], operands[7]);
12255
12256 return;
12257 }
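
/* A small illustration of the ordering logic above (assuming a 32-bit
   target, AT&T syntax, and a DImode destination in the %eax/%edx pair):
   loading a DImode value whose address is held in %eax must copy the
   high word first, because the low-word move clobbers the address
   register.  The collision checks above detect the overlap and swap
   operands 2-7 so that the emitted sequence is

       movl  4(%eax), %edx
       movl  (%eax), %eax

   rather than the other way around.  */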
12258
12259 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12260 left shift by a constant, either using a single shift or
12261 a sequence of add instructions. */
12262
12263 static void
12264 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12265 {
12266 if (count == 1)
12267 {
12268 emit_insn ((mode == DImode
12269 ? gen_addsi3
12270 : gen_adddi3) (operand, operand, operand));
12271 }
12272 else if (!optimize_size
12273 && count * ix86_cost->add <= ix86_cost->shift_const)
12274 {
12275 int i;
12276 for (i=0; i<count; i++)
12277 {
12278 emit_insn ((mode == DImode
12279 ? gen_addsi3
12280 : gen_adddi3) (operand, operand, operand));
12281 }
12282 }
12283 else
12284 emit_insn ((mode == DImode
12285 ? gen_ashlsi3
12286 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12287 }
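
/* Illustration of the heuristic above: when not optimizing for size and
   count * add cost <= shift_const cost, a small constant shift such as
   shifting a word left by 2 is emitted as two self-additions,

       addl  %eax, %eax
       addl  %eax, %eax

   (hypothetical register), while -Os or a larger count falls back to a
   single sal instruction.  */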
12288
12289 void
12290 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12291 {
12292 rtx low[2], high[2];
12293 int count;
12294 const int single_width = mode == DImode ? 32 : 64;
12295
12296 if (GET_CODE (operands[2]) == CONST_INT)
12297 {
12298 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12299 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12300
12301 if (count >= single_width)
12302 {
12303 emit_move_insn (high[0], low[1]);
12304 emit_move_insn (low[0], const0_rtx);
12305
12306 if (count > single_width)
12307 ix86_expand_ashl_const (high[0], count - single_width, mode);
12308 }
12309 else
12310 {
12311 if (!rtx_equal_p (operands[0], operands[1]))
12312 emit_move_insn (operands[0], operands[1]);
12313 emit_insn ((mode == DImode
12314 ? gen_x86_shld_1
12315 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12316 ix86_expand_ashl_const (low[0], count, mode);
12317 }
12318 return;
12319 }
12320
12321 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12322
12323 if (operands[1] == const1_rtx)
12324 {
12325 /* Assuming we've chosen QImode-capable registers, 1 << N
12326 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12327 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12328 {
12329 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12330
12331 ix86_expand_clear (low[0]);
12332 ix86_expand_clear (high[0]);
12333 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12334
12335 d = gen_lowpart (QImode, low[0]);
12336 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12337 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12338 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12339
12340 d = gen_lowpart (QImode, high[0]);
12341 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12342 s = gen_rtx_NE (QImode, flags, const0_rtx);
12343 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12344 }
12345
12346 /* Otherwise, we can get the same results by manually performing
12347 a bit extract operation on bit 5/6, and then performing the two
12348 shifts. The two methods of getting 0/1 into low/high are exactly
12349 the same size. Avoiding the shift in the bit extract case helps
12350 pentium4 a bit; no one else seems to care much either way. */
12351 else
12352 {
12353 rtx x;
12354
12355 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12356 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12357 else
12358 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12359 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12360
12361 emit_insn ((mode == DImode
12362 ? gen_lshrsi3
12363 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12364 emit_insn ((mode == DImode
12365 ? gen_andsi3
12366 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12367 emit_move_insn (low[0], high[0]);
12368 emit_insn ((mode == DImode
12369 ? gen_xorsi3
12370 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12371 }
12372
12373 emit_insn ((mode == DImode
12374 ? gen_ashlsi3
12375 : gen_ashldi3) (low[0], low[0], operands[2]));
12376 emit_insn ((mode == DImode
12377 ? gen_ashlsi3
12378 : gen_ashldi3) (high[0], high[0], operands[2]));
12379 return;
12380 }
12381
12382 if (operands[1] == constm1_rtx)
12383 {
12384 /* For -1 << N, we can avoid the shld instruction, because we
12385 know that we're shifting 0...31/63 ones into a -1. */
12386 emit_move_insn (low[0], constm1_rtx);
12387 if (optimize_size)
12388 emit_move_insn (high[0], low[0]);
12389 else
12390 emit_move_insn (high[0], constm1_rtx);
12391 }
12392 else
12393 {
12394 if (!rtx_equal_p (operands[0], operands[1]))
12395 emit_move_insn (operands[0], operands[1]);
12396
12397 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12398 emit_insn ((mode == DImode
12399 ? gen_x86_shld_1
12400 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12401 }
12402
12403 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12404
12405 if (TARGET_CMOVE && scratch)
12406 {
12407 ix86_expand_clear (scratch);
12408 emit_insn ((mode == DImode
12409 ? gen_x86_shift_adj_1
12410 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12411 }
12412 else
12413 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12414 }
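
/* Rough sketch of the variable-count path above for a DImode shift on a
   32-bit target (hypothetical register assignment, AT&T syntax):

       shldl %cl, %eax, %edx    -- high word gets bits shifted in from low
       sall  %cl, %eax          -- low word is shifted in place

   followed by the adjustment step: when bit 5 of the count is set
   (count >= 32), the low word is moved into the high word and the low
   word is cleared, either with cmov via the scratch register or with the
   branchy x86_shift_adj_2 pattern.  */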
12415
12416 void
12417 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12418 {
12419 rtx low[2], high[2];
12420 int count;
12421 const int single_width = mode == DImode ? 32 : 64;
12422
12423 if (GET_CODE (operands[2]) == CONST_INT)
12424 {
12425 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12426 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12427
12428 if (count == single_width * 2 - 1)
12429 {
12430 emit_move_insn (high[0], high[1]);
12431 emit_insn ((mode == DImode
12432 ? gen_ashrsi3
12433 : gen_ashrdi3) (high[0], high[0],
12434 GEN_INT (single_width - 1)));
12435 emit_move_insn (low[0], high[0]);
12436
12437 }
12438 else if (count >= single_width)
12439 {
12440 emit_move_insn (low[0], high[1]);
12441 emit_move_insn (high[0], low[0]);
12442 emit_insn ((mode == DImode
12443 ? gen_ashrsi3
12444 : gen_ashrdi3) (high[0], high[0],
12445 GEN_INT (single_width - 1)));
12446 if (count > single_width)
12447 emit_insn ((mode == DImode
12448 ? gen_ashrsi3
12449 : gen_ashrdi3) (low[0], low[0],
12450 GEN_INT (count - single_width)));
12451 }
12452 else
12453 {
12454 if (!rtx_equal_p (operands[0], operands[1]))
12455 emit_move_insn (operands[0], operands[1]);
12456 emit_insn ((mode == DImode
12457 ? gen_x86_shrd_1
12458 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12459 emit_insn ((mode == DImode
12460 ? gen_ashrsi3
12461 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12462 }
12463 }
12464 else
12465 {
12466 if (!rtx_equal_p (operands[0], operands[1]))
12467 emit_move_insn (operands[0], operands[1]);
12468
12469 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12470
12471 emit_insn ((mode == DImode
12472 ? gen_x86_shrd_1
12473 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12474 emit_insn ((mode == DImode
12475 ? gen_ashrsi3
12476 : gen_ashrdi3) (high[0], high[0], operands[2]));
12477
12478 if (TARGET_CMOVE && scratch)
12479 {
12480 emit_move_insn (scratch, high[0]);
12481 emit_insn ((mode == DImode
12482 ? gen_ashrsi3
12483 : gen_ashrdi3) (scratch, scratch,
12484 GEN_INT (single_width - 1)));
12485 emit_insn ((mode == DImode
12486 ? gen_x86_shift_adj_1
12487 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12488 scratch));
12489 }
12490 else
12491 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12492 }
12493 }
12494
12495 void
12496 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12497 {
12498 rtx low[2], high[2];
12499 int count;
12500 const int single_width = mode == DImode ? 32 : 64;
12501
12502 if (GET_CODE (operands[2]) == CONST_INT)
12503 {
12504 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12505 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12506
12507 if (count >= single_width)
12508 {
12509 emit_move_insn (low[0], high[1]);
12510 ix86_expand_clear (high[0]);
12511
12512 if (count > single_width)
12513 emit_insn ((mode == DImode
12514 ? gen_lshrsi3
12515 : gen_lshrdi3) (low[0], low[0],
12516 GEN_INT (count - single_width)));
12517 }
12518 else
12519 {
12520 if (!rtx_equal_p (operands[0], operands[1]))
12521 emit_move_insn (operands[0], operands[1]);
12522 emit_insn ((mode == DImode
12523 ? gen_x86_shrd_1
12524 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12525 emit_insn ((mode == DImode
12526 ? gen_lshrsi3
12527 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12528 }
12529 }
12530 else
12531 {
12532 if (!rtx_equal_p (operands[0], operands[1]))
12533 emit_move_insn (operands[0], operands[1]);
12534
12535 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12536
12537 emit_insn ((mode == DImode
12538 ? gen_x86_shrd_1
12539 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12540 emit_insn ((mode == DImode
12541 ? gen_lshrsi3
12542 : gen_lshrdi3) (high[0], high[0], operands[2]));
12543
12544 /* Heh. By reversing the arguments, we can reuse this pattern. */
12545 if (TARGET_CMOVE && scratch)
12546 {
12547 ix86_expand_clear (scratch);
12548 emit_insn ((mode == DImode
12549 ? gen_x86_shift_adj_1
12550 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12551 scratch));
12552 }
12553 else
12554 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12555 }
12556 }
12557
12558 /* Helper function for the string operations below. Test whether the VALUE
12559 bits of VARIABLE are clear; if they are, jump to the returned label. */
12560 static rtx
12561 ix86_expand_aligntest (rtx variable, int value)
12562 {
12563 rtx label = gen_label_rtx ();
12564 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12565 if (GET_MODE (variable) == DImode)
12566 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12567 else
12568 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12569 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12570 1, label);
12571 return label;
12572 }
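
/* Usage sketch, taken from the copy expanders below:

       rtx label = ix86_expand_aligntest (destreg, 1);
       ... emit a single byte copy and adjust the counter ...
       emit_label (label);

   so the byte copy is skipped whenever the destination address already
   had its low bit clear.  */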
12573
12574 /* Adjust COUNTER by the VALUE. */
12575 static void
12576 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12577 {
12578 if (GET_MODE (countreg) == DImode)
12579 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12580 else
12581 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12582 }
12583
12584 /* Zero extend possibly SImode EXP to Pmode register. */
12585 rtx
12586 ix86_zero_extend_to_Pmode (rtx exp)
12587 {
12588 rtx r;
12589 if (GET_MODE (exp) == VOIDmode)
12590 return force_reg (Pmode, exp);
12591 if (GET_MODE (exp) == Pmode)
12592 return copy_to_mode_reg (Pmode, exp);
12593 r = gen_reg_rtx (Pmode);
12594 emit_insn (gen_zero_extendsidi2 (r, exp));
12595 return r;
12596 }
12597
12598 /* Expand string move (memcpy) operation. Use i386 string operations when
12599 profitable. expand_clrmem contains similar code. */
12600 int
12601 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12602 {
12603 rtx srcreg, destreg, countreg, srcexp, destexp;
12604 enum machine_mode counter_mode;
12605 HOST_WIDE_INT align = 0;
12606 unsigned HOST_WIDE_INT count = 0;
12607
12608 if (GET_CODE (align_exp) == CONST_INT)
12609 align = INTVAL (align_exp);
12610
12611 /* Can't use any of this if the user has appropriated esi or edi. */
12612 if (global_regs[4] || global_regs[5])
12613 return 0;
12614
12615 /* This simple hack avoids all inlining code and simplifies code below. */
12616 if (!TARGET_ALIGN_STRINGOPS)
12617 align = 64;
12618
12619 if (GET_CODE (count_exp) == CONST_INT)
12620 {
12621 count = INTVAL (count_exp);
12622 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12623 return 0;
12624 }
12625
12626 /* Figure out proper mode for counter. For 32bits it is always SImode,
12627 for 64bits use SImode when possible, otherwise DImode.
12628 Set count to number of bytes copied when known at compile time. */
12629 if (!TARGET_64BIT
12630 || GET_MODE (count_exp) == SImode
12631 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12632 counter_mode = SImode;
12633 else
12634 counter_mode = DImode;
12635
12636 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12637
12638 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12639 if (destreg != XEXP (dst, 0))
12640 dst = replace_equiv_address_nv (dst, destreg);
12641 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12642 if (srcreg != XEXP (src, 0))
12643 src = replace_equiv_address_nv (src, srcreg);
12644
12645 /* When optimizing for size, emit a simple rep ; movsb instruction for
12646 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12647 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12648 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12649 count / 4 + (count & 3), while the other sequence is either 4 or 7 bytes,
12650 but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12651 known to be zero or not. The rep; movsb sequence causes higher
12652 register pressure though, so take that into account. */
12653
12654 if ((!optimize || optimize_size)
12655 && (count == 0
12656 || ((count & 0x03)
12657 && (!optimize_size
12658 || count > 5 * 4
12659 || (count & 3) + count / 4 > 6))))
12660 {
12661 emit_insn (gen_cld ());
12662 countreg = ix86_zero_extend_to_Pmode (count_exp);
12663 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12664 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12665 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12666 destexp, srcexp));
12667 }
12668
12669 /* For constant aligned (or small unaligned) copies use rep movsl
12670 followed by code copying the rest. For PentiumPro ensure 8 byte
12671 alignment to allow rep movsl acceleration. */
12672
12673 else if (count != 0
12674 && (align >= 8
12675 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12676 || optimize_size || count < (unsigned int) 64))
12677 {
12678 unsigned HOST_WIDE_INT offset = 0;
12679 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12680 rtx srcmem, dstmem;
12681
12682 emit_insn (gen_cld ());
12683 if (count & ~(size - 1))
12684 {
12685 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12686 {
12687 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12688
12689 while (offset < (count & ~(size - 1)))
12690 {
12691 srcmem = adjust_automodify_address_nv (src, movs_mode,
12692 srcreg, offset);
12693 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12694 destreg, offset);
12695 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12696 offset += size;
12697 }
12698 }
12699 else
12700 {
12701 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12702 & (TARGET_64BIT ? -1 : 0x3fffffff));
12703 countreg = copy_to_mode_reg (counter_mode, countreg);
12704 countreg = ix86_zero_extend_to_Pmode (countreg);
12705
12706 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12707 GEN_INT (size == 4 ? 2 : 3));
12708 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12709 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12710
12711 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12712 countreg, destexp, srcexp));
12713 offset = count & ~(size - 1);
12714 }
12715 }
12716 if (size == 8 && (count & 0x04))
12717 {
12718 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12719 offset);
12720 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12721 offset);
12722 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12723 offset += 4;
12724 }
12725 if (count & 0x02)
12726 {
12727 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12728 offset);
12729 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12730 offset);
12731 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12732 offset += 2;
12733 }
12734 if (count & 0x01)
12735 {
12736 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12737 offset);
12738 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12739 offset);
12740 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12741 }
12742 }
12743 /* The generic code based on the glibc implementation:
12744 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12745 allowing accelerated copying there)
12746 - copy the data using rep movsl
12747 - copy the rest. */
12748 else
12749 {
12750 rtx countreg2;
12751 rtx label = NULL;
12752 rtx srcmem, dstmem;
12753 int desired_alignment = (TARGET_PENTIUMPRO
12754 && (count == 0 || count >= (unsigned int) 260)
12755 ? 8 : UNITS_PER_WORD);
12756 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12757 dst = change_address (dst, BLKmode, destreg);
12758 src = change_address (src, BLKmode, srcreg);
12759
12760 /* In case we don't know anything about the alignment, default to
12761 library version, since it is usually equally fast and results in
12762 shorter code.
12763
12764 Also emit call when we know that the count is large and call overhead
12765 will not be important. */
12766 if (!TARGET_INLINE_ALL_STRINGOPS
12767 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12768 return 0;
12769
12770 if (TARGET_SINGLE_STRINGOP)
12771 emit_insn (gen_cld ());
12772
12773 countreg2 = gen_reg_rtx (Pmode);
12774 countreg = copy_to_mode_reg (counter_mode, count_exp);
12775
12776 /* We don't use loops to align the destination or to copy parts smaller
12777 than 4 bytes, because gcc is able to optimize such code better (when
12778 the destination or the count really is aligned, gcc is often
12779 able to predict the branches) and also because it is friendlier to
12780 hardware branch prediction.
12781
12782 Using loops is beneficial for the generic case, because we can
12783 handle small counts with them. Many CPUs (such as Athlon)
12784 have large REP prefix setup costs.
12785
12786 This is quite costly. Maybe we can revisit this decision later or
12787 add some customizability to this code. */
12788
12789 if (count == 0 && align < desired_alignment)
12790 {
12791 label = gen_label_rtx ();
12792 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12793 LEU, 0, counter_mode, 1, label);
12794 }
12795 if (align <= 1)
12796 {
12797 rtx label = ix86_expand_aligntest (destreg, 1);
12798 srcmem = change_address (src, QImode, srcreg);
12799 dstmem = change_address (dst, QImode, destreg);
12800 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12801 ix86_adjust_counter (countreg, 1);
12802 emit_label (label);
12803 LABEL_NUSES (label) = 1;
12804 }
12805 if (align <= 2)
12806 {
12807 rtx label = ix86_expand_aligntest (destreg, 2);
12808 srcmem = change_address (src, HImode, srcreg);
12809 dstmem = change_address (dst, HImode, destreg);
12810 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12811 ix86_adjust_counter (countreg, 2);
12812 emit_label (label);
12813 LABEL_NUSES (label) = 1;
12814 }
12815 if (align <= 4 && desired_alignment > 4)
12816 {
12817 rtx label = ix86_expand_aligntest (destreg, 4);
12818 srcmem = change_address (src, SImode, srcreg);
12819 dstmem = change_address (dst, SImode, destreg);
12820 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12821 ix86_adjust_counter (countreg, 4);
12822 emit_label (label);
12823 LABEL_NUSES (label) = 1;
12824 }
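/* An illustrative, self-contained sketch (not part of GCC; kept disabled) of
   what the aligntest prologue above does, written as plain C.  One byte, then
   two, then four are peeled off depending on the low bits of the destination,
   so the rep movsl/movsq that follows starts on an aligned boundary.  The
   helper name align_destination is made up for the example, and the
   desired_alignment guards are a simplification of the align <= N checks
   above.  */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
align_destination (char **dstp, const char **srcp, size_t *countp,
		   unsigned int desired_alignment)
{
  if (((uintptr_t) *dstp & 1) && desired_alignment > 1)
    {
      memcpy (*dstp, *srcp, 1);
      (*dstp)++, (*srcp)++, (*countp)--;
    }
  if (((uintptr_t) *dstp & 2) && desired_alignment > 2)
    {
      memcpy (*dstp, *srcp, 2);
      *dstp += 2, *srcp += 2, *countp -= 2;
    }
  if (((uintptr_t) *dstp & 4) && desired_alignment > 4)
    {
      memcpy (*dstp, *srcp, 4);
      *dstp += 4, *srcp += 4, *countp -= 4;
    }
}
#endif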
12825
12826 if (label && desired_alignment > 4 && !TARGET_64BIT)
12827 {
12828 emit_label (label);
12829 LABEL_NUSES (label) = 1;
12830 label = NULL_RTX;
12831 }
12832 if (!TARGET_SINGLE_STRINGOP)
12833 emit_insn (gen_cld ());
12834 if (TARGET_64BIT)
12835 {
12836 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12837 GEN_INT (3)));
12838 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12839 }
12840 else
12841 {
12842 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12843 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12844 }
12845 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12846 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12847 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12848 countreg2, destexp, srcexp));
12849
12850 if (label)
12851 {
12852 emit_label (label);
12853 LABEL_NUSES (label) = 1;
12854 }
12855 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12856 {
12857 srcmem = change_address (src, SImode, srcreg);
12858 dstmem = change_address (dst, SImode, destreg);
12859 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12860 }
12861 if ((align <= 4 || count == 0) && TARGET_64BIT)
12862 {
12863 rtx label = ix86_expand_aligntest (countreg, 4);
12864 srcmem = change_address (src, SImode, srcreg);
12865 dstmem = change_address (dst, SImode, destreg);
12866 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12867 emit_label (label);
12868 LABEL_NUSES (label) = 1;
12869 }
12870 if (align > 2 && count != 0 && (count & 2))
12871 {
12872 srcmem = change_address (src, HImode, srcreg);
12873 dstmem = change_address (dst, HImode, destreg);
12874 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12875 }
12876 if (align <= 2 || count == 0)
12877 {
12878 rtx label = ix86_expand_aligntest (countreg, 2);
12879 srcmem = change_address (src, HImode, srcreg);
12880 dstmem = change_address (dst, HImode, destreg);
12881 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12882 emit_label (label);
12883 LABEL_NUSES (label) = 1;
12884 }
12885 if (align > 1 && count != 0 && (count & 1))
12886 {
12887 srcmem = change_address (src, QImode, srcreg);
12888 dstmem = change_address (dst, QImode, destreg);
12889 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12890 }
12891 if (align <= 1 || count == 0)
12892 {
12893 rtx label = ix86_expand_aligntest (countreg, 1);
12894 srcmem = change_address (src, QImode, srcreg);
12895 dstmem = change_address (dst, QImode, destreg);
12896 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12897 emit_label (label);
12898 LABEL_NUSES (label) = 1;
12899 }
12900 }
12901
12902 return 1;
12903 }
12904
12905 /* Expand string clear operation (bzero). Use i386 string operations when
12906 profitable. ix86_expand_movmem contains similar code. */
12907 int
12908 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12909 {
12910 rtx destreg, zeroreg, countreg, destexp;
12911 enum machine_mode counter_mode;
12912 HOST_WIDE_INT align = 0;
12913 unsigned HOST_WIDE_INT count = 0;
12914
12915 if (GET_CODE (align_exp) == CONST_INT)
12916 align = INTVAL (align_exp);
12917
12918 /* Can't use any of this if the user has appropriated esi. */
12919 if (global_regs[4])
12920 return 0;
12921
12922 /* This simple hack avoids all inlining code and simplifies code below. */
12923 if (!TARGET_ALIGN_STRINGOPS)
12924 align = 32;
12925
12926 if (GET_CODE (count_exp) == CONST_INT)
12927 {
12928 count = INTVAL (count_exp);
12929 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12930 return 0;
12931 }
12932 /* Figure out proper mode for counter. For 32bits it is always SImode,
12933 for 64bits use SImode when possible, otherwise DImode.
12934 Set count to the number of bytes cleared when known at compile time. */
12935 if (!TARGET_64BIT
12936 || GET_MODE (count_exp) == SImode
12937 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12938 counter_mode = SImode;
12939 else
12940 counter_mode = DImode;
12941
12942 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12943 if (destreg != XEXP (dst, 0))
12944 dst = replace_equiv_address_nv (dst, destreg);
12945
12946
12947 /* When optimizing for size emit a simple rep ; stosb instruction for
12948 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12949 sequence is 7 bytes long, so if optimizing for size and the count is
12950 small enough that some stosl, stosw and stosb instructions without
12951 rep are shorter, fall through into the next if. */
12952
12953 if ((!optimize || optimize_size)
12954 && (count == 0
12955 || ((count & 0x03)
12956 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12957 {
12958 emit_insn (gen_cld ());
12959
12960 countreg = ix86_zero_extend_to_Pmode (count_exp);
12961 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12962 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12963 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12964 }
12965 else if (count != 0
12966 && (align >= 8
12967 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12968 || optimize_size || count < (unsigned int) 64))
12969 {
12970 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12971 unsigned HOST_WIDE_INT offset = 0;
12972
12973 emit_insn (gen_cld ());
12974
12975 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12976 if (count & ~(size - 1))
12977 {
12978 unsigned HOST_WIDE_INT repcount;
12979 unsigned int max_nonrep;
12980
12981 repcount = count >> (size == 4 ? 2 : 3);
12982 if (!TARGET_64BIT)
12983 repcount &= 0x3fffffff;
12984
12985 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12986 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12987 bytes. In both cases the latter seems to be faster for small
12988 values of N. */
12989 max_nonrep = size == 4 ? 7 : 4;
12990 if (!optimize_size)
12991 switch (ix86_tune)
12992 {
12993 case PROCESSOR_PENTIUM4:
12994 case PROCESSOR_NOCONA:
12995 max_nonrep = 3;
12996 break;
12997 default:
12998 break;
12999 }
13000
13001 if (repcount <= max_nonrep)
13002 while (repcount-- > 0)
13003 {
13004 rtx mem = adjust_automodify_address_nv (dst,
13005 GET_MODE (zeroreg),
13006 destreg, offset);
13007 emit_insn (gen_strset (destreg, mem, zeroreg));
13008 offset += size;
13009 }
13010 else
13011 {
13012 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13013 countreg = ix86_zero_extend_to_Pmode (countreg);
13014 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13015 GEN_INT (size == 4 ? 2 : 3));
13016 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13017 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13018 destexp));
13019 offset = count & ~(size - 1);
13020 }
13021 }
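/* An illustrative, self-contained sketch (not part of GCC; kept disabled) of
   the rep-vs-unrolled store decision above.  The byte counts (7 for
   movl $N, %ecx; rep; stosl, 1 per plain stosl) follow the comment above, and
   max_nonrep mirrors the default 32-bit value used there.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const unsigned int rep_stosl_bytes = 7;   /* movl $N, %ecx; rep; stosl  */
  const unsigned int max_nonrep = 7;        /* default for 32-bit stores  */
  unsigned int n;

  for (n = 1; n <= 10; n++)
    printf ("repcount %2u: %2u bytes unrolled vs %u bytes for the rep form -> %s\n",
	    n, n, rep_stosl_bytes,
	    n <= max_nonrep ? "unrolled stosl" : "rep stosl");
  return 0;
}
#endif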
13022 if (size == 8 && (count & 0x04))
13023 {
13024 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13025 offset);
13026 emit_insn (gen_strset (destreg, mem,
13027 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13028 offset += 4;
13029 }
13030 if (count & 0x02)
13031 {
13032 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13033 offset);
13034 emit_insn (gen_strset (destreg, mem,
13035 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13036 offset += 2;
13037 }
13038 if (count & 0x01)
13039 {
13040 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13041 offset);
13042 emit_insn (gen_strset (destreg, mem,
13043 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13044 }
13045 }
13046 else
13047 {
13048 rtx countreg2;
13049 rtx label = NULL;
13050 /* Compute desired alignment of the string operation. */
13051 int desired_alignment = (TARGET_PENTIUMPRO
13052 && (count == 0 || count >= (unsigned int) 260)
13053 ? 8 : UNITS_PER_WORD);
13054
13055 /* In case we don't know anything about the alignment, default to
13056 library version, since it is usually equally fast and results in
13057 shorter code.
13058
13059 Also emit call when we know that the count is large and call overhead
13060 will not be important. */
13061 if (!TARGET_INLINE_ALL_STRINGOPS
13062 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13063 return 0;
13064
13065 if (TARGET_SINGLE_STRINGOP)
13066 emit_insn (gen_cld ());
13067
13068 countreg2 = gen_reg_rtx (Pmode);
13069 countreg = copy_to_mode_reg (counter_mode, count_exp);
13070 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13071 /* Get rid of MEM_OFFSET, it won't be accurate. */
13072 dst = change_address (dst, BLKmode, destreg);
13073
13074 if (count == 0 && align < desired_alignment)
13075 {
13076 label = gen_label_rtx ();
13077 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13078 LEU, 0, counter_mode, 1, label);
13079 }
13080 if (align <= 1)
13081 {
13082 rtx label = ix86_expand_aligntest (destreg, 1);
13083 emit_insn (gen_strset (destreg, dst,
13084 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13085 ix86_adjust_counter (countreg, 1);
13086 emit_label (label);
13087 LABEL_NUSES (label) = 1;
13088 }
13089 if (align <= 2)
13090 {
13091 rtx label = ix86_expand_aligntest (destreg, 2);
13092 emit_insn (gen_strset (destreg, dst,
13093 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13094 ix86_adjust_counter (countreg, 2);
13095 emit_label (label);
13096 LABEL_NUSES (label) = 1;
13097 }
13098 if (align <= 4 && desired_alignment > 4)
13099 {
13100 rtx label = ix86_expand_aligntest (destreg, 4);
13101 emit_insn (gen_strset (destreg, dst,
13102 (TARGET_64BIT
13103 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13104 : zeroreg)));
13105 ix86_adjust_counter (countreg, 4);
13106 emit_label (label);
13107 LABEL_NUSES (label) = 1;
13108 }
13109
13110 if (label && desired_alignment > 4 && !TARGET_64BIT)
13111 {
13112 emit_label (label);
13113 LABEL_NUSES (label) = 1;
13114 label = NULL_RTX;
13115 }
13116
13117 if (!TARGET_SINGLE_STRINGOP)
13118 emit_insn (gen_cld ());
13119 if (TARGET_64BIT)
13120 {
13121 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13122 GEN_INT (3)));
13123 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13124 }
13125 else
13126 {
13127 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13128 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13129 }
13130 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13131 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13132
13133 if (label)
13134 {
13135 emit_label (label);
13136 LABEL_NUSES (label) = 1;
13137 }
13138
13139 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13140 emit_insn (gen_strset (destreg, dst,
13141 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13142 if (TARGET_64BIT && (align <= 4 || count == 0))
13143 {
13144 rtx label = ix86_expand_aligntest (countreg, 4);
13145 emit_insn (gen_strset (destreg, dst,
13146 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13147 emit_label (label);
13148 LABEL_NUSES (label) = 1;
13149 }
13150 if (align > 2 && count != 0 && (count & 2))
13151 emit_insn (gen_strset (destreg, dst,
13152 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13153 if (align <= 2 || count == 0)
13154 {
13155 rtx label = ix86_expand_aligntest (countreg, 2);
13156 emit_insn (gen_strset (destreg, dst,
13157 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13158 emit_label (label);
13159 LABEL_NUSES (label) = 1;
13160 }
13161 if (align > 1 && count != 0 && (count & 1))
13162 emit_insn (gen_strset (destreg, dst,
13163 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13164 if (align <= 1 || count == 0)
13165 {
13166 rtx label = ix86_expand_aligntest (countreg, 1);
13167 emit_insn (gen_strset (destreg, dst,
13168 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13169 emit_label (label);
13170 LABEL_NUSES (label) = 1;
13171 }
13172 }
13173 return 1;
13174 }
13175
13176 /* Expand strlen. */
13177 int
13178 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13179 {
13180 rtx addr, scratch1, scratch2, scratch3, scratch4;
13181
13182 /* The generic case of the strlen expander is long. Avoid expanding it
13183 unless TARGET_INLINE_ALL_STRINGOPS. */
13184
13185 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13186 && !TARGET_INLINE_ALL_STRINGOPS
13187 && !optimize_size
13188 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13189 return 0;
13190
13191 addr = force_reg (Pmode, XEXP (src, 0));
13192 scratch1 = gen_reg_rtx (Pmode);
13193
13194 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13195 && !optimize_size)
13196 {
13197 /* It seems that some optimizer does not combine a call like
13198 foo(strlen(bar), strlen(bar));
13199 when the move and the subtraction are done here. It does calculate
13200 the length just once when these instructions are emitted inside of
13201 output_strlen_unroll(). But since &bar[strlen(bar)] is
13202 often used and this uses one fewer register for the lifetime of
13203 output_strlen_unroll(), this is better. */
13204
13205 emit_move_insn (out, addr);
13206
13207 ix86_expand_strlensi_unroll_1 (out, src, align);
13208
13209 /* strlensi_unroll_1 returns the address of the zero at the end of
13210 the string, like memchr(), so compute the length by subtracting
13211 the start address. */
13212 if (TARGET_64BIT)
13213 emit_insn (gen_subdi3 (out, out, addr));
13214 else
13215 emit_insn (gen_subsi3 (out, out, addr));
13216 }
13217 else
13218 {
13219 rtx unspec;
13220 scratch2 = gen_reg_rtx (Pmode);
13221 scratch3 = gen_reg_rtx (Pmode);
13222 scratch4 = force_reg (Pmode, constm1_rtx);
13223
13224 emit_move_insn (scratch3, addr);
13225 eoschar = force_reg (QImode, eoschar);
13226
13227 emit_insn (gen_cld ());
13228 src = replace_equiv_address_nv (src, scratch3);
13229
13230 /* If .md starts supporting :P, this can be done in .md. */
13231 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13232 scratch4), UNSPEC_SCAS);
13233 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13234 if (TARGET_64BIT)
13235 {
13236 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13237 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13238 }
13239 else
13240 {
13241 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13242 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13243 }
13244 }
13245 return 1;
13246 }
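/* An illustrative, self-contained sketch (not part of GCC; kept disabled) of
   the arithmetic behind the repnz; scasb branch above.  The scan starts with
   the count register at -1 and decrements it once per byte examined,
   including the terminating zero, so afterwards count == -(len + 2) and the
   length is recovered as ~count - 1, which is what the one_cmpl and add -1
   insns compute.  The helper name is made up for the example.  */
#if 0
#include <assert.h>
#include <string.h>

static unsigned long
strlen_via_scas_arithmetic (const char *s)
{
  unsigned long count = (unsigned long) -1;   /* %ecx before repnz scasb  */
  const char *p = s;

  do
    count--;                                  /* one decrement per scanned byte  */
  while (*p++ != '\0');

  return ~count - 1;                          /* == strlen (s)  */
}

int
main (void)
{
  assert (strlen_via_scas_arithmetic ("") == 0);
  assert (strlen_via_scas_arithmetic ("gcc") == 3);
  assert (strlen_via_scas_arithmetic ("string ops") == strlen ("string ops"));
  return 0;
}
#endif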
13247
13248 /* Expand the appropriate insns for doing strlen if not just doing
13249 repnz; scasb
13250
13251 out = result, initialized with the start address
13252 align_rtx = alignment of the address.
13253 scratch = scratch register, initialized with the start address when
13254 not aligned, otherwise undefined
13255
13256 This is just the body. It needs the initializations mentioned above and
13257 some address computing at the end. These things are done in i386.md. */
13258
13259 static void
13260 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13261 {
13262 int align;
13263 rtx tmp;
13264 rtx align_2_label = NULL_RTX;
13265 rtx align_3_label = NULL_RTX;
13266 rtx align_4_label = gen_label_rtx ();
13267 rtx end_0_label = gen_label_rtx ();
13268 rtx mem;
13269 rtx tmpreg = gen_reg_rtx (SImode);
13270 rtx scratch = gen_reg_rtx (SImode);
13271 rtx cmp;
13272
13273 align = 0;
13274 if (GET_CODE (align_rtx) == CONST_INT)
13275 align = INTVAL (align_rtx);
13276
13277 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13278
13279 /* Is there a known alignment and is it less than 4? */
13280 if (align < 4)
13281 {
13282 rtx scratch1 = gen_reg_rtx (Pmode);
13283 emit_move_insn (scratch1, out);
13284 /* Is there a known alignment and is it not 2? */
13285 if (align != 2)
13286 {
13287 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13288 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13289
13290 /* Leave just the 3 lower bits. */
13291 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13292 NULL_RTX, 0, OPTAB_WIDEN);
13293
13294 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13295 Pmode, 1, align_4_label);
13296 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13297 Pmode, 1, align_2_label);
13298 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13299 Pmode, 1, align_3_label);
13300 }
13301 else
13302 {
13303 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13304 check whether the pointer is already 4-byte aligned. */
13305
13306 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13307 NULL_RTX, 0, OPTAB_WIDEN);
13308
13309 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13310 Pmode, 1, align_4_label);
13311 }
13312
13313 mem = change_address (src, QImode, out);
13314
13315 /* Now compare the bytes. */
13316
13317 /* Compare the first n unaligned bytes one byte at a time. */
13318 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13319 QImode, 1, end_0_label);
13320
13321 /* Increment the address. */
13322 if (TARGET_64BIT)
13323 emit_insn (gen_adddi3 (out, out, const1_rtx));
13324 else
13325 emit_insn (gen_addsi3 (out, out, const1_rtx));
13326
13327 /* Not needed with an alignment of 2 */
13328 if (align != 2)
13329 {
13330 emit_label (align_2_label);
13331
13332 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13333 end_0_label);
13334
13335 if (TARGET_64BIT)
13336 emit_insn (gen_adddi3 (out, out, const1_rtx));
13337 else
13338 emit_insn (gen_addsi3 (out, out, const1_rtx));
13339
13340 emit_label (align_3_label);
13341 }
13342
13343 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13344 end_0_label);
13345
13346 if (TARGET_64BIT)
13347 emit_insn (gen_adddi3 (out, out, const1_rtx));
13348 else
13349 emit_insn (gen_addsi3 (out, out, const1_rtx));
13350 }
13351
13352 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13353 align this loop; doing so only enlarges the program and does not
13354 speed it up. */
13355 emit_label (align_4_label);
13356
13357 mem = change_address (src, SImode, out);
13358 emit_move_insn (scratch, mem);
13359 if (TARGET_64BIT)
13360 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13361 else
13362 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13363
13364 /* This formula yields a nonzero result iff one of the bytes is zero.
13365 This saves three branches inside the loop and many cycles. */
13366
13367 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13368 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13369 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13370 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13371 gen_int_mode (0x80808080, SImode)));
13372 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13373 align_4_label);
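/* An illustrative, self-contained sketch (not part of GCC; kept disabled) of
   the zero-byte test used above, in plain C.  (x - 0x01010101) & ~x &
   0x80808080 is nonzero exactly when some byte of x is zero: subtracting 1
   from a zero byte borrows into bit 7, and the & ~x masks out bytes that had
   bit 7 set to begin with.  The helper name has_zero_byte is made up for the
   example.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
has_zero_byte (uint32_t x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

int
main (void)
{
  assert (has_zero_byte (0x11223300));    /* low byte is zero  */
  assert (has_zero_byte (0x00616263));    /* high byte is zero  */
  assert (!has_zero_byte (0x61626364));   /* no zero byte  */
  assert (!has_zero_byte (0x80818283));   /* high bits set, still no zero  */
  return 0;
}
#endif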
13374
13375 if (TARGET_CMOVE)
13376 {
13377 rtx reg = gen_reg_rtx (SImode);
13378 rtx reg2 = gen_reg_rtx (Pmode);
13379 emit_move_insn (reg, tmpreg);
13380 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13381
13382 /* If zero is not in the first two bytes, move two bytes forward. */
13383 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13384 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13385 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13386 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13387 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13388 reg,
13389 tmpreg)));
13390 /* Emit lea manually to avoid clobbering of flags. */
13391 emit_insn (gen_rtx_SET (SImode, reg2,
13392 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13393
13394 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13395 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13396 emit_insn (gen_rtx_SET (VOIDmode, out,
13397 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13398 reg2,
13399 out)));
13400
13401 }
13402 else
13403 {
13404 rtx end_2_label = gen_label_rtx ();
13405 /* Is zero in the first two bytes? */
13406
13407 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13408 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13409 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13410 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13411 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13412 pc_rtx);
13413 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13414 JUMP_LABEL (tmp) = end_2_label;
13415
13416 /* Not in the first two. Move two bytes forward. */
13417 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13418 if (TARGET_64BIT)
13419 emit_insn (gen_adddi3 (out, out, const2_rtx));
13420 else
13421 emit_insn (gen_addsi3 (out, out, const2_rtx));
13422
13423 emit_label (end_2_label);
13424
13425 }
13426
13427 /* Avoid a branch when fixing up the final byte. */
13428 tmpreg = gen_lowpart (QImode, tmpreg);
13429 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13430 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13431 if (TARGET_64BIT)
13432 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13433 else
13434 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13435
13436 emit_label (end_0_label);
13437 }
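/* An illustrative, self-contained sketch (not part of GCC; kept disabled) of
   the branch-free fix-up at the end of the function above.  After the word
   loop, OUT points 4 bytes past the start of the word that contained the
   zero (6 bytes past it if the zero was in the upper half), and the low byte
   of the mask is 0x80 when the zero is the first byte of the remaining pair.
   Doubling that byte moves the 0x80 bit into the carry, and OUT -= 3 + carry
   then lands exactly on the terminating zero.  The helper name and the
   example addresses are made up for the example.  */
#if 0
#include <assert.h>

static unsigned int
fixup_out (unsigned int out, unsigned int mask_low_byte)
{
  unsigned int carry = (mask_low_byte + mask_low_byte) > 0xff;  /* addb sets CF  */
  return out - 3 - carry;                                       /* sbb-style adjust  */
}

int
main (void)
{
  /* Word starts at address 100; zero byte at offset 0..3 of that word.  */
  assert (fixup_out (100 + 4, 0x80) == 100);      /* zero at byte 0  */
  assert (fixup_out (100 + 4, 0x00) == 101);      /* zero at byte 1  */
  assert (fixup_out (100 + 6, 0x80) == 102);      /* zero at byte 2  */
  assert (fixup_out (100 + 6, 0x00) == 103);      /* zero at byte 3  */
  return 0;
}
#endif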
13438
13439 void
13440 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13441 rtx callarg2 ATTRIBUTE_UNUSED,
13442 rtx pop, int sibcall)
13443 {
13444 rtx use = NULL, call;
13445
13446 if (pop == const0_rtx)
13447 pop = NULL;
13448 gcc_assert (!TARGET_64BIT || !pop);
13449
13450 if (TARGET_MACHO && !TARGET_64BIT)
13451 {
13452 #if TARGET_MACHO
13453 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13454 fnaddr = machopic_indirect_call_target (fnaddr);
13455 #endif
13456 }
13457 else
13458 {
13459 /* Static functions and indirect calls don't need the pic register. */
13460 if (! TARGET_64BIT && flag_pic
13461 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13462 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13463 use_reg (&use, pic_offset_table_rtx);
13464 }
13465
13466 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13467 {
13468 rtx al = gen_rtx_REG (QImode, 0);
13469 emit_move_insn (al, callarg2);
13470 use_reg (&use, al);
13471 }
13472
13473 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13474 {
13475 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13476 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13477 }
13478 if (sibcall && TARGET_64BIT
13479 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13480 {
13481 rtx addr;
13482 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13483 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13484 emit_move_insn (fnaddr, addr);
13485 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13486 }
13487
13488 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13489 if (retval)
13490 call = gen_rtx_SET (VOIDmode, retval, call);
13491 if (pop)
13492 {
13493 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13494 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13495 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13496 }
13497
13498 call = emit_call_insn (call);
13499 if (use)
13500 CALL_INSN_FUNCTION_USAGE (call) = use;
13501 }
13502
13503 \f
13504 /* Clear stack slot assignments remembered from previous functions.
13505 This is called from INIT_EXPANDERS once before RTL is emitted for each
13506 function. */
13507
13508 static struct machine_function *
13509 ix86_init_machine_status (void)
13510 {
13511 struct machine_function *f;
13512
13513 f = ggc_alloc_cleared (sizeof (struct machine_function));
13514 f->use_fast_prologue_epilogue_nregs = -1;
13515 f->tls_descriptor_call_expanded_p = 0;
13516
13517 return f;
13518 }
13519
13520 /* Return a MEM corresponding to a stack slot with mode MODE.
13521 Allocate a new slot if necessary.
13522
13523 The RTL for a function can have several slots available: N is
13524 which slot to use. */
13525
13526 rtx
13527 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13528 {
13529 struct stack_local_entry *s;
13530
13531 gcc_assert (n < MAX_386_STACK_LOCALS);
13532
13533 for (s = ix86_stack_locals; s; s = s->next)
13534 if (s->mode == mode && s->n == n)
13535 return copy_rtx (s->rtl);
13536
13537 s = (struct stack_local_entry *)
13538 ggc_alloc (sizeof (struct stack_local_entry));
13539 s->n = n;
13540 s->mode = mode;
13541 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13542
13543 s->next = ix86_stack_locals;
13544 ix86_stack_locals = s;
13545 return s->rtl;
13546 }
13547
13548 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13549
13550 static GTY(()) rtx ix86_tls_symbol;
13551 rtx
13552 ix86_tls_get_addr (void)
13553 {
13554
13555 if (!ix86_tls_symbol)
13556 {
13557 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13558 (TARGET_ANY_GNU_TLS
13559 && !TARGET_64BIT)
13560 ? "___tls_get_addr"
13561 : "__tls_get_addr");
13562 }
13563
13564 return ix86_tls_symbol;
13565 }
13566
13567 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13568
13569 static GTY(()) rtx ix86_tls_module_base_symbol;
13570 rtx
13571 ix86_tls_module_base (void)
13572 {
13573
13574 if (!ix86_tls_module_base_symbol)
13575 {
13576 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13577 "_TLS_MODULE_BASE_");
13578 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13579 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13580 }
13581
13582 return ix86_tls_module_base_symbol;
13583 }
13584 \f
13585 /* Calculate the length of the memory address in the instruction
13586 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13587
13588 int
13589 memory_address_length (rtx addr)
13590 {
13591 struct ix86_address parts;
13592 rtx base, index, disp;
13593 int len;
13594 int ok;
13595
13596 if (GET_CODE (addr) == PRE_DEC
13597 || GET_CODE (addr) == POST_INC
13598 || GET_CODE (addr) == PRE_MODIFY
13599 || GET_CODE (addr) == POST_MODIFY)
13600 return 0;
13601
13602 ok = ix86_decompose_address (addr, &parts);
13603 gcc_assert (ok);
13604
13605 if (parts.base && GET_CODE (parts.base) == SUBREG)
13606 parts.base = SUBREG_REG (parts.base);
13607 if (parts.index && GET_CODE (parts.index) == SUBREG)
13608 parts.index = SUBREG_REG (parts.index);
13609
13610 base = parts.base;
13611 index = parts.index;
13612 disp = parts.disp;
13613 len = 0;
13614
13615 /* Rule of thumb:
13616 - esp as the base always wants an index,
13617 - ebp as the base always wants a displacement. */
13618
13619 /* Register Indirect. */
13620 if (base && !index && !disp)
13621 {
13622 /* esp (for its index) and ebp (for its displacement) need
13623 the two-byte modrm form. */
13624 if (addr == stack_pointer_rtx
13625 || addr == arg_pointer_rtx
13626 || addr == frame_pointer_rtx
13627 || addr == hard_frame_pointer_rtx)
13628 len = 1;
13629 }
13630
13631 /* Direct Addressing. */
13632 else if (disp && !base && !index)
13633 len = 4;
13634
13635 else
13636 {
13637 /* Find the length of the displacement constant. */
13638 if (disp)
13639 {
13640 if (base && satisfies_constraint_K (disp))
13641 len = 1;
13642 else
13643 len = 4;
13644 }
13645 /* ebp always wants a displacement. */
13646 else if (base == hard_frame_pointer_rtx)
13647 len = 1;
13648
13649 /* An index requires the two-byte modrm form.... */
13650 if (index
13651 /* ...like esp, which always wants an index. */
13652 || base == stack_pointer_rtx
13653 || base == arg_pointer_rtx
13654 || base == frame_pointer_rtx)
13655 len += 1;
13656 }
13657
13658 return len;
13659 }
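/* An illustrative, self-contained sketch (not part of GCC; kept disabled)
   giving a few worked examples of the length rules above for common 32-bit
   addressing modes, expressed as a deliberately simplified model.  The
   numbers count bytes beyond the one-byte modrm, matching the function's
   convention; the helper name and its parameters are made up for the
   example.  */
#if 0
#include <assert.h>

static int
extra_address_bytes (int has_base, int base_is_sp, int base_is_bp,
		     int has_index, int has_disp, int disp_fits_in_8_bits)
{
  int len = 0;

  if (has_base && !has_index && !has_disp)
    return base_is_sp || base_is_bp ? 1 : 0;   /* esp needs SIB, ebp needs disp8  */
  if (has_disp && !has_base && !has_index)
    return 4;                                  /* absolute address: disp32  */

  if (has_disp)
    len = has_base && disp_fits_in_8_bits ? 1 : 4;
  else if (base_is_bp)
    len = 1;                                   /* ebp always wants a displacement  */
  if (has_index || base_is_sp)
    len += 1;                                  /* SIB byte  */
  return len;
}

int
main (void)
{
  assert (extra_address_bytes (1, 0, 0, 0, 0, 0) == 0);  /* (%eax)          */
  assert (extra_address_bytes (1, 1, 0, 0, 0, 0) == 1);  /* (%esp)          */
  assert (extra_address_bytes (1, 0, 1, 0, 1, 1) == 1);  /* 8(%ebp)         */
  assert (extra_address_bytes (0, 0, 0, 0, 1, 0) == 4);  /* absolute symbol */
  assert (extra_address_bytes (1, 0, 0, 1, 1, 1) == 2);  /* 4(%eax,%ebx,2)  */
  return 0;
}
#endif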
13660
13661 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13662 is set, expect that the insn has an 8-bit immediate alternative. */
13663 int
13664 ix86_attr_length_immediate_default (rtx insn, int shortform)
13665 {
13666 int len = 0;
13667 int i;
13668 extract_insn_cached (insn);
13669 for (i = recog_data.n_operands - 1; i >= 0; --i)
13670 if (CONSTANT_P (recog_data.operand[i]))
13671 {
13672 gcc_assert (!len);
13673 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13674 len = 1;
13675 else
13676 {
13677 switch (get_attr_mode (insn))
13678 {
13679 case MODE_QI:
13680 len+=1;
13681 break;
13682 case MODE_HI:
13683 len+=2;
13684 break;
13685 case MODE_SI:
13686 len+=4;
13687 break;
13688 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13689 case MODE_DI:
13690 len+=4;
13691 break;
13692 default:
13693 fatal_insn ("unknown insn mode", insn);
13694 }
13695 }
13696 }
13697 return len;
13698 }
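/* An illustrative, self-contained sketch (not part of GCC; kept disabled) of
   the immediate sizes the function above reports: for instance
   "addl $100, %eax" with a short-form alternative needs a 1-byte immediate,
   "addl $1000, %eax" a 4-byte one, and DImode instructions still use a
   4-byte sign-extended immediate.  The helper name and its parameters are
   made up for the example.  */
#if 0
#include <assert.h>

static int
immediate_length (long value, int shortform, int mode_bytes)
{
  if (shortform && value >= -128 && value <= 127)
    return 1;
  /* DImode immediates are encoded as 32-bit sign-extended values.  */
  return mode_bytes > 4 ? 4 : mode_bytes;
}

int
main (void)
{
  assert (immediate_length (100, 1, 4) == 1);    /* addl $100, %eax   */
  assert (immediate_length (1000, 1, 4) == 4);   /* addl $1000, %eax  */
  assert (immediate_length (1000, 1, 2) == 2);   /* addw $1000, %ax   */
  assert (immediate_length (1, 0, 8) == 4);      /* DImode, no short form assumed  */
  return 0;
}
#endif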
13699 /* Compute default value for "length_address" attribute. */
13700 int
13701 ix86_attr_length_address_default (rtx insn)
13702 {
13703 int i;
13704
13705 if (get_attr_type (insn) == TYPE_LEA)
13706 {
13707 rtx set = PATTERN (insn);
13708
13709 if (GET_CODE (set) == PARALLEL)
13710 set = XVECEXP (set, 0, 0);
13711
13712 gcc_assert (GET_CODE (set) == SET);
13713
13714 return memory_address_length (SET_SRC (set));
13715 }
13716
13717 extract_insn_cached (insn);
13718 for (i = recog_data.n_operands - 1; i >= 0; --i)
13719 if (GET_CODE (recog_data.operand[i]) == MEM)
13720 {
13721 return memory_address_length (XEXP (recog_data.operand[i], 0));
13723 }
13724 return 0;
13725 }
13726 \f
13727 /* Return the maximum number of instructions a cpu can issue. */
13728
13729 static int
13730 ix86_issue_rate (void)
13731 {
13732 switch (ix86_tune)
13733 {
13734 case PROCESSOR_PENTIUM:
13735 case PROCESSOR_K6:
13736 return 2;
13737
13738 case PROCESSOR_PENTIUMPRO:
13739 case PROCESSOR_PENTIUM4:
13740 case PROCESSOR_ATHLON:
13741 case PROCESSOR_K8:
13742 case PROCESSOR_NOCONA:
13743 case PROCESSOR_GENERIC32:
13744 case PROCESSOR_GENERIC64:
13745 return 3;
13746
13747 default:
13748 return 1;
13749 }
13750 }
13751
13752 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
13753 set by DEP_INSN and nothing else set by DEP_INSN. */
13754
13755 static int
13756 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13757 {
13758 rtx set, set2;
13759
13760 /* Simplify the test for uninteresting insns. */
13761 if (insn_type != TYPE_SETCC
13762 && insn_type != TYPE_ICMOV
13763 && insn_type != TYPE_FCMOV
13764 && insn_type != TYPE_IBR)
13765 return 0;
13766
13767 if ((set = single_set (dep_insn)) != 0)
13768 {
13769 set = SET_DEST (set);
13770 set2 = NULL_RTX;
13771 }
13772 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13773 && XVECLEN (PATTERN (dep_insn), 0) == 2
13774 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13775 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13776 {
13777 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13778 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13779 }
13780 else
13781 return 0;
13782
13783 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13784 return 0;
13785
13786 /* This test is true if the dependent insn reads the flags but
13787 not any other potentially set register. */
13788 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13789 return 0;
13790
13791 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13792 return 0;
13793
13794 return 1;
13795 }
13796
13797 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13798 address with operands set by DEP_INSN. */
13799
13800 static int
13801 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13802 {
13803 rtx addr;
13804
13805 if (insn_type == TYPE_LEA
13806 && TARGET_PENTIUM)
13807 {
13808 addr = PATTERN (insn);
13809
13810 if (GET_CODE (addr) == PARALLEL)
13811 addr = XVECEXP (addr, 0, 0);
13812
13813 gcc_assert (GET_CODE (addr) == SET);
13814
13815 addr = SET_SRC (addr);
13816 }
13817 else
13818 {
13819 int i;
13820 extract_insn_cached (insn);
13821 for (i = recog_data.n_operands - 1; i >= 0; --i)
13822 if (GET_CODE (recog_data.operand[i]) == MEM)
13823 {
13824 addr = XEXP (recog_data.operand[i], 0);
13825 goto found;
13826 }
13827 return 0;
13828 found:;
13829 }
13830
13831 return modified_in_p (addr, dep_insn);
13832 }
13833
13834 static int
13835 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13836 {
13837 enum attr_type insn_type, dep_insn_type;
13838 enum attr_memory memory;
13839 rtx set, set2;
13840 int dep_insn_code_number;
13841
13842 /* Anti and output dependencies have zero cost on all CPUs. */
13843 if (REG_NOTE_KIND (link) != 0)
13844 return 0;
13845
13846 dep_insn_code_number = recog_memoized (dep_insn);
13847
13848 /* If we can't recognize the insns, we can't really do anything. */
13849 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13850 return cost;
13851
13852 insn_type = get_attr_type (insn);
13853 dep_insn_type = get_attr_type (dep_insn);
13854
13855 switch (ix86_tune)
13856 {
13857 case PROCESSOR_PENTIUM:
13858 /* Address Generation Interlock adds a cycle of latency. */
13859 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13860 cost += 1;
13861
13862 /* ??? Compares pair with jump/setcc. */
13863 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13864 cost = 0;
13865
13866 /* Floating point stores require value to be ready one cycle earlier. */
13867 if (insn_type == TYPE_FMOV
13868 && get_attr_memory (insn) == MEMORY_STORE
13869 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13870 cost += 1;
13871 break;
13872
13873 case PROCESSOR_PENTIUMPRO:
13874 memory = get_attr_memory (insn);
13875
13876 /* INT->FP conversion is expensive. */
13877 if (get_attr_fp_int_src (dep_insn))
13878 cost += 5;
13879
13880 /* There is one cycle extra latency between an FP op and a store. */
13881 if (insn_type == TYPE_FMOV
13882 && (set = single_set (dep_insn)) != NULL_RTX
13883 && (set2 = single_set (insn)) != NULL_RTX
13884 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13885 && GET_CODE (SET_DEST (set2)) == MEM)
13886 cost += 1;
13887
13888 /* Model the ability of the reorder buffer to hide the latency of a load
13889 by executing it in parallel with the previous instruction, when the
13890 previous instruction is not needed to compute the address. */
13891 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13892 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13893 {
13894 /* Claim that moves take one cycle, as the core can issue one load
13895 at a time and the next load can start a cycle later. */
13896 if (dep_insn_type == TYPE_IMOV
13897 || dep_insn_type == TYPE_FMOV)
13898 cost = 1;
13899 else if (cost > 1)
13900 cost--;
13901 }
13902 break;
13903
13904 case PROCESSOR_K6:
13905 memory = get_attr_memory (insn);
13906
13907 /* The esp dependency is resolved before the instruction is really
13908 finished. */
13909 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13910 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13911 return 1;
13912
13913 /* INT->FP conversion is expensive. */
13914 if (get_attr_fp_int_src (dep_insn))
13915 cost += 5;
13916
13917 /* Model the ability of the reorder buffer to hide the latency of a load
13918 by executing it in parallel with the previous instruction, when the
13919 previous instruction is not needed to compute the address. */
13920 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13921 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13922 {
13923 /* Claim that moves take one cycle, as the core can issue one load
13924 at a time and the next load can start a cycle later. */
13925 if (dep_insn_type == TYPE_IMOV
13926 || dep_insn_type == TYPE_FMOV)
13927 cost = 1;
13928 else if (cost > 2)
13929 cost -= 2;
13930 else
13931 cost = 1;
13932 }
13933 break;
13934
13935 case PROCESSOR_ATHLON:
13936 case PROCESSOR_K8:
13937 case PROCESSOR_GENERIC32:
13938 case PROCESSOR_GENERIC64:
13939 memory = get_attr_memory (insn);
13940
13941 /* Model the ability of the reorder buffer to hide the latency of a load
13942 by executing it in parallel with the previous instruction, when the
13943 previous instruction is not needed to compute the address. */
13944 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13945 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13946 {
13947 enum attr_unit unit = get_attr_unit (insn);
13948 int loadcost = 3;
13949
13950 /* Because of the difference between the length of integer and
13951 floating unit pipeline preparation stages, the memory operands
13952 for floating point are cheaper.
13953
13954 ??? For Athlon the difference is most probably 2. */
13955 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13956 loadcost = 3;
13957 else
13958 loadcost = TARGET_ATHLON ? 2 : 0;
13959
13960 if (cost >= loadcost)
13961 cost -= loadcost;
13962 else
13963 cost = 0;
13964 }
13965
13966 default:
13967 break;
13968 }
13969
13970 return cost;
13971 }
13972
13973 /* How many alternative schedules to try. This should be as wide as the
13974 scheduling freedom in the DFA, but no wider. Making this value too
13975 large results in extra work for the scheduler. */
13976
13977 static int
13978 ia32_multipass_dfa_lookahead (void)
13979 {
13980 if (ix86_tune == PROCESSOR_PENTIUM)
13981 return 2;
13982
13983 if (ix86_tune == PROCESSOR_PENTIUMPRO
13984 || ix86_tune == PROCESSOR_K6)
13985 return 1;
13986
13987 else
13988 return 0;
13989 }
13990
13991 \f
13992 /* Compute the alignment given to a constant that is being placed in memory.
13993 EXP is the constant and ALIGN is the alignment that the object would
13994 ordinarily have.
13995 The value of this function is used instead of that alignment to align
13996 the object. */
13997
13998 int
13999 ix86_constant_alignment (tree exp, int align)
14000 {
14001 if (TREE_CODE (exp) == REAL_CST)
14002 {
14003 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14004 return 64;
14005 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14006 return 128;
14007 }
14008 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14009 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14010 return BITS_PER_WORD;
14011
14012 return align;
14013 }
14014
14015 /* Compute the alignment for a static variable.
14016 TYPE is the data type, and ALIGN is the alignment that
14017 the object would ordinarily have. The value of this function is used
14018 instead of that alignment to align the object. */
14019
14020 int
14021 ix86_data_alignment (tree type, int align)
14022 {
14023 int max_align = optimize_size ? BITS_PER_WORD : 256;
14024
14025 if (AGGREGATE_TYPE_P (type)
14026 && TYPE_SIZE (type)
14027 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14028 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14029 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14030 && align < max_align)
14031 align = max_align;
14032
14033 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14034 to a 16-byte boundary. */
14035 if (TARGET_64BIT)
14036 {
14037 if (AGGREGATE_TYPE_P (type)
14038 && TYPE_SIZE (type)
14039 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14040 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14041 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14042 return 128;
14043 }
14044
14045 if (TREE_CODE (type) == ARRAY_TYPE)
14046 {
14047 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14048 return 64;
14049 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14050 return 128;
14051 }
14052 else if (TREE_CODE (type) == COMPLEX_TYPE)
14053 {
14054
14055 if (TYPE_MODE (type) == DCmode && align < 64)
14056 return 64;
14057 if (TYPE_MODE (type) == XCmode && align < 128)
14058 return 128;
14059 }
14060 else if ((TREE_CODE (type) == RECORD_TYPE
14061 || TREE_CODE (type) == UNION_TYPE
14062 || TREE_CODE (type) == QUAL_UNION_TYPE)
14063 && TYPE_FIELDS (type))
14064 {
14065 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14066 return 64;
14067 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14068 return 128;
14069 }
14070 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14071 || TREE_CODE (type) == INTEGER_TYPE)
14072 {
14073 if (TYPE_MODE (type) == DFmode && align < 64)
14074 return 64;
14075 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14076 return 128;
14077 }
14078
14079 return align;
14080 }
14081
14082 /* Compute the alignment for a local variable.
14083 TYPE is the data type, and ALIGN is the alignment that
14084 the object would ordinarily have. The value of this macro is used
14085 instead of that alignment to align the object. */
14086
14087 int
14088 ix86_local_alignment (tree type, int align)
14089 {
14090 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14091 to a 16-byte boundary. */
14092 if (TARGET_64BIT)
14093 {
14094 if (AGGREGATE_TYPE_P (type)
14095 && TYPE_SIZE (type)
14096 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14097 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14098 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14099 return 128;
14100 }
14101 if (TREE_CODE (type) == ARRAY_TYPE)
14102 {
14103 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14104 return 64;
14105 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14106 return 128;
14107 }
14108 else if (TREE_CODE (type) == COMPLEX_TYPE)
14109 {
14110 if (TYPE_MODE (type) == DCmode && align < 64)
14111 return 64;
14112 if (TYPE_MODE (type) == XCmode && align < 128)
14113 return 128;
14114 }
14115 else if ((TREE_CODE (type) == RECORD_TYPE
14116 || TREE_CODE (type) == UNION_TYPE
14117 || TREE_CODE (type) == QUAL_UNION_TYPE)
14118 && TYPE_FIELDS (type))
14119 {
14120 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14121 return 64;
14122 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14123 return 128;
14124 }
14125 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14126 || TREE_CODE (type) == INTEGER_TYPE)
14127 {
14128
14129 if (TYPE_MODE (type) == DFmode && align < 64)
14130 return 64;
14131 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14132 return 128;
14133 }
14134 return align;
14135 }
14136 \f
14137 /* Emit RTL insns to initialize the variable parts of a trampoline.
14138 FNADDR is an RTX for the address of the function's pure code.
14139 CXT is an RTX for the static chain value for the function. */
14140 void
14141 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14142 {
14143 if (!TARGET_64BIT)
14144 {
14145 /* Compute offset from the end of the jmp to the target function. */
14146 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14147 plus_constant (tramp, 10),
14148 NULL_RTX, 1, OPTAB_DIRECT);
14149 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14150 gen_int_mode (0xb9, QImode));
14151 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14152 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14153 gen_int_mode (0xe9, QImode));
14154 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14155 }
14156 else
14157 {
14158 int offset = 0;
14159 /* Try to load the address using the shorter movl instead of movabs.
14160 We may want to support movq for kernel mode, but the kernel does not use
14161 trampolines at the moment. */
14162 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14163 {
14164 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14165 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14166 gen_int_mode (0xbb41, HImode));
14167 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14168 gen_lowpart (SImode, fnaddr));
14169 offset += 6;
14170 }
14171 else
14172 {
14173 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14174 gen_int_mode (0xbb49, HImode));
14175 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14176 fnaddr);
14177 offset += 10;
14178 }
14179 /* Load static chain using movabs to r10. */
14180 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14181 gen_int_mode (0xba49, HImode));
14182 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14183 cxt);
14184 offset += 10;
14185 /* Jump to r11. */
14186 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14187 gen_int_mode (0xff49, HImode));
14188 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14189 gen_int_mode (0xe3, QImode));
14190 offset += 3;
14191 gcc_assert (offset <= TRAMPOLINE_SIZE);
14192 }
14193
14194 #ifdef ENABLE_EXECUTE_STACK
14195 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14196 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14197 #endif
14198 }
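/* An illustrative, self-contained sketch (not part of GCC; kept disabled) of
   the byte layout the code above writes for the 32-bit trampoline: movl
   $cxt, %ecx (0xb9 + imm32) followed by jmp rel32 (0xe9 + imm32), 10 bytes in
   total.  The static chain value and displacement are example numbers, and
   the memcpy stores assume a little-endian host, as on x86.  */
#if 0
#include <assert.h>
#include <stdint.h>
#include <string.h>

int
main (void)
{
  unsigned char tramp[10];
  uint32_t cxt = 0x11223344;          /* static chain value (example)       */
  uint32_t disp = 0x55667788;         /* fnaddr - (tramp + 10) (example)    */

  tramp[0] = 0xb9;                    /* movl $cxt, %ecx  */
  memcpy (tramp + 1, &cxt, 4);
  tramp[5] = 0xe9;                    /* jmp rel32  */
  memcpy (tramp + 6, &disp, 4);

  assert (tramp[0] == 0xb9 && tramp[5] == 0xe9);
  assert (sizeof tramp == 10);        /* matches the offsets used above  */
  return 0;
}
#endif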
14199 \f
14200 /* Codes for all the SSE/MMX builtins. */
14201 enum ix86_builtins
14202 {
14203 IX86_BUILTIN_ADDPS,
14204 IX86_BUILTIN_ADDSS,
14205 IX86_BUILTIN_DIVPS,
14206 IX86_BUILTIN_DIVSS,
14207 IX86_BUILTIN_MULPS,
14208 IX86_BUILTIN_MULSS,
14209 IX86_BUILTIN_SUBPS,
14210 IX86_BUILTIN_SUBSS,
14211
14212 IX86_BUILTIN_CMPEQPS,
14213 IX86_BUILTIN_CMPLTPS,
14214 IX86_BUILTIN_CMPLEPS,
14215 IX86_BUILTIN_CMPGTPS,
14216 IX86_BUILTIN_CMPGEPS,
14217 IX86_BUILTIN_CMPNEQPS,
14218 IX86_BUILTIN_CMPNLTPS,
14219 IX86_BUILTIN_CMPNLEPS,
14220 IX86_BUILTIN_CMPNGTPS,
14221 IX86_BUILTIN_CMPNGEPS,
14222 IX86_BUILTIN_CMPORDPS,
14223 IX86_BUILTIN_CMPUNORDPS,
14224 IX86_BUILTIN_CMPEQSS,
14225 IX86_BUILTIN_CMPLTSS,
14226 IX86_BUILTIN_CMPLESS,
14227 IX86_BUILTIN_CMPNEQSS,
14228 IX86_BUILTIN_CMPNLTSS,
14229 IX86_BUILTIN_CMPNLESS,
14230 IX86_BUILTIN_CMPNGTSS,
14231 IX86_BUILTIN_CMPNGESS,
14232 IX86_BUILTIN_CMPORDSS,
14233 IX86_BUILTIN_CMPUNORDSS,
14234
14235 IX86_BUILTIN_COMIEQSS,
14236 IX86_BUILTIN_COMILTSS,
14237 IX86_BUILTIN_COMILESS,
14238 IX86_BUILTIN_COMIGTSS,
14239 IX86_BUILTIN_COMIGESS,
14240 IX86_BUILTIN_COMINEQSS,
14241 IX86_BUILTIN_UCOMIEQSS,
14242 IX86_BUILTIN_UCOMILTSS,
14243 IX86_BUILTIN_UCOMILESS,
14244 IX86_BUILTIN_UCOMIGTSS,
14245 IX86_BUILTIN_UCOMIGESS,
14246 IX86_BUILTIN_UCOMINEQSS,
14247
14248 IX86_BUILTIN_CVTPI2PS,
14249 IX86_BUILTIN_CVTPS2PI,
14250 IX86_BUILTIN_CVTSI2SS,
14251 IX86_BUILTIN_CVTSI642SS,
14252 IX86_BUILTIN_CVTSS2SI,
14253 IX86_BUILTIN_CVTSS2SI64,
14254 IX86_BUILTIN_CVTTPS2PI,
14255 IX86_BUILTIN_CVTTSS2SI,
14256 IX86_BUILTIN_CVTTSS2SI64,
14257
14258 IX86_BUILTIN_MAXPS,
14259 IX86_BUILTIN_MAXSS,
14260 IX86_BUILTIN_MINPS,
14261 IX86_BUILTIN_MINSS,
14262
14263 IX86_BUILTIN_LOADUPS,
14264 IX86_BUILTIN_STOREUPS,
14265 IX86_BUILTIN_MOVSS,
14266
14267 IX86_BUILTIN_MOVHLPS,
14268 IX86_BUILTIN_MOVLHPS,
14269 IX86_BUILTIN_LOADHPS,
14270 IX86_BUILTIN_LOADLPS,
14271 IX86_BUILTIN_STOREHPS,
14272 IX86_BUILTIN_STORELPS,
14273
14274 IX86_BUILTIN_MASKMOVQ,
14275 IX86_BUILTIN_MOVMSKPS,
14276 IX86_BUILTIN_PMOVMSKB,
14277
14278 IX86_BUILTIN_MOVNTPS,
14279 IX86_BUILTIN_MOVNTQ,
14280
14281 IX86_BUILTIN_LOADDQU,
14282 IX86_BUILTIN_STOREDQU,
14283
14284 IX86_BUILTIN_PACKSSWB,
14285 IX86_BUILTIN_PACKSSDW,
14286 IX86_BUILTIN_PACKUSWB,
14287
14288 IX86_BUILTIN_PADDB,
14289 IX86_BUILTIN_PADDW,
14290 IX86_BUILTIN_PADDD,
14291 IX86_BUILTIN_PADDQ,
14292 IX86_BUILTIN_PADDSB,
14293 IX86_BUILTIN_PADDSW,
14294 IX86_BUILTIN_PADDUSB,
14295 IX86_BUILTIN_PADDUSW,
14296 IX86_BUILTIN_PSUBB,
14297 IX86_BUILTIN_PSUBW,
14298 IX86_BUILTIN_PSUBD,
14299 IX86_BUILTIN_PSUBQ,
14300 IX86_BUILTIN_PSUBSB,
14301 IX86_BUILTIN_PSUBSW,
14302 IX86_BUILTIN_PSUBUSB,
14303 IX86_BUILTIN_PSUBUSW,
14304
14305 IX86_BUILTIN_PAND,
14306 IX86_BUILTIN_PANDN,
14307 IX86_BUILTIN_POR,
14308 IX86_BUILTIN_PXOR,
14309
14310 IX86_BUILTIN_PAVGB,
14311 IX86_BUILTIN_PAVGW,
14312
14313 IX86_BUILTIN_PCMPEQB,
14314 IX86_BUILTIN_PCMPEQW,
14315 IX86_BUILTIN_PCMPEQD,
14316 IX86_BUILTIN_PCMPGTB,
14317 IX86_BUILTIN_PCMPGTW,
14318 IX86_BUILTIN_PCMPGTD,
14319
14320 IX86_BUILTIN_PMADDWD,
14321
14322 IX86_BUILTIN_PMAXSW,
14323 IX86_BUILTIN_PMAXUB,
14324 IX86_BUILTIN_PMINSW,
14325 IX86_BUILTIN_PMINUB,
14326
14327 IX86_BUILTIN_PMULHUW,
14328 IX86_BUILTIN_PMULHW,
14329 IX86_BUILTIN_PMULLW,
14330
14331 IX86_BUILTIN_PSADBW,
14332 IX86_BUILTIN_PSHUFW,
14333
14334 IX86_BUILTIN_PSLLW,
14335 IX86_BUILTIN_PSLLD,
14336 IX86_BUILTIN_PSLLQ,
14337 IX86_BUILTIN_PSRAW,
14338 IX86_BUILTIN_PSRAD,
14339 IX86_BUILTIN_PSRLW,
14340 IX86_BUILTIN_PSRLD,
14341 IX86_BUILTIN_PSRLQ,
14342 IX86_BUILTIN_PSLLWI,
14343 IX86_BUILTIN_PSLLDI,
14344 IX86_BUILTIN_PSLLQI,
14345 IX86_BUILTIN_PSRAWI,
14346 IX86_BUILTIN_PSRADI,
14347 IX86_BUILTIN_PSRLWI,
14348 IX86_BUILTIN_PSRLDI,
14349 IX86_BUILTIN_PSRLQI,
14350
14351 IX86_BUILTIN_PUNPCKHBW,
14352 IX86_BUILTIN_PUNPCKHWD,
14353 IX86_BUILTIN_PUNPCKHDQ,
14354 IX86_BUILTIN_PUNPCKLBW,
14355 IX86_BUILTIN_PUNPCKLWD,
14356 IX86_BUILTIN_PUNPCKLDQ,
14357
14358 IX86_BUILTIN_SHUFPS,
14359
14360 IX86_BUILTIN_RCPPS,
14361 IX86_BUILTIN_RCPSS,
14362 IX86_BUILTIN_RSQRTPS,
14363 IX86_BUILTIN_RSQRTSS,
14364 IX86_BUILTIN_SQRTPS,
14365 IX86_BUILTIN_SQRTSS,
14366
14367 IX86_BUILTIN_UNPCKHPS,
14368 IX86_BUILTIN_UNPCKLPS,
14369
14370 IX86_BUILTIN_ANDPS,
14371 IX86_BUILTIN_ANDNPS,
14372 IX86_BUILTIN_ORPS,
14373 IX86_BUILTIN_XORPS,
14374
14375 IX86_BUILTIN_EMMS,
14376 IX86_BUILTIN_LDMXCSR,
14377 IX86_BUILTIN_STMXCSR,
14378 IX86_BUILTIN_SFENCE,
14379
14380 /* 3DNow! Original */
14381 IX86_BUILTIN_FEMMS,
14382 IX86_BUILTIN_PAVGUSB,
14383 IX86_BUILTIN_PF2ID,
14384 IX86_BUILTIN_PFACC,
14385 IX86_BUILTIN_PFADD,
14386 IX86_BUILTIN_PFCMPEQ,
14387 IX86_BUILTIN_PFCMPGE,
14388 IX86_BUILTIN_PFCMPGT,
14389 IX86_BUILTIN_PFMAX,
14390 IX86_BUILTIN_PFMIN,
14391 IX86_BUILTIN_PFMUL,
14392 IX86_BUILTIN_PFRCP,
14393 IX86_BUILTIN_PFRCPIT1,
14394 IX86_BUILTIN_PFRCPIT2,
14395 IX86_BUILTIN_PFRSQIT1,
14396 IX86_BUILTIN_PFRSQRT,
14397 IX86_BUILTIN_PFSUB,
14398 IX86_BUILTIN_PFSUBR,
14399 IX86_BUILTIN_PI2FD,
14400 IX86_BUILTIN_PMULHRW,
14401
14402 /* 3DNow! Athlon Extensions */
14403 IX86_BUILTIN_PF2IW,
14404 IX86_BUILTIN_PFNACC,
14405 IX86_BUILTIN_PFPNACC,
14406 IX86_BUILTIN_PI2FW,
14407 IX86_BUILTIN_PSWAPDSI,
14408 IX86_BUILTIN_PSWAPDSF,
14409
14410 /* SSE2 */
14411 IX86_BUILTIN_ADDPD,
14412 IX86_BUILTIN_ADDSD,
14413 IX86_BUILTIN_DIVPD,
14414 IX86_BUILTIN_DIVSD,
14415 IX86_BUILTIN_MULPD,
14416 IX86_BUILTIN_MULSD,
14417 IX86_BUILTIN_SUBPD,
14418 IX86_BUILTIN_SUBSD,
14419
14420 IX86_BUILTIN_CMPEQPD,
14421 IX86_BUILTIN_CMPLTPD,
14422 IX86_BUILTIN_CMPLEPD,
14423 IX86_BUILTIN_CMPGTPD,
14424 IX86_BUILTIN_CMPGEPD,
14425 IX86_BUILTIN_CMPNEQPD,
14426 IX86_BUILTIN_CMPNLTPD,
14427 IX86_BUILTIN_CMPNLEPD,
14428 IX86_BUILTIN_CMPNGTPD,
14429 IX86_BUILTIN_CMPNGEPD,
14430 IX86_BUILTIN_CMPORDPD,
14431 IX86_BUILTIN_CMPUNORDPD,
14432 IX86_BUILTIN_CMPNEPD,
14433 IX86_BUILTIN_CMPEQSD,
14434 IX86_BUILTIN_CMPLTSD,
14435 IX86_BUILTIN_CMPLESD,
14436 IX86_BUILTIN_CMPNEQSD,
14437 IX86_BUILTIN_CMPNLTSD,
14438 IX86_BUILTIN_CMPNLESD,
14439 IX86_BUILTIN_CMPORDSD,
14440 IX86_BUILTIN_CMPUNORDSD,
14441 IX86_BUILTIN_CMPNESD,
14442
14443 IX86_BUILTIN_COMIEQSD,
14444 IX86_BUILTIN_COMILTSD,
14445 IX86_BUILTIN_COMILESD,
14446 IX86_BUILTIN_COMIGTSD,
14447 IX86_BUILTIN_COMIGESD,
14448 IX86_BUILTIN_COMINEQSD,
14449 IX86_BUILTIN_UCOMIEQSD,
14450 IX86_BUILTIN_UCOMILTSD,
14451 IX86_BUILTIN_UCOMILESD,
14452 IX86_BUILTIN_UCOMIGTSD,
14453 IX86_BUILTIN_UCOMIGESD,
14454 IX86_BUILTIN_UCOMINEQSD,
14455
14456 IX86_BUILTIN_MAXPD,
14457 IX86_BUILTIN_MAXSD,
14458 IX86_BUILTIN_MINPD,
14459 IX86_BUILTIN_MINSD,
14460
14461 IX86_BUILTIN_ANDPD,
14462 IX86_BUILTIN_ANDNPD,
14463 IX86_BUILTIN_ORPD,
14464 IX86_BUILTIN_XORPD,
14465
14466 IX86_BUILTIN_SQRTPD,
14467 IX86_BUILTIN_SQRTSD,
14468
14469 IX86_BUILTIN_UNPCKHPD,
14470 IX86_BUILTIN_UNPCKLPD,
14471
14472 IX86_BUILTIN_SHUFPD,
14473
14474 IX86_BUILTIN_LOADUPD,
14475 IX86_BUILTIN_STOREUPD,
14476 IX86_BUILTIN_MOVSD,
14477
14478 IX86_BUILTIN_LOADHPD,
14479 IX86_BUILTIN_LOADLPD,
14480
14481 IX86_BUILTIN_CVTDQ2PD,
14482 IX86_BUILTIN_CVTDQ2PS,
14483
14484 IX86_BUILTIN_CVTPD2DQ,
14485 IX86_BUILTIN_CVTPD2PI,
14486 IX86_BUILTIN_CVTPD2PS,
14487 IX86_BUILTIN_CVTTPD2DQ,
14488 IX86_BUILTIN_CVTTPD2PI,
14489
14490 IX86_BUILTIN_CVTPI2PD,
14491 IX86_BUILTIN_CVTSI2SD,
14492 IX86_BUILTIN_CVTSI642SD,
14493
14494 IX86_BUILTIN_CVTSD2SI,
14495 IX86_BUILTIN_CVTSD2SI64,
14496 IX86_BUILTIN_CVTSD2SS,
14497 IX86_BUILTIN_CVTSS2SD,
14498 IX86_BUILTIN_CVTTSD2SI,
14499 IX86_BUILTIN_CVTTSD2SI64,
14500
14501 IX86_BUILTIN_CVTPS2DQ,
14502 IX86_BUILTIN_CVTPS2PD,
14503 IX86_BUILTIN_CVTTPS2DQ,
14504
14505 IX86_BUILTIN_MOVNTI,
14506 IX86_BUILTIN_MOVNTPD,
14507 IX86_BUILTIN_MOVNTDQ,
14508
14509 /* SSE2 MMX */
14510 IX86_BUILTIN_MASKMOVDQU,
14511 IX86_BUILTIN_MOVMSKPD,
14512 IX86_BUILTIN_PMOVMSKB128,
14513
14514 IX86_BUILTIN_PACKSSWB128,
14515 IX86_BUILTIN_PACKSSDW128,
14516 IX86_BUILTIN_PACKUSWB128,
14517
14518 IX86_BUILTIN_PADDB128,
14519 IX86_BUILTIN_PADDW128,
14520 IX86_BUILTIN_PADDD128,
14521 IX86_BUILTIN_PADDQ128,
14522 IX86_BUILTIN_PADDSB128,
14523 IX86_BUILTIN_PADDSW128,
14524 IX86_BUILTIN_PADDUSB128,
14525 IX86_BUILTIN_PADDUSW128,
14526 IX86_BUILTIN_PSUBB128,
14527 IX86_BUILTIN_PSUBW128,
14528 IX86_BUILTIN_PSUBD128,
14529 IX86_BUILTIN_PSUBQ128,
14530 IX86_BUILTIN_PSUBSB128,
14531 IX86_BUILTIN_PSUBSW128,
14532 IX86_BUILTIN_PSUBUSB128,
14533 IX86_BUILTIN_PSUBUSW128,
14534
14535 IX86_BUILTIN_PAND128,
14536 IX86_BUILTIN_PANDN128,
14537 IX86_BUILTIN_POR128,
14538 IX86_BUILTIN_PXOR128,
14539
14540 IX86_BUILTIN_PAVGB128,
14541 IX86_BUILTIN_PAVGW128,
14542
14543 IX86_BUILTIN_PCMPEQB128,
14544 IX86_BUILTIN_PCMPEQW128,
14545 IX86_BUILTIN_PCMPEQD128,
14546 IX86_BUILTIN_PCMPGTB128,
14547 IX86_BUILTIN_PCMPGTW128,
14548 IX86_BUILTIN_PCMPGTD128,
14549
14550 IX86_BUILTIN_PMADDWD128,
14551
14552 IX86_BUILTIN_PMAXSW128,
14553 IX86_BUILTIN_PMAXUB128,
14554 IX86_BUILTIN_PMINSW128,
14555 IX86_BUILTIN_PMINUB128,
14556
14557 IX86_BUILTIN_PMULUDQ,
14558 IX86_BUILTIN_PMULUDQ128,
14559 IX86_BUILTIN_PMULHUW128,
14560 IX86_BUILTIN_PMULHW128,
14561 IX86_BUILTIN_PMULLW128,
14562
14563 IX86_BUILTIN_PSADBW128,
14564 IX86_BUILTIN_PSHUFHW,
14565 IX86_BUILTIN_PSHUFLW,
14566 IX86_BUILTIN_PSHUFD,
14567
14568 IX86_BUILTIN_PSLLW128,
14569 IX86_BUILTIN_PSLLD128,
14570 IX86_BUILTIN_PSLLQ128,
14571 IX86_BUILTIN_PSRAW128,
14572 IX86_BUILTIN_PSRAD128,
14573 IX86_BUILTIN_PSRLW128,
14574 IX86_BUILTIN_PSRLD128,
14575 IX86_BUILTIN_PSRLQ128,
14576 IX86_BUILTIN_PSLLDQI128,
14577 IX86_BUILTIN_PSLLWI128,
14578 IX86_BUILTIN_PSLLDI128,
14579 IX86_BUILTIN_PSLLQI128,
14580 IX86_BUILTIN_PSRAWI128,
14581 IX86_BUILTIN_PSRADI128,
14582 IX86_BUILTIN_PSRLDQI128,
14583 IX86_BUILTIN_PSRLWI128,
14584 IX86_BUILTIN_PSRLDI128,
14585 IX86_BUILTIN_PSRLQI128,
14586
14587 IX86_BUILTIN_PUNPCKHBW128,
14588 IX86_BUILTIN_PUNPCKHWD128,
14589 IX86_BUILTIN_PUNPCKHDQ128,
14590 IX86_BUILTIN_PUNPCKHQDQ128,
14591 IX86_BUILTIN_PUNPCKLBW128,
14592 IX86_BUILTIN_PUNPCKLWD128,
14593 IX86_BUILTIN_PUNPCKLDQ128,
14594 IX86_BUILTIN_PUNPCKLQDQ128,
14595
14596 IX86_BUILTIN_CLFLUSH,
14597 IX86_BUILTIN_MFENCE,
14598 IX86_BUILTIN_LFENCE,
14599
14600 /* Prescott New Instructions. */
14601 IX86_BUILTIN_ADDSUBPS,
14602 IX86_BUILTIN_HADDPS,
14603 IX86_BUILTIN_HSUBPS,
14604 IX86_BUILTIN_MOVSHDUP,
14605 IX86_BUILTIN_MOVSLDUP,
14606 IX86_BUILTIN_ADDSUBPD,
14607 IX86_BUILTIN_HADDPD,
14608 IX86_BUILTIN_HSUBPD,
14609 IX86_BUILTIN_LDDQU,
14610
14611 IX86_BUILTIN_MONITOR,
14612 IX86_BUILTIN_MWAIT,
14613
14614 /* SSSE3. */
14615 IX86_BUILTIN_PHADDW,
14616 IX86_BUILTIN_PHADDD,
14617 IX86_BUILTIN_PHADDSW,
14618 IX86_BUILTIN_PHSUBW,
14619 IX86_BUILTIN_PHSUBD,
14620 IX86_BUILTIN_PHSUBSW,
14621 IX86_BUILTIN_PMADDUBSW,
14622 IX86_BUILTIN_PMULHRSW,
14623 IX86_BUILTIN_PSHUFB,
14624 IX86_BUILTIN_PSIGNB,
14625 IX86_BUILTIN_PSIGNW,
14626 IX86_BUILTIN_PSIGND,
14627 IX86_BUILTIN_PALIGNR,
14628 IX86_BUILTIN_PABSB,
14629 IX86_BUILTIN_PABSW,
14630 IX86_BUILTIN_PABSD,
14631
14632 IX86_BUILTIN_PHADDW128,
14633 IX86_BUILTIN_PHADDD128,
14634 IX86_BUILTIN_PHADDSW128,
14635 IX86_BUILTIN_PHSUBW128,
14636 IX86_BUILTIN_PHSUBD128,
14637 IX86_BUILTIN_PHSUBSW128,
14638 IX86_BUILTIN_PMADDUBSW128,
14639 IX86_BUILTIN_PMULHRSW128,
14640 IX86_BUILTIN_PSHUFB128,
14641 IX86_BUILTIN_PSIGNB128,
14642 IX86_BUILTIN_PSIGNW128,
14643 IX86_BUILTIN_PSIGND128,
14644 IX86_BUILTIN_PALIGNR128,
14645 IX86_BUILTIN_PABSB128,
14646 IX86_BUILTIN_PABSW128,
14647 IX86_BUILTIN_PABSD128,
14648
14649 IX86_BUILTIN_VEC_INIT_V2SI,
14650 IX86_BUILTIN_VEC_INIT_V4HI,
14651 IX86_BUILTIN_VEC_INIT_V8QI,
14652 IX86_BUILTIN_VEC_EXT_V2DF,
14653 IX86_BUILTIN_VEC_EXT_V2DI,
14654 IX86_BUILTIN_VEC_EXT_V4SF,
14655 IX86_BUILTIN_VEC_EXT_V4SI,
14656 IX86_BUILTIN_VEC_EXT_V8HI,
14657 IX86_BUILTIN_VEC_EXT_V2SI,
14658 IX86_BUILTIN_VEC_EXT_V4HI,
14659 IX86_BUILTIN_VEC_SET_V8HI,
14660 IX86_BUILTIN_VEC_SET_V4HI,
14661
14662 IX86_BUILTIN_MAX
14663 };
14664
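/* Register a single builtin: NAME becomes available with function type
   TYPE and code CODE, but only when the target flags in MASK are enabled
   (and, for MASK_64BIT builtins, only on 64-bit targets).  For example,
   the call
     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
		  IX86_BUILTIN_EMMS);
   below makes __builtin_ia32_emms available only when MMX is enabled.  */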
14665 #define def_builtin(MASK, NAME, TYPE, CODE) \
14666 do { \
14667 if ((MASK) & target_flags \
14668 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14669 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14670 NULL, NULL_TREE); \
14671 } while (0)
14672
14673 /* Bits for builtin_description.flag. */
14674
14675 /* Set when we don't support the comparison natively, and should
14676    swap the comparison operands in order to support it.  */
14677 #define BUILTIN_DESC_SWAP_OPERANDS 1
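/* For example, there is no native pattern for cmpgtps, so
   __builtin_ia32_cmpgtps is described below with LT plus
   BUILTIN_DESC_SWAP_OPERANDS: a > b is expanded as b < a.  */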
14678
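/* One entry per builtin handled by the generic loops below: the
   target_flags bits that must be enabled, the insn pattern used to
   expand it, its user-visible name (0 if it is registered by hand
   elsewhere), its IX86_BUILTIN code, and, for the compare builtins,
   the rtx comparison code together with the flag bits above.  */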
14679 struct builtin_description
14680 {
14681 const unsigned int mask;
14682 const enum insn_code icode;
14683 const char *const name;
14684 const enum ix86_builtins code;
14685 const enum rtx_code comparison;
14686 const unsigned int flag;
14687 };
14688
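/* Builtins that compare two vectors and return an int: the SSE
   comiss/ucomiss and SSE2 comisd/ucomisd families.  They are registered
   below with int_ftype_v4sf_v4sf or int_ftype_v2df_v2df depending on
   the mask.  */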
14689 static const struct builtin_description bdesc_comi[] =
14690 {
14691 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14692 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14693 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14694 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14695 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14696 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14697 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14698 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14699 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14700 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14701 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14702 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14703 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14704 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14705 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14706 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14707 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14708 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14709 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14710 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14711 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14712 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14713 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14714 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14715 };
14716
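/* Builtins taking two vector arguments.  Entries whose name is 0 are
   skipped by the generic two-operand registration loop and are
   registered by hand with more precise types.  */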
14717 static const struct builtin_description bdesc_2arg[] =
14718 {
14719 /* SSE */
14720 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14721 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14722 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14723 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14724 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14725 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14726 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14727 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14728
14729 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14730 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14731 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14732 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14733 BUILTIN_DESC_SWAP_OPERANDS },
14734 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14735 BUILTIN_DESC_SWAP_OPERANDS },
14736 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14737 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14738 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14739 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14740 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14741 BUILTIN_DESC_SWAP_OPERANDS },
14742 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14743 BUILTIN_DESC_SWAP_OPERANDS },
14744 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14745 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14746 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14747 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14748 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14749 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14750 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14751 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14752 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14753 BUILTIN_DESC_SWAP_OPERANDS },
14754 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14755 BUILTIN_DESC_SWAP_OPERANDS },
14756 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14757
14758 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14759 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14760 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14761 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14762
14763 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14764 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14765 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14766 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14767
14768 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14769 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14770 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14771 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14772 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14773
14774 /* MMX */
14775 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14776 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14777 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14779 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14780 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14781 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14782 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14783
14784 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14785 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14786 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14787 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14788 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14789 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14790 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14791 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14792
14793 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14794 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14795 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14796
14797 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14798 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14799 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14800 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14801
14802 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14803 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14804
14805 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14806 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14807 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14808 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14809 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14810 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14811
14812 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14813 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14814 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14815 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14816
14817 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14818 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14819 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14820 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14821 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14822 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14823
14824   /* Special: nameless entries, registered by hand below with more precise types.  */
14825 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14826 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14827 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14828
14829 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14830 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14831 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14832
14833 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14834 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14835 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14836 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14837 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14838 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14839
14840 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14841 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14842 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14843 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14844 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14845 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14846
14847 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14848 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14849 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14850 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14851
14852 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14853 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14854
14855 /* SSE2 */
14856 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14857 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14858 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14859 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14860 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14861 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14862 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14863 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14864
14865 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14866 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14867 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14868 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14869 BUILTIN_DESC_SWAP_OPERANDS },
14870 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14871 BUILTIN_DESC_SWAP_OPERANDS },
14872 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14873 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14874 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14875 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14877 BUILTIN_DESC_SWAP_OPERANDS },
14878 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14879 BUILTIN_DESC_SWAP_OPERANDS },
14880 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14881 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14882 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14883 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14884 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14885 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14886 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14887 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14888 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14889
14890 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14891 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14892 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14893 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14894
14895 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14896 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14897 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14898 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14899
14900 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14901 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14902 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14903
14904 /* SSE2 MMX */
14905 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14906 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14907 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14908 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14909 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14910 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14911 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14912 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14913
14914   { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14915   { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14916   { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14917   { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14918   { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14919   { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14920   { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14921   { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14922
14923 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14924 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14925
14926 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14927 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14928 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14929 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14930
14931 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14932 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14933
14934 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14935 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14936 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14937 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14938 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14939 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14940
14941 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14942 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14943 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14944 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14945
14946 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14947 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14948 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14949 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14950 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14951 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14952 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14953 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14954
14955 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14956 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14957 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14958
14959 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14960 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14961
14962 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14963 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14964
14965 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14966 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14967 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14968
14969 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14970 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14971 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14972
14973 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14974 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14975
14976 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14977
14978 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14979 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14980 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14981 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14982
14983   /* SSE3 */
14984 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14985 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14986 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14987 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14988 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14989 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
14990
14991 /* SSSE3 */
14992 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
14993 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
14994 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
14995 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
14996 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
14997 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
14998 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
14999 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15000 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15001 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15002 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15003 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15004 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15005 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15006 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15007 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15008 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15009 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15010 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15011 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15012 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15013 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15014 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15015 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15016 };
15017
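/* Builtins taking a single vector argument, registered by the generic
   one-operand loop below.  */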
15018 static const struct builtin_description bdesc_1arg[] =
15019 {
15020 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15021 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15022
15023 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15024 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15025 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15026
15027 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15028 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15029 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15030 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15031 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15032 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15033
15034 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15036
15037 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15038
15039 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15040 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15041
15042 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15043 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15044 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15045 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15046 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15047
15048 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15049
15050 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15051 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15052 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15053 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15054
15055 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15056 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15057 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15058
15059 /* SSE3 */
15060 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15061 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15062
15063 /* SSSE3 */
15064 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15065 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15066 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15067 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15068 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15069 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15070 };
15071
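/* Set up all ix86 builtin functions.  MMX, SSE and SSSE3 builtins are
   only created when MMX code generation is enabled.  */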
15072 static void
15073 ix86_init_builtins (void)
15074 {
15075 if (TARGET_MMX)
15076 ix86_init_mmx_sse_builtins ();
15077 }
15078
15079 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15080 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15081 builtins. */
15082 static void
15083 ix86_init_mmx_sse_builtins (void)
15084 {
15085 const struct builtin_description * d;
15086 size_t i;
15087
15088 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15089 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15090 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15091 tree V2DI_type_node
15092 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15093 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15094 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15095 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15096 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15097 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15098 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15099
15100 tree pchar_type_node = build_pointer_type (char_type_node);
15101 tree pcchar_type_node = build_pointer_type (
15102 build_type_variant (char_type_node, 1, 0));
15103 tree pfloat_type_node = build_pointer_type (float_type_node);
15104 tree pcfloat_type_node = build_pointer_type (
15105 build_type_variant (float_type_node, 1, 0));
15106 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15107 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15108 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15109
15110 /* Comparisons. */
15111 tree int_ftype_v4sf_v4sf
15112 = build_function_type_list (integer_type_node,
15113 V4SF_type_node, V4SF_type_node, NULL_TREE);
15114 tree v4si_ftype_v4sf_v4sf
15115 = build_function_type_list (V4SI_type_node,
15116 V4SF_type_node, V4SF_type_node, NULL_TREE);
15117 /* MMX/SSE/integer conversions. */
15118 tree int_ftype_v4sf
15119 = build_function_type_list (integer_type_node,
15120 V4SF_type_node, NULL_TREE);
15121 tree int64_ftype_v4sf
15122 = build_function_type_list (long_long_integer_type_node,
15123 V4SF_type_node, NULL_TREE);
15124 tree int_ftype_v8qi
15125 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15126 tree v4sf_ftype_v4sf_int
15127 = build_function_type_list (V4SF_type_node,
15128 V4SF_type_node, integer_type_node, NULL_TREE);
15129 tree v4sf_ftype_v4sf_int64
15130 = build_function_type_list (V4SF_type_node,
15131 V4SF_type_node, long_long_integer_type_node,
15132 NULL_TREE);
15133 tree v4sf_ftype_v4sf_v2si
15134 = build_function_type_list (V4SF_type_node,
15135 V4SF_type_node, V2SI_type_node, NULL_TREE);
15136
15137 /* Miscellaneous. */
15138 tree v8qi_ftype_v4hi_v4hi
15139 = build_function_type_list (V8QI_type_node,
15140 V4HI_type_node, V4HI_type_node, NULL_TREE);
15141 tree v4hi_ftype_v2si_v2si
15142 = build_function_type_list (V4HI_type_node,
15143 V2SI_type_node, V2SI_type_node, NULL_TREE);
15144 tree v4sf_ftype_v4sf_v4sf_int
15145 = build_function_type_list (V4SF_type_node,
15146 V4SF_type_node, V4SF_type_node,
15147 integer_type_node, NULL_TREE);
15148 tree v2si_ftype_v4hi_v4hi
15149 = build_function_type_list (V2SI_type_node,
15150 V4HI_type_node, V4HI_type_node, NULL_TREE);
15151 tree v4hi_ftype_v4hi_int
15152 = build_function_type_list (V4HI_type_node,
15153 V4HI_type_node, integer_type_node, NULL_TREE);
15154 tree v4hi_ftype_v4hi_di
15155 = build_function_type_list (V4HI_type_node,
15156 V4HI_type_node, long_long_unsigned_type_node,
15157 NULL_TREE);
15158 tree v2si_ftype_v2si_di
15159 = build_function_type_list (V2SI_type_node,
15160 V2SI_type_node, long_long_unsigned_type_node,
15161 NULL_TREE);
15162 tree void_ftype_void
15163 = build_function_type (void_type_node, void_list_node);
15164 tree void_ftype_unsigned
15165 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15166 tree void_ftype_unsigned_unsigned
15167 = build_function_type_list (void_type_node, unsigned_type_node,
15168 unsigned_type_node, NULL_TREE);
15169 tree void_ftype_pcvoid_unsigned_unsigned
15170 = build_function_type_list (void_type_node, const_ptr_type_node,
15171 unsigned_type_node, unsigned_type_node,
15172 NULL_TREE);
15173 tree unsigned_ftype_void
15174 = build_function_type (unsigned_type_node, void_list_node);
15175 tree v2si_ftype_v4sf
15176 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15177 /* Loads/stores. */
15178 tree void_ftype_v8qi_v8qi_pchar
15179 = build_function_type_list (void_type_node,
15180 V8QI_type_node, V8QI_type_node,
15181 pchar_type_node, NULL_TREE);
15182 tree v4sf_ftype_pcfloat
15183 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15184 /* @@@ the type is bogus */
15185 tree v4sf_ftype_v4sf_pv2si
15186 = build_function_type_list (V4SF_type_node,
15187 V4SF_type_node, pv2si_type_node, NULL_TREE);
15188 tree void_ftype_pv2si_v4sf
15189 = build_function_type_list (void_type_node,
15190 pv2si_type_node, V4SF_type_node, NULL_TREE);
15191 tree void_ftype_pfloat_v4sf
15192 = build_function_type_list (void_type_node,
15193 pfloat_type_node, V4SF_type_node, NULL_TREE);
15194 tree void_ftype_pdi_di
15195 = build_function_type_list (void_type_node,
15196 pdi_type_node, long_long_unsigned_type_node,
15197 NULL_TREE);
15198 tree void_ftype_pv2di_v2di
15199 = build_function_type_list (void_type_node,
15200 pv2di_type_node, V2DI_type_node, NULL_TREE);
15201 /* Normal vector unops. */
15202 tree v4sf_ftype_v4sf
15203 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15204 tree v16qi_ftype_v16qi
15205 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15206 tree v8hi_ftype_v8hi
15207 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15208 tree v4si_ftype_v4si
15209 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15210 tree v8qi_ftype_v8qi
15211 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15212 tree v4hi_ftype_v4hi
15213 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15214
15215 /* Normal vector binops. */
15216 tree v4sf_ftype_v4sf_v4sf
15217 = build_function_type_list (V4SF_type_node,
15218 V4SF_type_node, V4SF_type_node, NULL_TREE);
15219 tree v8qi_ftype_v8qi_v8qi
15220 = build_function_type_list (V8QI_type_node,
15221 V8QI_type_node, V8QI_type_node, NULL_TREE);
15222 tree v4hi_ftype_v4hi_v4hi
15223 = build_function_type_list (V4HI_type_node,
15224 V4HI_type_node, V4HI_type_node, NULL_TREE);
15225 tree v2si_ftype_v2si_v2si
15226 = build_function_type_list (V2SI_type_node,
15227 V2SI_type_node, V2SI_type_node, NULL_TREE);
15228 tree di_ftype_di_di
15229 = build_function_type_list (long_long_unsigned_type_node,
15230 long_long_unsigned_type_node,
15231 long_long_unsigned_type_node, NULL_TREE);
15232
15233 tree di_ftype_di_di_int
15234 = build_function_type_list (long_long_unsigned_type_node,
15235 long_long_unsigned_type_node,
15236 long_long_unsigned_type_node,
15237 integer_type_node, NULL_TREE);
15238
15239 tree v2si_ftype_v2sf
15240 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15241 tree v2sf_ftype_v2si
15242 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15243 tree v2si_ftype_v2si
15244 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15245 tree v2sf_ftype_v2sf
15246 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15247 tree v2sf_ftype_v2sf_v2sf
15248 = build_function_type_list (V2SF_type_node,
15249 V2SF_type_node, V2SF_type_node, NULL_TREE);
15250 tree v2si_ftype_v2sf_v2sf
15251 = build_function_type_list (V2SI_type_node,
15252 V2SF_type_node, V2SF_type_node, NULL_TREE);
15253 tree pint_type_node = build_pointer_type (integer_type_node);
15254 tree pdouble_type_node = build_pointer_type (double_type_node);
15255 tree pcdouble_type_node = build_pointer_type (
15256 build_type_variant (double_type_node, 1, 0));
15257 tree int_ftype_v2df_v2df
15258 = build_function_type_list (integer_type_node,
15259 V2DF_type_node, V2DF_type_node, NULL_TREE);
15260
15261 tree void_ftype_pcvoid
15262 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15263 tree v4sf_ftype_v4si
15264 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15265 tree v4si_ftype_v4sf
15266 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15267 tree v2df_ftype_v4si
15268 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15269 tree v4si_ftype_v2df
15270 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15271 tree v2si_ftype_v2df
15272 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15273 tree v4sf_ftype_v2df
15274 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15275 tree v2df_ftype_v2si
15276 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15277 tree v2df_ftype_v4sf
15278 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15279 tree int_ftype_v2df
15280 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15281 tree int64_ftype_v2df
15282 = build_function_type_list (long_long_integer_type_node,
15283 V2DF_type_node, NULL_TREE);
15284 tree v2df_ftype_v2df_int
15285 = build_function_type_list (V2DF_type_node,
15286 V2DF_type_node, integer_type_node, NULL_TREE);
15287 tree v2df_ftype_v2df_int64
15288 = build_function_type_list (V2DF_type_node,
15289 V2DF_type_node, long_long_integer_type_node,
15290 NULL_TREE);
15291 tree v4sf_ftype_v4sf_v2df
15292 = build_function_type_list (V4SF_type_node,
15293 V4SF_type_node, V2DF_type_node, NULL_TREE);
15294 tree v2df_ftype_v2df_v4sf
15295 = build_function_type_list (V2DF_type_node,
15296 V2DF_type_node, V4SF_type_node, NULL_TREE);
15297 tree v2df_ftype_v2df_v2df_int
15298 = build_function_type_list (V2DF_type_node,
15299 V2DF_type_node, V2DF_type_node,
15300 integer_type_node,
15301 NULL_TREE);
15302 tree v2df_ftype_v2df_pcdouble
15303 = build_function_type_list (V2DF_type_node,
15304 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15305 tree void_ftype_pdouble_v2df
15306 = build_function_type_list (void_type_node,
15307 pdouble_type_node, V2DF_type_node, NULL_TREE);
15308 tree void_ftype_pint_int
15309 = build_function_type_list (void_type_node,
15310 pint_type_node, integer_type_node, NULL_TREE);
15311 tree void_ftype_v16qi_v16qi_pchar
15312 = build_function_type_list (void_type_node,
15313 V16QI_type_node, V16QI_type_node,
15314 pchar_type_node, NULL_TREE);
15315 tree v2df_ftype_pcdouble
15316 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15317 tree v2df_ftype_v2df_v2df
15318 = build_function_type_list (V2DF_type_node,
15319 V2DF_type_node, V2DF_type_node, NULL_TREE);
15320 tree v16qi_ftype_v16qi_v16qi
15321 = build_function_type_list (V16QI_type_node,
15322 V16QI_type_node, V16QI_type_node, NULL_TREE);
15323 tree v8hi_ftype_v8hi_v8hi
15324 = build_function_type_list (V8HI_type_node,
15325 V8HI_type_node, V8HI_type_node, NULL_TREE);
15326 tree v4si_ftype_v4si_v4si
15327 = build_function_type_list (V4SI_type_node,
15328 V4SI_type_node, V4SI_type_node, NULL_TREE);
15329 tree v2di_ftype_v2di_v2di
15330 = build_function_type_list (V2DI_type_node,
15331 V2DI_type_node, V2DI_type_node, NULL_TREE);
15332 tree v2di_ftype_v2df_v2df
15333 = build_function_type_list (V2DI_type_node,
15334 V2DF_type_node, V2DF_type_node, NULL_TREE);
15335 tree v2df_ftype_v2df
15336 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15337 tree v2di_ftype_v2di_int
15338 = build_function_type_list (V2DI_type_node,
15339 V2DI_type_node, integer_type_node, NULL_TREE);
15340 tree v2di_ftype_v2di_v2di_int
15341 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15342 V2DI_type_node, integer_type_node, NULL_TREE);
15343 tree v4si_ftype_v4si_int
15344 = build_function_type_list (V4SI_type_node,
15345 V4SI_type_node, integer_type_node, NULL_TREE);
15346 tree v8hi_ftype_v8hi_int
15347 = build_function_type_list (V8HI_type_node,
15348 V8HI_type_node, integer_type_node, NULL_TREE);
15349 tree v8hi_ftype_v8hi_v2di
15350 = build_function_type_list (V8HI_type_node,
15351 V8HI_type_node, V2DI_type_node, NULL_TREE);
15352 tree v4si_ftype_v4si_v2di
15353 = build_function_type_list (V4SI_type_node,
15354 V4SI_type_node, V2DI_type_node, NULL_TREE);
15355 tree v4si_ftype_v8hi_v8hi
15356 = build_function_type_list (V4SI_type_node,
15357 V8HI_type_node, V8HI_type_node, NULL_TREE);
15358 tree di_ftype_v8qi_v8qi
15359 = build_function_type_list (long_long_unsigned_type_node,
15360 V8QI_type_node, V8QI_type_node, NULL_TREE);
15361 tree di_ftype_v2si_v2si
15362 = build_function_type_list (long_long_unsigned_type_node,
15363 V2SI_type_node, V2SI_type_node, NULL_TREE);
15364 tree v2di_ftype_v16qi_v16qi
15365 = build_function_type_list (V2DI_type_node,
15366 V16QI_type_node, V16QI_type_node, NULL_TREE);
15367 tree v2di_ftype_v4si_v4si
15368 = build_function_type_list (V2DI_type_node,
15369 V4SI_type_node, V4SI_type_node, NULL_TREE);
15370 tree int_ftype_v16qi
15371 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15372 tree v16qi_ftype_pcchar
15373 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15374 tree void_ftype_pchar_v16qi
15375 = build_function_type_list (void_type_node,
15376 pchar_type_node, V16QI_type_node, NULL_TREE);
15377
15378 tree float80_type;
15379 tree float128_type;
15380 tree ftype;
15381
15382 /* The __float80 type. */
15383 if (TYPE_MODE (long_double_type_node) == XFmode)
15384 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15385 "__float80");
15386 else
15387 {
15388       /* long double is not XFmode; create a distinct 80-bit type for __float80.  */
15389 float80_type = make_node (REAL_TYPE);
15390 TYPE_PRECISION (float80_type) = 80;
15391 layout_type (float80_type);
15392 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15393 }
15394
15395 if (TARGET_64BIT)
15396 {
15397 float128_type = make_node (REAL_TYPE);
15398 TYPE_PRECISION (float128_type) = 128;
15399 layout_type (float128_type);
15400 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15401 }
15402
15403 /* Add all builtins that are more or less simple operations on two
15404 operands. */
15405 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15406 {
15407 /* Use one of the operands; the target can have a different mode for
15408 mask-generating compares. */
15409 enum machine_mode mode;
15410 tree type;
15411
15412 if (d->name == 0)
15413 continue;
15414 mode = insn_data[d->icode].operand[1].mode;
15415
15416 switch (mode)
15417 {
15418 case V16QImode:
15419 type = v16qi_ftype_v16qi_v16qi;
15420 break;
15421 case V8HImode:
15422 type = v8hi_ftype_v8hi_v8hi;
15423 break;
15424 case V4SImode:
15425 type = v4si_ftype_v4si_v4si;
15426 break;
15427 case V2DImode:
15428 type = v2di_ftype_v2di_v2di;
15429 break;
15430 case V2DFmode:
15431 type = v2df_ftype_v2df_v2df;
15432 break;
15433 case V4SFmode:
15434 type = v4sf_ftype_v4sf_v4sf;
15435 break;
15436 case V8QImode:
15437 type = v8qi_ftype_v8qi_v8qi;
15438 break;
15439 case V4HImode:
15440 type = v4hi_ftype_v4hi_v4hi;
15441 break;
15442 case V2SImode:
15443 type = v2si_ftype_v2si_v2si;
15444 break;
15445 case DImode:
15446 type = di_ftype_di_di;
15447 break;
15448
15449 default:
15450 gcc_unreachable ();
15451 }
15452
15453       /* Override for comparisons: the mask-generating compares return an integer vector mask of the same width as the operands.  */
15454 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15455 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15456 type = v4si_ftype_v4sf_v4sf;
15457
15458 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15459 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15460 type = v2di_ftype_v2df_v2df;
15461
15462 def_builtin (d->mask, d->name, type, d->code);
15463 }
15464
15465 /* Add all builtins that are more or less simple operations on 1 operand. */
15466 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15467 {
15468 enum machine_mode mode;
15469 tree type;
15470
15471 if (d->name == 0)
15472 continue;
15473 mode = insn_data[d->icode].operand[1].mode;
15474
15475 switch (mode)
15476 {
15477 case V16QImode:
15478 type = v16qi_ftype_v16qi;
15479 break;
15480 case V8HImode:
15481 type = v8hi_ftype_v8hi;
15482 break;
15483 case V4SImode:
15484 type = v4si_ftype_v4si;
15485 break;
15486 case V2DFmode:
15487 type = v2df_ftype_v2df;
15488 break;
15489 case V4SFmode:
15490 type = v4sf_ftype_v4sf;
15491 break;
15492 case V8QImode:
15493 type = v8qi_ftype_v8qi;
15494 break;
15495 case V4HImode:
15496 type = v4hi_ftype_v4hi;
15497 break;
15498 case V2SImode:
15499 type = v2si_ftype_v2si;
15500 break;
15501
15502 default:
15503 	  gcc_unreachable ();
15504 }
15505
15506 def_builtin (d->mask, d->name, type, d->code);
15507 }
15508
15509 /* Add the remaining MMX insns with somewhat more complicated types. */
15510 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15511 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15512 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15513 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15514
15515 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15516 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15517 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15518
15519 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15520 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15521
15522 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15523 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15524
15525 /* comi/ucomi insns. */
15526 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15527 if (d->mask == MASK_SSE2)
15528 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15529 else
15530 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15531
15532 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15533 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15534 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15535
15536 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15537 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15538 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15539 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15540 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15541 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15542 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15543 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15544 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15545 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15546 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15547
15548 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15549
15550 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15551 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15552
15553 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15554 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15555 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15556 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15557
15558 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15559 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15560 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15561 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15562
15563 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15564
15565 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15566
15567 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15568 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15569 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15570 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15571 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15572 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15573
15574 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15575
15576 /* Original 3DNow! */
15577 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15578 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15579 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15580 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15581 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15582 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15583 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15584 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15585 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15586 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15587 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15588 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15589 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15590 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15591 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15592 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15593 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15594 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15595 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15596 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15597
15598 /* 3DNow! extension as used in the Athlon CPU. */
15599 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15600 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15601 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15602 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15603 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15604 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15605
15606 /* SSE2 */
15607 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15608
15609 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15610 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15611
15612 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15613 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15614
15615 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15616 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15617 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15618 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15619 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15620
15621 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15622 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15623 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15624 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15625
15626 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15627 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15628
15629 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15630
15631 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15632 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15633
15634 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15635 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15636 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15637 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15638 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15639
15640 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15641
15642 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15643 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15644 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15645 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15646
15647 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15648 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15649 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15650
15651 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15652 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15653 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15654 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15655
15656 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15657 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15658 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15659
15660 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15661 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15662
15663 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15664 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15665
15666 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15667 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15668 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15669
15670 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15671 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15672 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15673
15674 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15675 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15676
15677 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15678 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15679 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15680 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15681
15682 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15683 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15684 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15685 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15686
15687 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15688 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15689
15690 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15691
15692 /* Prescott New Instructions. */
15693 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15694 void_ftype_pcvoid_unsigned_unsigned,
15695 IX86_BUILTIN_MONITOR);
15696 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15697 void_ftype_unsigned_unsigned,
15698 IX86_BUILTIN_MWAIT);
15699 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15700 v4sf_ftype_v4sf,
15701 IX86_BUILTIN_MOVSHDUP);
15702 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15703 v4sf_ftype_v4sf,
15704 IX86_BUILTIN_MOVSLDUP);
15705 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15706 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15707
15708 /* SSSE3. */
15709 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15710 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15711 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15712 IX86_BUILTIN_PALIGNR);
15713
15714 /* Access to the vec_init patterns. */
15715 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15716 integer_type_node, NULL_TREE);
15717 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15718 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15719
15720 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15721 short_integer_type_node,
15722 short_integer_type_node,
15723 short_integer_type_node, NULL_TREE);
15724 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15725 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15726
15727 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15728 char_type_node, char_type_node,
15729 char_type_node, char_type_node,
15730 char_type_node, char_type_node,
15731 char_type_node, NULL_TREE);
15732 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15733 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15734
15735 /* Access to the vec_extract patterns. */
15736 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15737 integer_type_node, NULL_TREE);
15738 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15739 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15740
15741 ftype = build_function_type_list (long_long_integer_type_node,
15742 V2DI_type_node, integer_type_node,
15743 NULL_TREE);
15744 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15745 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15746
15747 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15748 integer_type_node, NULL_TREE);
15749 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15750 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15751
15752 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15753 integer_type_node, NULL_TREE);
15754 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15755 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15756
15757 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15758 integer_type_node, NULL_TREE);
15759 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15760 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15761
15762 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15763 integer_type_node, NULL_TREE);
15764 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15765 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15766
15767 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15768 integer_type_node, NULL_TREE);
15769 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15770 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15771
15772 /* Access to the vec_set patterns. */
15773 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15774 intHI_type_node,
15775 integer_type_node, NULL_TREE);
15776 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15777 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15778
15779 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15780 intHI_type_node,
15781 integer_type_node, NULL_TREE);
15782 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15783 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15784 }
15785
15786 /* Errors in the source file can cause expand_expr to return const0_rtx
15787 where we expect a vector. To avoid crashing, use one of the vector
15788 clear instructions. */
15789 static rtx
15790 safe_vector_operand (rtx x, enum machine_mode mode)
15791 {
15792 if (x == const0_rtx)
15793 x = CONST0_RTX (mode);
15794 return x;
15795 }
15796
15797 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15798
15799 static rtx
15800 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15801 {
15802 rtx pat, xops[3];
15803 tree arg0 = TREE_VALUE (arglist);
15804 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15805 rtx op0 = expand_normal (arg0);
15806 rtx op1 = expand_normal (arg1);
15807 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15808 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15809 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15810
15811 if (VECTOR_MODE_P (mode0))
15812 op0 = safe_vector_operand (op0, mode0);
15813 if (VECTOR_MODE_P (mode1))
15814 op1 = safe_vector_operand (op1, mode1);
15815
15816 if (optimize || !target
15817 || GET_MODE (target) != tmode
15818 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15819 target = gen_reg_rtx (tmode);
15820
15821 if (GET_MODE (op1) == SImode && mode1 == TImode)
15822 {
15823 rtx x = gen_reg_rtx (V4SImode);
15824 emit_insn (gen_sse2_loadd (x, op1));
15825 op1 = gen_lowpart (TImode, x);
15826 }
15827
15828 /* The insn must want input operands in the same modes as the
15829 result. */
15830 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15831 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15832
15833 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15834 op0 = copy_to_mode_reg (mode0, op0);
15835 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15836 op1 = copy_to_mode_reg (mode1, op1);
15837
15838 /* ??? Using ix86_fixup_binary_operands is problematic when
15839 we've got mismatched modes. Fake it. */
15840
15841 xops[0] = target;
15842 xops[1] = op0;
15843 xops[2] = op1;
15844
15845 if (tmode == mode0 && tmode == mode1)
15846 {
15847 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15848 op0 = xops[1];
15849 op1 = xops[2];
15850 }
15851 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15852 {
15853 op0 = force_reg (mode0, op0);
15854 op1 = force_reg (mode1, op1);
15855 target = gen_reg_rtx (tmode);
15856 }
15857
15858 pat = GEN_FCN (icode) (target, op0, op1);
15859 if (! pat)
15860 return 0;
15861 emit_insn (pat);
15862 return target;
15863 }
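
/* Illustration (not part of the original source): a two-operand SSE
   intrinsic such as _mm_add_ps from <xmmintrin.h> reaches
   ix86_expand_binop_builtin above through its entry in bdesc_2arg.
   A minimal user-level example, assuming the standard intrinsic names:

     #include <xmmintrin.h>

     __m128 add4 (__m128 a, __m128 b)
     {
       return _mm_add_ps (a, b);   // expands via __builtin_ia32_addps
     }

   Compiled with -msse this becomes a single addps instruction.  */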
15864
15865 /* Subroutine of ix86_expand_builtin to take care of stores. */
15866
15867 static rtx
15868 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15869 {
15870 rtx pat;
15871 tree arg0 = TREE_VALUE (arglist);
15872 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15873 rtx op0 = expand_normal (arg0);
15874 rtx op1 = expand_normal (arg1);
15875 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15876 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15877
15878 if (VECTOR_MODE_P (mode1))
15879 op1 = safe_vector_operand (op1, mode1);
15880
15881 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15882 op1 = copy_to_mode_reg (mode1, op1);
15883
15884 pat = GEN_FCN (icode) (op0, op1);
15885 if (pat)
15886 emit_insn (pat);
15887 return 0;
15888 }
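
/* Illustration (not part of the original source): store builtins are
   routed through ix86_expand_store_builtin above; for example
   _mm_storeu_ps from <xmmintrin.h>, which GCC implements on top of
   __builtin_ia32_storeups:

     #include <xmmintrin.h>

     void store4 (float *p, __m128 v)
     {
       _mm_storeu_ps (p, v);   // unaligned store, movups
     }
*/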
15889
15890 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15891
15892 static rtx
15893 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15894 rtx target, int do_load)
15895 {
15896 rtx pat;
15897 tree arg0 = TREE_VALUE (arglist);
15898 rtx op0 = expand_normal (arg0);
15899 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15900 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15901
15902 if (optimize || !target
15903 || GET_MODE (target) != tmode
15904 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15905 target = gen_reg_rtx (tmode);
15906 if (do_load)
15907 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15908 else
15909 {
15910 if (VECTOR_MODE_P (mode0))
15911 op0 = safe_vector_operand (op0, mode0);
15912
15913 if ((optimize && !register_operand (op0, mode0))
15914 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15915 op0 = copy_to_mode_reg (mode0, op0);
15916 }
15917
15918 pat = GEN_FCN (icode) (target, op0);
15919 if (! pat)
15920 return 0;
15921 emit_insn (pat);
15922 return target;
15923 }
15924
15925 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15926 sqrtss, rsqrtss, rcpss. */
15927
15928 static rtx
15929 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15930 {
15931 rtx pat;
15932 tree arg0 = TREE_VALUE (arglist);
15933 rtx op1, op0 = expand_normal (arg0);
15934 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15935 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15936
15937 if (optimize || !target
15938 || GET_MODE (target) != tmode
15939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15940 target = gen_reg_rtx (tmode);
15941
15942 if (VECTOR_MODE_P (mode0))
15943 op0 = safe_vector_operand (op0, mode0);
15944
15945 if ((optimize && !register_operand (op0, mode0))
15946 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15947 op0 = copy_to_mode_reg (mode0, op0);
15948
15949 op1 = op0;
15950 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15951 op1 = copy_to_mode_reg (mode0, op1);
15952
15953 pat = GEN_FCN (icode) (target, op0, op1);
15954 if (! pat)
15955 return 0;
15956 emit_insn (pat);
15957 return target;
15958 }
15959
15960 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15961
15962 static rtx
15963 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15964 rtx target)
15965 {
15966 rtx pat;
15967 tree arg0 = TREE_VALUE (arglist);
15968 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15969 rtx op0 = expand_normal (arg0);
15970 rtx op1 = expand_normal (arg1);
15971 rtx op2;
15972 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15973 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15974 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15975 enum rtx_code comparison = d->comparison;
15976
15977 if (VECTOR_MODE_P (mode0))
15978 op0 = safe_vector_operand (op0, mode0);
15979 if (VECTOR_MODE_P (mode1))
15980 op1 = safe_vector_operand (op1, mode1);
15981
15982 /* Swap operands if we have a comparison that isn't available in
15983 hardware. */
15984 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15985 {
15986 rtx tmp = gen_reg_rtx (mode1);
15987 emit_move_insn (tmp, op1);
15988 op1 = op0;
15989 op0 = tmp;
15990 }
15991
15992 if (optimize || !target
15993 || GET_MODE (target) != tmode
15994 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15995 target = gen_reg_rtx (tmode);
15996
15997 if ((optimize && !register_operand (op0, mode0))
15998 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15999 op0 = copy_to_mode_reg (mode0, op0);
16000 if ((optimize && !register_operand (op1, mode1))
16001 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16002 op1 = copy_to_mode_reg (mode1, op1);
16003
16004 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16005 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16006 if (! pat)
16007 return 0;
16008 emit_insn (pat);
16009 return target;
16010 }
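
/* Illustration (not part of the original source): the SSE compare
   instructions only encode LT/LE/EQ and their negations, so "greater"
   predicates take the BUILTIN_DESC_SWAP_OPERANDS path above.  Assuming
   the standard <xmmintrin.h> names:

     #include <xmmintrin.h>

     __m128 gt (__m128 a, __m128 b)
     {
       return _mm_cmpgt_ps (a, b);   // emitted as cmpltps with the operands swapped
     }
*/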
16011
16012 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16013
16014 static rtx
16015 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16016 rtx target)
16017 {
16018 rtx pat;
16019 tree arg0 = TREE_VALUE (arglist);
16020 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16021 rtx op0 = expand_normal (arg0);
16022 rtx op1 = expand_normal (arg1);
16023 rtx op2;
16024 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16025 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16026 enum rtx_code comparison = d->comparison;
16027
16028 if (VECTOR_MODE_P (mode0))
16029 op0 = safe_vector_operand (op0, mode0);
16030 if (VECTOR_MODE_P (mode1))
16031 op1 = safe_vector_operand (op1, mode1);
16032
16033 /* Swap operands if we have a comparison that isn't available in
16034 hardware. */
16035 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16036 {
16037 rtx tmp = op1;
16038 op1 = op0;
16039 op0 = tmp;
16040 }
16041
16042 target = gen_reg_rtx (SImode);
16043 emit_move_insn (target, const0_rtx);
16044 target = gen_rtx_SUBREG (QImode, target, 0);
16045
16046 if ((optimize && !register_operand (op0, mode0))
16047 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16048 op0 = copy_to_mode_reg (mode0, op0);
16049 if ((optimize && !register_operand (op1, mode1))
16050 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16051 op1 = copy_to_mode_reg (mode1, op1);
16052
16053 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16054 pat = GEN_FCN (d->icode) (op0, op1);
16055 if (! pat)
16056 return 0;
16057 emit_insn (pat);
16058 emit_insn (gen_rtx_SET (VOIDmode,
16059 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16060 gen_rtx_fmt_ee (comparison, QImode,
16061 SET_DEST (pat),
16062 const0_rtx)));
16063
16064 return SUBREG_REG (target);
16065 }
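
/* Illustration (not part of the original source): the comi builtins
   return a scalar int derived from EFLAGS, as in _mm_comieq_ss from
   <xmmintrin.h>:

     #include <xmmintrin.h>

     int eq_first (__m128 a, __m128 b)
     {
       return _mm_comieq_ss (a, b);   // comiss plus a byte set from the flags
     }
*/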
16066
16067 /* Return the integer constant in ARG. Constrain it to be in the range
16068 of the subparts of VEC_TYPE; issue an error if not. */
16069
16070 static int
16071 get_element_number (tree vec_type, tree arg)
16072 {
16073 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16074
16075 if (!host_integerp (arg, 1)
16076 || (elt = tree_low_cst (arg, 1), elt > max))
16077 {
16078 error ("selector must be an integer constant in the range 0..%wi", max);
16079 return 0;
16080 }
16081
16082 return elt;
16083 }
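
/* Illustration (not part of the original source): get_element_number
   rejects selectors outside the vector, so hypothetical user code like

     #include <xmmintrin.h>
     float f (__m128 v) { return __builtin_ia32_vec_ext_v4sf ((__v4sf) v, 7); }

   draws "selector must be an integer constant in the range 0..3";
   element 0 is then used so compilation can continue.  */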
16084
16085 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16086 ix86_expand_vector_init. We DO have language-level syntax for this, in
16087 the form of (type){ init-list }. Except that since we can't place emms
16088 instructions from inside the compiler, we can't allow the use of MMX
16089 registers unless the user explicitly asks for it. So we do *not* define
16090 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16091 we have builtins invoked by mmintrin.h that give us license to emit
16092 these sorts of instructions. */
16093
16094 static rtx
16095 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16096 {
16097 enum machine_mode tmode = TYPE_MODE (type);
16098 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16099 int i, n_elt = GET_MODE_NUNITS (tmode);
16100 rtvec v = rtvec_alloc (n_elt);
16101
16102 gcc_assert (VECTOR_MODE_P (tmode));
16103
16104 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16105 {
16106 rtx x = expand_normal (TREE_VALUE (arglist));
16107 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16108 }
16109
16110 gcc_assert (arglist == NULL);
16111
16112 if (!target || !register_operand (target, tmode))
16113 target = gen_reg_rtx (tmode);
16114
16115 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16116 return target;
16117 }
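
/* Illustration (not part of the original source): the vec_init builtins
   are how the MMX intrinsics build vectors without the compiler itself
   having to use MMX registers.  Assuming the standard <mmintrin.h> names:

     #include <mmintrin.h>

     __m64 pair (int lo, int hi)
     {
       return _mm_set_pi32 (hi, lo);   // __builtin_ia32_vec_init_v2si (lo, hi)
     }
*/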
16118
16119 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16120 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16121 had a language-level syntax for referencing vector elements. */
16122
16123 static rtx
16124 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16125 {
16126 enum machine_mode tmode, mode0;
16127 tree arg0, arg1;
16128 int elt;
16129 rtx op0;
16130
16131 arg0 = TREE_VALUE (arglist);
16132 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16133
16134 op0 = expand_normal (arg0);
16135 elt = get_element_number (TREE_TYPE (arg0), arg1);
16136
16137 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16138 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16139 gcc_assert (VECTOR_MODE_P (mode0));
16140
16141 op0 = force_reg (mode0, op0);
16142
16143 if (optimize || !target || !register_operand (target, tmode))
16144 target = gen_reg_rtx (tmode);
16145
16146 ix86_expand_vector_extract (true, target, op0, elt);
16147
16148 return target;
16149 }
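
/* Illustration (not part of the original source): the vec_ext builtins
   back element-extraction intrinsics such as _mm_extract_pi16 from
   <xmmintrin.h> (SSE's MMX extension, hence MASK_SSE | MASK_3DNOW_A in
   the definitions above):

     #include <xmmintrin.h>

     int third (__m64 v)
     {
       return _mm_extract_pi16 (v, 2);   // __builtin_ia32_vec_ext_v4hi (v, 2)
     }
*/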
16150
16151 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16152 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16153 a language-level syntax for referencing vector elements. */
16154
16155 static rtx
16156 ix86_expand_vec_set_builtin (tree arglist)
16157 {
16158 enum machine_mode tmode, mode1;
16159 tree arg0, arg1, arg2;
16160 int elt;
16161 rtx op0, op1;
16162
16163 arg0 = TREE_VALUE (arglist);
16164 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16165 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16166
16167 tmode = TYPE_MODE (TREE_TYPE (arg0));
16168 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16169 gcc_assert (VECTOR_MODE_P (tmode));
16170
16171 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16172 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16173 elt = get_element_number (TREE_TYPE (arg0), arg2);
16174
16175 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16176 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16177
16178 op0 = force_reg (tmode, op0);
16179 op1 = force_reg (mode1, op1);
16180
16181 ix86_expand_vector_set (true, op0, op1, elt);
16182
16183 return op0;
16184 }
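
/* Illustration (not part of the original source): likewise, the vec_set
   builtins back element-insertion intrinsics such as _mm_insert_pi16
   from <xmmintrin.h>:

     #include <xmmintrin.h>

     __m64 put_third (__m64 v, int x)
     {
       return _mm_insert_pi16 (v, x, 2);   // __builtin_ia32_vec_set_v4hi
     }
*/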
16185
16186 /* Expand an expression EXP that calls a built-in function,
16187 with result going to TARGET if that's convenient
16188 (and in mode MODE if that's convenient).
16189 SUBTARGET may be used as the target for computing one of EXP's operands.
16190 IGNORE is nonzero if the value is to be ignored. */
16191
16192 static rtx
16193 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16194 enum machine_mode mode ATTRIBUTE_UNUSED,
16195 int ignore ATTRIBUTE_UNUSED)
16196 {
16197 const struct builtin_description *d;
16198 size_t i;
16199 enum insn_code icode;
16200 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16201 tree arglist = TREE_OPERAND (exp, 1);
16202 tree arg0, arg1, arg2;
16203 rtx op0, op1, op2, pat;
16204 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16205 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16206
16207 switch (fcode)
16208 {
16209 case IX86_BUILTIN_EMMS:
16210 emit_insn (gen_mmx_emms ());
16211 return 0;
16212
16213 case IX86_BUILTIN_SFENCE:
16214 emit_insn (gen_sse_sfence ());
16215 return 0;
16216
16217 case IX86_BUILTIN_MASKMOVQ:
16218 case IX86_BUILTIN_MASKMOVDQU:
16219 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16220 ? CODE_FOR_mmx_maskmovq
16221 : CODE_FOR_sse2_maskmovdqu);
16222 /* Note the arg order is different from the operand order. */
16223 arg1 = TREE_VALUE (arglist);
16224 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16225 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16226 op0 = expand_normal (arg0);
16227 op1 = expand_normal (arg1);
16228 op2 = expand_normal (arg2);
16229 mode0 = insn_data[icode].operand[0].mode;
16230 mode1 = insn_data[icode].operand[1].mode;
16231 mode2 = insn_data[icode].operand[2].mode;
16232
16233 op0 = force_reg (Pmode, op0);
16234 op0 = gen_rtx_MEM (mode1, op0);
16235
16236 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16237 op0 = copy_to_mode_reg (mode0, op0);
16238 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16239 op1 = copy_to_mode_reg (mode1, op1);
16240 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16241 op2 = copy_to_mode_reg (mode2, op2);
16242 pat = GEN_FCN (icode) (op0, op1, op2);
16243 if (! pat)
16244 return 0;
16245 emit_insn (pat);
16246 return 0;
16247
16248 case IX86_BUILTIN_SQRTSS:
16249 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16250 case IX86_BUILTIN_RSQRTSS:
16251 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16252 case IX86_BUILTIN_RCPSS:
16253 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16254
16255 case IX86_BUILTIN_LOADUPS:
16256 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16257
16258 case IX86_BUILTIN_STOREUPS:
16259 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16260
16261 case IX86_BUILTIN_LOADHPS:
16262 case IX86_BUILTIN_LOADLPS:
16263 case IX86_BUILTIN_LOADHPD:
16264 case IX86_BUILTIN_LOADLPD:
16265 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16266 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16267 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16268 : CODE_FOR_sse2_loadlpd);
16269 arg0 = TREE_VALUE (arglist);
16270 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16271 op0 = expand_normal (arg0);
16272 op1 = expand_normal (arg1);
16273 tmode = insn_data[icode].operand[0].mode;
16274 mode0 = insn_data[icode].operand[1].mode;
16275 mode1 = insn_data[icode].operand[2].mode;
16276
16277 op0 = force_reg (mode0, op0);
16278 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16279 if (optimize || target == 0
16280 || GET_MODE (target) != tmode
16281 || !register_operand (target, tmode))
16282 target = gen_reg_rtx (tmode);
16283 pat = GEN_FCN (icode) (target, op0, op1);
16284 if (! pat)
16285 return 0;
16286 emit_insn (pat);
16287 return target;
16288
16289 case IX86_BUILTIN_STOREHPS:
16290 case IX86_BUILTIN_STORELPS:
16291 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16292 : CODE_FOR_sse_storelps);
16293 arg0 = TREE_VALUE (arglist);
16294 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16295 op0 = expand_normal (arg0);
16296 op1 = expand_normal (arg1);
16297 mode0 = insn_data[icode].operand[0].mode;
16298 mode1 = insn_data[icode].operand[1].mode;
16299
16300 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16301 op1 = force_reg (mode1, op1);
16302
16303 pat = GEN_FCN (icode) (op0, op1);
16304 if (! pat)
16305 return 0;
16306 emit_insn (pat);
16307 return const0_rtx;
16308
16309 case IX86_BUILTIN_MOVNTPS:
16310 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16311 case IX86_BUILTIN_MOVNTQ:
16312 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16313
16314 case IX86_BUILTIN_LDMXCSR:
16315 op0 = expand_normal (TREE_VALUE (arglist));
16316 target = assign_386_stack_local (SImode, SLOT_TEMP);
16317 emit_move_insn (target, op0);
16318 emit_insn (gen_sse_ldmxcsr (target));
16319 return 0;
16320
16321 case IX86_BUILTIN_STMXCSR:
16322 target = assign_386_stack_local (SImode, SLOT_TEMP);
16323 emit_insn (gen_sse_stmxcsr (target));
16324 return copy_to_mode_reg (SImode, target);
16325
16326 case IX86_BUILTIN_SHUFPS:
16327 case IX86_BUILTIN_SHUFPD:
16328 icode = (fcode == IX86_BUILTIN_SHUFPS
16329 ? CODE_FOR_sse_shufps
16330 : CODE_FOR_sse2_shufpd);
16331 arg0 = TREE_VALUE (arglist);
16332 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16333 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16334 op0 = expand_normal (arg0);
16335 op1 = expand_normal (arg1);
16336 op2 = expand_normal (arg2);
16337 tmode = insn_data[icode].operand[0].mode;
16338 mode0 = insn_data[icode].operand[1].mode;
16339 mode1 = insn_data[icode].operand[2].mode;
16340 mode2 = insn_data[icode].operand[3].mode;
16341
16342 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16343 op0 = copy_to_mode_reg (mode0, op0);
16344 if ((optimize && !register_operand (op1, mode1))
16345 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16346 op1 = copy_to_mode_reg (mode1, op1);
16347 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16348 {
16349 /* @@@ better error message */
16350 error ("mask must be an immediate");
16351 return gen_reg_rtx (tmode);
16352 }
16353 if (optimize || target == 0
16354 || GET_MODE (target) != tmode
16355 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16356 target = gen_reg_rtx (tmode);
16357 pat = GEN_FCN (icode) (target, op0, op1, op2);
16358 if (! pat)
16359 return 0;
16360 emit_insn (pat);
16361 return target;
16362
16363 case IX86_BUILTIN_PSHUFW:
16364 case IX86_BUILTIN_PSHUFD:
16365 case IX86_BUILTIN_PSHUFHW:
16366 case IX86_BUILTIN_PSHUFLW:
16367 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16368 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16369 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16370 : CODE_FOR_mmx_pshufw);
16371 arg0 = TREE_VALUE (arglist);
16372 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16373 op0 = expand_normal (arg0);
16374 op1 = expand_normal (arg1);
16375 tmode = insn_data[icode].operand[0].mode;
16376 mode1 = insn_data[icode].operand[1].mode;
16377 mode2 = insn_data[icode].operand[2].mode;
16378
16379 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16380 op0 = copy_to_mode_reg (mode1, op0);
16381 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16382 {
16383 /* @@@ better error message */
16384 error ("mask must be an immediate");
16385 return const0_rtx;
16386 }
16387 if (target == 0
16388 || GET_MODE (target) != tmode
16389 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16390 target = gen_reg_rtx (tmode);
16391 pat = GEN_FCN (icode) (target, op0, op1);
16392 if (! pat)
16393 return 0;
16394 emit_insn (pat);
16395 return target;
16396
16397 case IX86_BUILTIN_PSLLDQI128:
16398 case IX86_BUILTIN_PSRLDQI128:
16399 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16400 : CODE_FOR_sse2_lshrti3);
16401 arg0 = TREE_VALUE (arglist);
16402 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16403 op0 = expand_normal (arg0);
16404 op1 = expand_normal (arg1);
16405 tmode = insn_data[icode].operand[0].mode;
16406 mode1 = insn_data[icode].operand[1].mode;
16407 mode2 = insn_data[icode].operand[2].mode;
16408
16409 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16410 {
16411 op0 = copy_to_reg (op0);
16412 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16413 }
16414 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16415 {
16416 error ("shift must be an immediate");
16417 return const0_rtx;
16418 }
16419 target = gen_reg_rtx (V2DImode);
16420 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16421 if (! pat)
16422 return 0;
16423 emit_insn (pat);
16424 return target;
16425
16426 case IX86_BUILTIN_FEMMS:
16427 emit_insn (gen_mmx_femms ());
16428 return NULL_RTX;
16429
16430 case IX86_BUILTIN_PAVGUSB:
16431 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16432
16433 case IX86_BUILTIN_PF2ID:
16434 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16435
16436 case IX86_BUILTIN_PFACC:
16437 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16438
16439 case IX86_BUILTIN_PFADD:
16440 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16441
16442 case IX86_BUILTIN_PFCMPEQ:
16443 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16444
16445 case IX86_BUILTIN_PFCMPGE:
16446 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16447
16448 case IX86_BUILTIN_PFCMPGT:
16449 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16450
16451 case IX86_BUILTIN_PFMAX:
16452 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16453
16454 case IX86_BUILTIN_PFMIN:
16455 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16456
16457 case IX86_BUILTIN_PFMUL:
16458 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16459
16460 case IX86_BUILTIN_PFRCP:
16461 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16462
16463 case IX86_BUILTIN_PFRCPIT1:
16464 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16465
16466 case IX86_BUILTIN_PFRCPIT2:
16467 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16468
16469 case IX86_BUILTIN_PFRSQIT1:
16470 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16471
16472 case IX86_BUILTIN_PFRSQRT:
16473 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16474
16475 case IX86_BUILTIN_PFSUB:
16476 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16477
16478 case IX86_BUILTIN_PFSUBR:
16479 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16480
16481 case IX86_BUILTIN_PI2FD:
16482 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16483
16484 case IX86_BUILTIN_PMULHRW:
16485 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16486
16487 case IX86_BUILTIN_PF2IW:
16488 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16489
16490 case IX86_BUILTIN_PFNACC:
16491 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16492
16493 case IX86_BUILTIN_PFPNACC:
16494 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16495
16496 case IX86_BUILTIN_PI2FW:
16497 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16498
16499 case IX86_BUILTIN_PSWAPDSI:
16500 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16501
16502 case IX86_BUILTIN_PSWAPDSF:
16503 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16504
16505 case IX86_BUILTIN_SQRTSD:
16506 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16507 case IX86_BUILTIN_LOADUPD:
16508 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16509 case IX86_BUILTIN_STOREUPD:
16510 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16511
16512 case IX86_BUILTIN_MFENCE:
16513 emit_insn (gen_sse2_mfence ());
16514 return 0;
16515 case IX86_BUILTIN_LFENCE:
16516 emit_insn (gen_sse2_lfence ());
16517 return 0;
16518
16519 case IX86_BUILTIN_CLFLUSH:
16520 arg0 = TREE_VALUE (arglist);
16521 op0 = expand_normal (arg0);
16522 icode = CODE_FOR_sse2_clflush;
16523 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16524 op0 = copy_to_mode_reg (Pmode, op0);
16525
16526 emit_insn (gen_sse2_clflush (op0));
16527 return 0;
16528
16529 case IX86_BUILTIN_MOVNTPD:
16530 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16531 case IX86_BUILTIN_MOVNTDQ:
16532 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16533 case IX86_BUILTIN_MOVNTI:
16534 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16535
16536 case IX86_BUILTIN_LOADDQU:
16537 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16538 case IX86_BUILTIN_STOREDQU:
16539 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16540
16541 case IX86_BUILTIN_MONITOR:
16542 arg0 = TREE_VALUE (arglist);
16543 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16544 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16545 op0 = expand_normal (arg0);
16546 op1 = expand_normal (arg1);
16547 op2 = expand_normal (arg2);
16548 if (!REG_P (op0))
16549 op0 = copy_to_mode_reg (Pmode, op0);
16550 if (!REG_P (op1))
16551 op1 = copy_to_mode_reg (SImode, op1);
16552 if (!REG_P (op2))
16553 op2 = copy_to_mode_reg (SImode, op2);
16554 if (!TARGET_64BIT)
16555 emit_insn (gen_sse3_monitor (op0, op1, op2));
16556 else
16557 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16558 return 0;
16559
16560 case IX86_BUILTIN_MWAIT:
16561 arg0 = TREE_VALUE (arglist);
16562 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16563 op0 = expand_normal (arg0);
16564 op1 = expand_normal (arg1);
16565 if (!REG_P (op0))
16566 op0 = copy_to_mode_reg (SImode, op0);
16567 if (!REG_P (op1))
16568 op1 = copy_to_mode_reg (SImode, op1);
16569 emit_insn (gen_sse3_mwait (op0, op1));
16570 return 0;
16571
16572 case IX86_BUILTIN_LDDQU:
16573 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16574 target, 1);
16575
16576 case IX86_BUILTIN_PALIGNR:
16577 case IX86_BUILTIN_PALIGNR128:
16578 if (fcode == IX86_BUILTIN_PALIGNR)
16579 {
16580 icode = CODE_FOR_ssse3_palignrdi;
16581 mode = DImode;
16582 }
16583 else
16584 {
16585 icode = CODE_FOR_ssse3_palignrti;
16586 mode = V2DImode;
16587 }
16588 arg0 = TREE_VALUE (arglist);
16589 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16590 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16591 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16592 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16593 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16594 tmode = insn_data[icode].operand[0].mode;
16595 mode1 = insn_data[icode].operand[1].mode;
16596 mode2 = insn_data[icode].operand[2].mode;
16597 mode3 = insn_data[icode].operand[3].mode;
16598
16599 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16600 {
16601 op0 = copy_to_reg (op0);
16602 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16603 }
16604 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16605 {
16606 op1 = copy_to_reg (op1);
16607 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16608 }
16609 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16610 {
16611 error ("shift must be an immediate");
16612 return const0_rtx;
16613 }
16614 target = gen_reg_rtx (mode);
16615 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16616 op0, op1, op2);
16617 if (! pat)
16618 return 0;
16619 emit_insn (pat);
16620 return target;
16621
16622 case IX86_BUILTIN_VEC_INIT_V2SI:
16623 case IX86_BUILTIN_VEC_INIT_V4HI:
16624 case IX86_BUILTIN_VEC_INIT_V8QI:
16625 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16626
16627 case IX86_BUILTIN_VEC_EXT_V2DF:
16628 case IX86_BUILTIN_VEC_EXT_V2DI:
16629 case IX86_BUILTIN_VEC_EXT_V4SF:
16630 case IX86_BUILTIN_VEC_EXT_V4SI:
16631 case IX86_BUILTIN_VEC_EXT_V8HI:
16632 case IX86_BUILTIN_VEC_EXT_V2SI:
16633 case IX86_BUILTIN_VEC_EXT_V4HI:
16634 return ix86_expand_vec_ext_builtin (arglist, target);
16635
16636 case IX86_BUILTIN_VEC_SET_V8HI:
16637 case IX86_BUILTIN_VEC_SET_V4HI:
16638 return ix86_expand_vec_set_builtin (arglist);
16639
16640 default:
16641 break;
16642 }
16643
16644 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16645 if (d->code == fcode)
16646 {
16647 /* Compares are treated specially. */
16648 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16649 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16650 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16651 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16652 return ix86_expand_sse_compare (d, arglist, target);
16653
16654 return ix86_expand_binop_builtin (d->icode, arglist, target);
16655 }
16656
16657 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16658 if (d->code == fcode)
16659 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16660
16661 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16662 if (d->code == fcode)
16663 return ix86_expand_sse_comi (d, arglist, target);
16664
16665 gcc_unreachable ();
16666 }
16667
16668 /* Store OPERAND to memory after reload is completed. This means
16669 that we can't easily use assign_stack_local. */
16670 rtx
16671 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16672 {
16673 rtx result;
16674
16675 gcc_assert (reload_completed);
16676 if (TARGET_RED_ZONE)
16677 {
16678 result = gen_rtx_MEM (mode,
16679 gen_rtx_PLUS (Pmode,
16680 stack_pointer_rtx,
16681 GEN_INT (-RED_ZONE_SIZE)));
16682 emit_move_insn (result, operand);
16683 }
16684 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16685 {
16686 switch (mode)
16687 {
16688 case HImode:
16689 case SImode:
16690 operand = gen_lowpart (DImode, operand);
16691 /* FALLTHRU */
16692 case DImode:
16693 emit_insn (
16694 gen_rtx_SET (VOIDmode,
16695 gen_rtx_MEM (DImode,
16696 gen_rtx_PRE_DEC (DImode,
16697 stack_pointer_rtx)),
16698 operand));
16699 break;
16700 default:
16701 gcc_unreachable ();
16702 }
16703 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16704 }
16705 else
16706 {
16707 switch (mode)
16708 {
16709 case DImode:
16710 {
16711 rtx operands[2];
16712 split_di (&operand, 1, operands, operands + 1);
16713 emit_insn (
16714 gen_rtx_SET (VOIDmode,
16715 gen_rtx_MEM (SImode,
16716 gen_rtx_PRE_DEC (Pmode,
16717 stack_pointer_rtx)),
16718 operands[1]));
16719 emit_insn (
16720 gen_rtx_SET (VOIDmode,
16721 gen_rtx_MEM (SImode,
16722 gen_rtx_PRE_DEC (Pmode,
16723 stack_pointer_rtx)),
16724 operands[0]));
16725 }
16726 break;
16727 case HImode:
16728 /* Store HImodes as SImodes. */
16729 operand = gen_lowpart (SImode, operand);
16730 /* FALLTHRU */
16731 case SImode:
16732 emit_insn (
16733 gen_rtx_SET (VOIDmode,
16734 gen_rtx_MEM (GET_MODE (operand),
16735 gen_rtx_PRE_DEC (SImode,
16736 stack_pointer_rtx)),
16737 operand));
16738 break;
16739 default:
16740 gcc_unreachable ();
16741 }
16742 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16743 }
16744 return result;
16745 }
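
/* Illustration (not part of the original source): when a red zone is
   available the operand is simply parked below the stack pointer,
   roughly

     movq %rax, -128(%rsp)

   whereas without a red zone the pre-decrement MEMs above materialize
   as push instructions: pushq %rax on 64-bit, or a pair of pushl
   instructions for a 32-bit DImode operand.  */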
16746
16747 /* Free operand from memory. */
16748 void
16749 ix86_free_from_memory (enum machine_mode mode)
16750 {
16751 if (!TARGET_RED_ZONE)
16752 {
16753 int size;
16754
16755 if (mode == DImode || TARGET_64BIT)
16756 size = 8;
16757 else
16758 size = 4;
16759 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16760 to a pop or add instruction if registers are available. */
16761 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16762 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16763 GEN_INT (size))));
16764 }
16765 }
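
/* Illustration (not part of the original source): the deallocation
   above is printed as an address computation, e.g.

     leal 4(%esp), %esp        (or leaq 8(%rsp), %rsp on 64-bit)

   which leaves the flags untouched; peephole2 may later rewrite it as
   "addl $4, %esp" or a dead "popl" when flags and registers allow.  */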
16766
16767 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16768 QImode must go into class Q_REGS.
16769 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16770 movdf to do mem-to-mem moves through integer regs. */
16771 enum reg_class
16772 ix86_preferred_reload_class (rtx x, enum reg_class class)
16773 {
16774 enum machine_mode mode = GET_MODE (x);
16775
16776 /* We're only allowed to return a subclass of CLASS. Many of the
16777 following checks fail for NO_REGS, so eliminate that early. */
16778 if (class == NO_REGS)
16779 return NO_REGS;
16780
16781 /* All classes can load zeros. */
16782 if (x == CONST0_RTX (mode))
16783 return class;
16784
16785 /* Force constants into memory if we are loading a (nonzero) constant into
16786 an MMX or SSE register. This is because there are no MMX/SSE instructions
16787 to load from a constant. */
16788 if (CONSTANT_P (x)
16789 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16790 return NO_REGS;
16791
16792 /* Prefer SSE regs only, if we can use them for math. */
16793 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16794 return SSE_CLASS_P (class) ? class : NO_REGS;
16795
16796 /* Floating-point constants need more complex checks. */
16797 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16798 {
16799 /* General regs can load everything. */
16800 if (reg_class_subset_p (class, GENERAL_REGS))
16801 return class;
16802
16803 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16804 zero above. We only want to wind up preferring 80387 registers if
16805 we plan on doing computation with them. */
16806 if (TARGET_80387
16807 && standard_80387_constant_p (x))
16808 {
16809 /* Limit class to non-sse. */
16810 if (class == FLOAT_SSE_REGS)
16811 return FLOAT_REGS;
16812 if (class == FP_TOP_SSE_REGS)
16813 return FP_TOP_REG;
16814 if (class == FP_SECOND_SSE_REGS)
16815 return FP_SECOND_REG;
16816 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16817 return class;
16818 }
16819
16820 return NO_REGS;
16821 }
16822
16823 /* Generally when we see PLUS here, it's the function invariant
16824 (plus soft-fp const_int), which can only be computed into general
16825 regs. */
16826 if (GET_CODE (x) == PLUS)
16827 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16828
16829 /* QImode constants are easy to load, but non-constant QImode data
16830 must go into Q_REGS. */
16831 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16832 {
16833 if (reg_class_subset_p (class, Q_REGS))
16834 return class;
16835 if (reg_class_subset_p (Q_REGS, class))
16836 return Q_REGS;
16837 return NO_REGS;
16838 }
16839
16840 return class;
16841 }
16842
16843 /* Discourage putting floating-point values in SSE registers unless
16844 SSE math is being used, and likewise for the 387 registers. */
16845 enum reg_class
16846 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16847 {
16848 enum machine_mode mode = GET_MODE (x);
16849
16850 /* Restrict the output reload class to the register bank that we are doing
16851 math on. If we would like not to return a subset of CLASS, reject this
16852 alternative: if reload cannot do this, it will still use its choice. */
16853 mode = GET_MODE (x);
16854 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16855 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16856
16857 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16858 {
16859 if (class == FP_TOP_SSE_REGS)
16860 return FP_TOP_REG;
16861 else if (class == FP_SECOND_SSE_REGS)
16862 return FP_SECOND_REG;
16863 else
16864 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16865 }
16866
16867 return class;
16868 }
16869
16870 /* If we are copying between general and FP registers, we need a memory
16871 location. The same is true for SSE and MMX registers.
16872
16873 The macro can't work reliably when one of the CLASSES is a class containing
16874 registers from multiple units (SSE, MMX, integer). We avoid this by never
16875 combining those units in a single alternative in the machine description.
16876 Ensure that this constraint holds to avoid unexpected surprises.
16877
16878 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16879 enforce these sanity checks. */
16880
16881 int
16882 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16883 enum machine_mode mode, int strict)
16884 {
16885 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16886 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16887 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16888 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16889 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16890 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16891 {
16892 gcc_assert (!strict);
16893 return true;
16894 }
16895
16896 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16897 return true;
16898
16899 /* ??? This is a lie. We do have moves between mmx/general and
16900 mmx/sse2. But by saying we need secondary memory we discourage the
16901 register allocator from using the mmx registers unless needed. */
16902 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16903 return true;
16904
16905 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16906 {
16907 /* SSE1 doesn't have any direct moves from other classes. */
16908 if (!TARGET_SSE2)
16909 return true;
16910
16911 /* If the target says that inter-unit moves are more expensive
16912 than moving through memory, then don't generate them. */
16913 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16914 return true;
16915
16916 /* Between SSE and general, we have moves no larger than word size. */
16917 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16918 return true;
16919
16920 /* ??? For the cost of one register reformat penalty, we could use
16921 the same instructions to move SFmode and DFmode data, but the
16922 relevant move patterns don't support those alternatives. */
16923 if (mode == SFmode || mode == DFmode)
16924 return true;
16925 }
16926
16927 return false;
16928 }
16929
16930 /* Return true if the registers in CLASS cannot represent the change from
16931 modes FROM to TO. */
16932
16933 bool
16934 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16935 enum reg_class class)
16936 {
16937 if (from == to)
16938 return false;
16939
16940 /* x87 registers can't do subreg at all, as all values are reformatted
16941 to extended precision. */
16942 if (MAYBE_FLOAT_CLASS_P (class))
16943 return true;
16944
16945 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16946 {
16947 /* Vector registers do not support QI or HImode loads. If we don't
16948 disallow a change to these modes, reload will assume it's ok to
16949 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16950 the vec_dupv4hi pattern. */
16951 if (GET_MODE_SIZE (from) < 4)
16952 return true;
16953
16954 /* Vector registers do not support subreg with nonzero offsets, which
16955 are otherwise valid for integer registers. Since we can't see
16956 whether we have a nonzero offset from here, prohibit all
16957 nonparadoxical subregs changing size. */
16958 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16959 return true;
16960 }
16961
16962 return false;
16963 }
16964
16965 /* Return the cost of moving data from a register in class CLASS1 to
16966 one in class CLASS2.
16967
16968 It is not required that the cost always equal 2 when FROM is the same as TO;
16969 on some machines it is expensive to move between registers if they are not
16970 general registers. */
16971
16972 int
16973 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16974 enum reg_class class2)
16975 {
16976 /* In case we require secondary memory, compute the cost of the store followed
16977 by load. In order to avoid bad register allocation choices, we need
16978 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16979
16980 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16981 {
16982 int cost = 1;
16983
16984 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16985 MEMORY_MOVE_COST (mode, class1, 1));
16986 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16987 MEMORY_MOVE_COST (mode, class2, 1));
16988
16989 /* When copying from a general-purpose register we may emit multiple
16990 stores followed by a single load, causing a memory size mismatch stall.
16991 Count this as an arbitrarily high cost of 20. */
16992 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16993 cost += 20;
16994
16995 /* In the case of FP/MMX moves, the registers actually overlap, and we
16996 have to switch modes in order to treat them differently. */
16997 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16998 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16999 cost += 20;
17000
17001 return cost;
17002 }
17003
17004 /* Moves between SSE/MMX and the integer unit are expensive. */
17005 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17006 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17007 return ix86_cost->mmxsse_to_integer;
17008 if (MAYBE_FLOAT_CLASS_P (class1))
17009 return ix86_cost->fp_move;
17010 if (MAYBE_SSE_CLASS_P (class1))
17011 return ix86_cost->sse_move;
17012 if (MAYBE_MMX_CLASS_P (class1))
17013 return ix86_cost->mmx_move;
17014 return 2;
17015 }
17016
17017 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17018
17019 bool
17020 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17021 {
17022 /* Flags and only flags can only hold CCmode values. */
17023 if (CC_REGNO_P (regno))
17024 return GET_MODE_CLASS (mode) == MODE_CC;
17025 if (GET_MODE_CLASS (mode) == MODE_CC
17026 || GET_MODE_CLASS (mode) == MODE_RANDOM
17027 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17028 return 0;
17029 if (FP_REGNO_P (regno))
17030 return VALID_FP_MODE_P (mode);
17031 if (SSE_REGNO_P (regno))
17032 {
17033 /* We implement the move patterns for all vector modes into and
17034 out of SSE registers, even when no operation instructions
17035 are available. */
17036 return (VALID_SSE_REG_MODE (mode)
17037 || VALID_SSE2_REG_MODE (mode)
17038 || VALID_MMX_REG_MODE (mode)
17039 || VALID_MMX_REG_MODE_3DNOW (mode));
17040 }
17041 if (MMX_REGNO_P (regno))
17042 {
17043 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17044 so if the register is available at all, then we can move data of
17045 the given mode into or out of it. */
17046 return (VALID_MMX_REG_MODE (mode)
17047 || VALID_MMX_REG_MODE_3DNOW (mode));
17048 }
17049
17050 if (mode == QImode)
17051 {
17052 /* Take care with QImode values - they can be in non-QI regs,
17053 but then they do cause partial register stalls. */
17054 if (regno < 4 || TARGET_64BIT)
17055 return 1;
17056 if (!TARGET_PARTIAL_REG_STALL)
17057 return 1;
17058 return reload_in_progress || reload_completed;
17059 }
17060 /* We handle both integers and floats in the general purpose registers. */
17061 else if (VALID_INT_MODE_P (mode))
17062 return 1;
17063 else if (VALID_FP_MODE_P (mode))
17064 return 1;
17065 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17066 on to use that value in smaller contexts, this can easily force a
17067 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17068 supporting DImode, allow it. */
17069 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17070 return 1;
17071
17072 return 0;
17073 }
17074
17075 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17076 tieable integer mode. */
17077
17078 static bool
17079 ix86_tieable_integer_mode_p (enum machine_mode mode)
17080 {
17081 switch (mode)
17082 {
17083 case HImode:
17084 case SImode:
17085 return true;
17086
17087 case QImode:
17088 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17089
17090 case DImode:
17091 return TARGET_64BIT;
17092
17093 default:
17094 return false;
17095 }
17096 }
17097
17098 /* Return true if MODE1 is accessible in a register that can hold MODE2
17099 without copying. That is, all register classes that can hold MODE2
17100 can also hold MODE1. */
17101
17102 bool
17103 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17104 {
17105 if (mode1 == mode2)
17106 return true;
17107
17108 if (ix86_tieable_integer_mode_p (mode1)
17109 && ix86_tieable_integer_mode_p (mode2))
17110 return true;
17111
17112 /* MODE2 being XFmode implies fp stack or general regs, which means we
17113 can tie any smaller floating point modes to it. Note that we do not
17114 tie this with TFmode. */
17115 if (mode2 == XFmode)
17116 return mode1 == SFmode || mode1 == DFmode;
17117
17118 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17119 that we can tie it with SFmode. */
17120 if (mode2 == DFmode)
17121 return mode1 == SFmode;
17122
17123 /* If MODE2 is only appropriate for an SSE register, then tie with
17124 any other mode acceptable to SSE registers. */
17125 if (GET_MODE_SIZE (mode2) >= 8
17126 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17127 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17128
17129 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17130 with any other mode acceptable to MMX registers. */
17131 if (GET_MODE_SIZE (mode2) == 8
17132 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17133 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17134
17135 return false;
17136 }
17137
17138 /* Return the cost of moving data of mode M between a
17139 register and memory. A value of 2 is the default; this cost is
17140 relative to those in `REGISTER_MOVE_COST'.
17141
17142 If moving between registers and memory is more expensive than
17143 between two registers, you should define this macro to express the
17144 relative cost.
17145
17146 Also model the increased cost of moving QImode registers in
17147 non-Q_REGS classes.
17148 */
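/* Illustrative example: an SImode load into a general register costs
   ix86_cost->int_load[2], while a DImode load on ia32 costs twice that,
   because the default case below scales the word-sized move cost by the
   number of words moved.  */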
17149 int
17150 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17151 {
17152 if (FLOAT_CLASS_P (class))
17153 {
17154 int index;
17155 switch (mode)
17156 {
17157 case SFmode:
17158 index = 0;
17159 break;
17160 case DFmode:
17161 index = 1;
17162 break;
17163 case XFmode:
17164 index = 2;
17165 break;
17166 default:
17167 return 100;
17168 }
17169 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17170 }
17171 if (SSE_CLASS_P (class))
17172 {
17173 int index;
17174 switch (GET_MODE_SIZE (mode))
17175 {
17176 case 4:
17177 index = 0;
17178 break;
17179 case 8:
17180 index = 1;
17181 break;
17182 case 16:
17183 index = 2;
17184 break;
17185 default:
17186 return 100;
17187 }
17188 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17189 }
17190 if (MMX_CLASS_P (class))
17191 {
17192 int index;
17193 switch (GET_MODE_SIZE (mode))
17194 {
17195 case 4:
17196 index = 0;
17197 break;
17198 case 8:
17199 index = 1;
17200 break;
17201 default:
17202 return 100;
17203 }
17204 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17205 }
17206 switch (GET_MODE_SIZE (mode))
17207 {
17208 case 1:
17209 if (in)
17210 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17211 : ix86_cost->movzbl_load);
17212 else
17213 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17214 : ix86_cost->int_store[0] + 4);
17215 break;
17216 case 2:
17217 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17218 default:
17219 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17220 if (mode == TFmode)
17221 mode = XFmode;
17222 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17223 * (((int) GET_MODE_SIZE (mode)
17224 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17225 }
17226 }
17227
17228 /* Compute a (partial) cost for rtx X. Return true if the complete
17229 cost has been computed, and false if subexpressions should be
17230 scanned. In either case, *TOTAL contains the cost result. */
17231
17232 static bool
17233 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17234 {
17235 enum machine_mode mode = GET_MODE (x);
17236
17237 switch (code)
17238 {
17239 case CONST_INT:
17240 case CONST:
17241 case LABEL_REF:
17242 case SYMBOL_REF:
17243 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17244 *total = 3;
17245 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17246 *total = 2;
17247 else if (flag_pic && SYMBOLIC_CONST (x)
17248 && (!TARGET_64BIT
17249 || (GET_CODE (x) != LABEL_REF
17250 && (GET_CODE (x) != SYMBOL_REF
17251 || !SYMBOL_REF_LOCAL_P (x)))))
17252 *total = 1;
17253 else
17254 *total = 0;
17255 return true;
17256
17257 case CONST_DOUBLE:
17258 if (mode == VOIDmode)
17259 *total = 0;
17260 else
17261 switch (standard_80387_constant_p (x))
17262 {
17263 case 1: /* 0.0 */
17264 *total = 1;
17265 break;
17266 default: /* Other constants */
17267 *total = 2;
17268 break;
17269 case 0:
17270 case -1:
17271 /* Start with (MEM (SYMBOL_REF)), since that's where
17272 it'll probably end up. Add a penalty for size. */
17273 *total = (COSTS_N_INSNS (1)
17274 + (flag_pic != 0 && !TARGET_64BIT)
17275 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17276 break;
17277 }
17278 return true;
17279
17280 case ZERO_EXTEND:
17281 /* The zero extension is often completely free on x86_64, so make
17282 it as cheap as possible. */
17283 if (TARGET_64BIT && mode == DImode
17284 && GET_MODE (XEXP (x, 0)) == SImode)
17285 *total = 1;
17286 else if (TARGET_ZERO_EXTEND_WITH_AND)
17287 *total = ix86_cost->add;
17288 else
17289 *total = ix86_cost->movzx;
17290 return false;
17291
17292 case SIGN_EXTEND:
17293 *total = ix86_cost->movsx;
17294 return false;
17295
17296 case ASHIFT:
17297 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17298 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17299 {
17300 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17301 if (value == 1)
17302 {
17303 *total = ix86_cost->add;
17304 return false;
17305 }
17306 if ((value == 2 || value == 3)
17307 && ix86_cost->lea <= ix86_cost->shift_const)
17308 {
17309 *total = ix86_cost->lea;
17310 return false;
17311 }
17312 }
17313 /* FALLTHRU */
17314
17315 case ROTATE:
17316 case ASHIFTRT:
17317 case LSHIFTRT:
17318 case ROTATERT:
17319 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17320 {
17321 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17322 {
17323 if (INTVAL (XEXP (x, 1)) > 32)
17324 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17325 else
17326 *total = ix86_cost->shift_const * 2;
17327 }
17328 else
17329 {
17330 if (GET_CODE (XEXP (x, 1)) == AND)
17331 *total = ix86_cost->shift_var * 2;
17332 else
17333 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17334 }
17335 }
17336 else
17337 {
17338 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17339 *total = ix86_cost->shift_const;
17340 else
17341 *total = ix86_cost->shift_var;
17342 }
17343 return false;
17344
17345 case MULT:
17346 if (FLOAT_MODE_P (mode))
17347 {
17348 *total = ix86_cost->fmul;
17349 return false;
17350 }
17351 else
17352 {
17353 rtx op0 = XEXP (x, 0);
17354 rtx op1 = XEXP (x, 1);
17355 int nbits;
17356 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17357 {
17358 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17359 for (nbits = 0; value != 0; value &= value - 1)
17360 nbits++;
17361 }
17362 else
17363 /* This is arbitrary. */
17364 nbits = 7;
17365
17366 /* Compute costs correctly for widening multiplication. */
17367 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17368 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17369 == GET_MODE_SIZE (mode))
17370 {
17371 int is_mulwiden = 0;
17372 enum machine_mode inner_mode = GET_MODE (op0);
17373
17374 if (GET_CODE (op0) == GET_CODE (op1))
17375 is_mulwiden = 1, op1 = XEXP (op1, 0);
17376 else if (GET_CODE (op1) == CONST_INT)
17377 {
17378 if (GET_CODE (op0) == SIGN_EXTEND)
17379 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17380 == INTVAL (op1);
17381 else
17382 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17383 }
17384
17385 if (is_mulwiden)
17386 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17387 }
17388
17389 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17390 + nbits * ix86_cost->mult_bit
17391 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17392
17393 return true;
17394 }
17395
17396 case DIV:
17397 case UDIV:
17398 case MOD:
17399 case UMOD:
17400 if (FLOAT_MODE_P (mode))
17401 *total = ix86_cost->fdiv;
17402 else
17403 *total = ix86_cost->divide[MODE_INDEX (mode)];
17404 return false;
17405
17406 case PLUS:
17407 if (FLOAT_MODE_P (mode))
17408 *total = ix86_cost->fadd;
17409 else if (GET_MODE_CLASS (mode) == MODE_INT
17410 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17411 {
17412 if (GET_CODE (XEXP (x, 0)) == PLUS
17413 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17414 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17415 && CONSTANT_P (XEXP (x, 1)))
17416 {
17417 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17418 if (val == 2 || val == 4 || val == 8)
17419 {
17420 *total = ix86_cost->lea;
17421 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17422 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17423 outer_code);
17424 *total += rtx_cost (XEXP (x, 1), outer_code);
17425 return true;
17426 }
17427 }
17428 else if (GET_CODE (XEXP (x, 0)) == MULT
17429 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17430 {
17431 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17432 if (val == 2 || val == 4 || val == 8)
17433 {
17434 *total = ix86_cost->lea;
17435 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17436 *total += rtx_cost (XEXP (x, 1), outer_code);
17437 return true;
17438 }
17439 }
17440 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17441 {
17442 *total = ix86_cost->lea;
17443 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17444 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17445 *total += rtx_cost (XEXP (x, 1), outer_code);
17446 return true;
17447 }
17448 }
17449 /* FALLTHRU */
17450
17451 case MINUS:
17452 if (FLOAT_MODE_P (mode))
17453 {
17454 *total = ix86_cost->fadd;
17455 return false;
17456 }
17457 /* FALLTHRU */
17458
17459 case AND:
17460 case IOR:
17461 case XOR:
17462 if (!TARGET_64BIT && mode == DImode)
17463 {
17464 *total = (ix86_cost->add * 2
17465 + (rtx_cost (XEXP (x, 0), outer_code)
17466 << (GET_MODE (XEXP (x, 0)) != DImode))
17467 + (rtx_cost (XEXP (x, 1), outer_code)
17468 << (GET_MODE (XEXP (x, 1)) != DImode)));
17469 return true;
17470 }
17471 /* FALLTHRU */
17472
17473 case NEG:
17474 if (FLOAT_MODE_P (mode))
17475 {
17476 *total = ix86_cost->fchs;
17477 return false;
17478 }
17479 /* FALLTHRU */
17480
17481 case NOT:
17482 if (!TARGET_64BIT && mode == DImode)
17483 *total = ix86_cost->add * 2;
17484 else
17485 *total = ix86_cost->add;
17486 return false;
17487
17488 case COMPARE:
17489 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17490 && XEXP (XEXP (x, 0), 1) == const1_rtx
17491 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17492 && XEXP (x, 1) == const0_rtx)
17493 {
17494 /* This kind of construct is implemented using test[bwl].
17495 Treat it as if we had an AND. */
17496 *total = (ix86_cost->add
17497 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17498 + rtx_cost (const1_rtx, outer_code));
17499 return true;
17500 }
17501 return false;
17502
17503 case FLOAT_EXTEND:
17504 if (!TARGET_SSE_MATH
17505 || mode == XFmode
17506 || (mode == DFmode && !TARGET_SSE2))
17507 /* For standard 80387 constants, raise the cost to prevent
17508 compress_float_constant() from generating a load from memory. */
17509 switch (standard_80387_constant_p (XEXP (x, 0)))
17510 {
17511 case -1:
17512 case 0:
17513 *total = 0;
17514 break;
17515 case 1: /* 0.0 */
17516 *total = 1;
17517 break;
17518 default:
17519 *total = (x86_ext_80387_constants & TUNEMASK
17520 || optimize_size
17521 ? 1 : 0);
17522 }
17523 return false;
17524
17525 case ABS:
17526 if (FLOAT_MODE_P (mode))
17527 *total = ix86_cost->fabs;
17528 return false;
17529
17530 case SQRT:
17531 if (FLOAT_MODE_P (mode))
17532 *total = ix86_cost->fsqrt;
17533 return false;
17534
17535 case UNSPEC:
17536 if (XINT (x, 1) == UNSPEC_TP)
17537 *total = 0;
17538 return false;
17539
17540 default:
17541 return false;
17542 }
17543 }
17544
17545 #if TARGET_MACHO
17546
17547 static int current_machopic_label_num;
17548
17549 /* Given a symbol name and its associated stub, write out the
17550 definition of the stub. */
17551
17552 void
17553 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17554 {
17555 unsigned int length;
17556 char *binder_name, *symbol_name, lazy_ptr_name[32];
17557 int label = ++current_machopic_label_num;
17558
17559 /* For 64-bit we shouldn't get here. */
17560 gcc_assert (!TARGET_64BIT);
17561
17562 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17563 symb = (*targetm.strip_name_encoding) (symb);
17564
17565 length = strlen (stub);
17566 binder_name = alloca (length + 32);
17567 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17568
17569 length = strlen (symb);
17570 symbol_name = alloca (length + 32);
17571 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17572
17573 sprintf (lazy_ptr_name, "L%d$lz", label);
17574
17575 if (MACHOPIC_PURE)
17576 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17577 else
17578 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17579
17580 fprintf (file, "%s:\n", stub);
17581 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17582
17583 if (MACHOPIC_PURE)
17584 {
17585 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17586 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17587 fprintf (file, "\tjmp\t*%%edx\n");
17588 }
17589 else
17590 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17591
17592 fprintf (file, "%s:\n", binder_name);
17593
17594 if (MACHOPIC_PURE)
17595 {
17596 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17597 fprintf (file, "\tpushl\t%%eax\n");
17598 }
17599 else
17600 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17601
17602 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17603
17604 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17605 fprintf (file, "%s:\n", lazy_ptr_name);
17606 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17607 fprintf (file, "\t.long %s\n", binder_name);
17608 }
17609
17610 void
17611 darwin_x86_file_end (void)
17612 {
17613 darwin_file_end ();
17614 ix86_file_end ();
17615 }
17616 #endif /* TARGET_MACHO */
17617
17618 /* Order the registers for the register allocator. */
17619
17620 void
17621 x86_order_regs_for_local_alloc (void)
17622 {
17623 int pos = 0;
17624 int i;
17625
17626 /* First allocate the local general purpose registers. */
17627 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17628 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17629 reg_alloc_order [pos++] = i;
17630
17631 /* Global general purpose registers. */
17632 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17633 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17634 reg_alloc_order [pos++] = i;
17635
17636 /* x87 registers come first in case we are doing FP math
17637 using them. */
17638 if (!TARGET_SSE_MATH)
17639 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17640 reg_alloc_order [pos++] = i;
17641
17642 /* SSE registers. */
17643 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17644 reg_alloc_order [pos++] = i;
17645 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17646 reg_alloc_order [pos++] = i;
17647
17648 /* x87 registers. */
17649 if (TARGET_SSE_MATH)
17650 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17651 reg_alloc_order [pos++] = i;
17652
17653 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17654 reg_alloc_order [pos++] = i;
17655
17656 /* Initialize the rest of the array, as we do not allocate some
17657 registers at all. */
17658 while (pos < FIRST_PSEUDO_REGISTER)
17659 reg_alloc_order [pos++] = 0;
17660 }
17661
17662 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17663 struct attribute_spec.handler. */
17664 static tree
17665 ix86_handle_struct_attribute (tree *node, tree name,
17666 tree args ATTRIBUTE_UNUSED,
17667 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17668 {
17669 tree *type = NULL;
17670 if (DECL_P (*node))
17671 {
17672 if (TREE_CODE (*node) == TYPE_DECL)
17673 type = &TREE_TYPE (*node);
17674 }
17675 else
17676 type = node;
17677
17678 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17679 || TREE_CODE (*type) == UNION_TYPE)))
17680 {
17681 warning (OPT_Wattributes, "%qs attribute ignored",
17682 IDENTIFIER_POINTER (name));
17683 *no_add_attrs = true;
17684 }
17685
17686 else if ((is_attribute_p ("ms_struct", name)
17687 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17688 || ((is_attribute_p ("gcc_struct", name)
17689 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17690 {
17691 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17692 IDENTIFIER_POINTER (name));
17693 *no_add_attrs = true;
17694 }
17695
17696 return NULL_TREE;
17697 }
17698
17699 static bool
17700 ix86_ms_bitfield_layout_p (tree record_type)
17701 {
17702 return (TARGET_MS_BITFIELD_LAYOUT &&
17703 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17704 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17705 }
17706
17707 /* Returns an expression indicating where the this parameter is
17708 located on entry to the FUNCTION. */
17709
17710 static rtx
17711 x86_this_parameter (tree function)
17712 {
17713 tree type = TREE_TYPE (function);
17714
17715 if (TARGET_64BIT)
17716 {
17717 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17718 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17719 }
17720
17721 if (ix86_function_regparm (type, function) > 0)
17722 {
17723 tree parm;
17724
17725 parm = TYPE_ARG_TYPES (type);
17726 /* Figure out whether or not the function has a variable number of
17727 arguments. */
17728 for (; parm; parm = TREE_CHAIN (parm))
17729 if (TREE_VALUE (parm) == void_type_node)
17730 break;
17731 /* If not, the this parameter is in the first argument. */
17732 if (parm)
17733 {
17734 int regno = 0;
17735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17736 regno = 2;
17737 return gen_rtx_REG (SImode, regno);
17738 }
17739 }
17740
17741 if (aggregate_value_p (TREE_TYPE (type), type))
17742 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17743 else
17744 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17745 }
17746
17747 /* Determine whether x86_output_mi_thunk can succeed. */
17748
17749 static bool
17750 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17751 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17752 HOST_WIDE_INT vcall_offset, tree function)
17753 {
17754 /* 64-bit can handle anything. */
17755 if (TARGET_64BIT)
17756 return true;
17757
17758 /* For 32-bit, everything's fine if we have one free register. */
17759 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17760 return true;
17761
17762 /* Need a free register for vcall_offset. */
17763 if (vcall_offset)
17764 return false;
17765
17766 /* Need a free register for GOT references. */
17767 if (flag_pic && !(*targetm.binds_local_p) (function))
17768 return false;
17769
17770 /* Otherwise ok. */
17771 return true;
17772 }
17773
17774 /* Output the assembler code for a thunk function. THUNK_DECL is the
17775 declaration for the thunk function itself, FUNCTION is the decl for
17776 the target function. DELTA is an immediate constant offset to be
17777 added to THIS. If VCALL_OFFSET is nonzero, the word at
17778 *(*this + vcall_offset) should be added to THIS. */
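/* A rough illustration (not the literal output): for a 32-bit, non-PIC,
   non-regparm thunk with a small DELTA and no VCALL_OFFSET, the code
   emitted below amounts to
       addl $DELTA, 4(%esp)
       jmp  function
   i.e. adjust the incoming this pointer in its stack slot and tail-call
   the real function.  */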
17779
17780 static void
17781 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17782 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17783 HOST_WIDE_INT vcall_offset, tree function)
17784 {
17785 rtx xops[3];
17786 rtx this = x86_this_parameter (function);
17787 rtx this_reg, tmp;
17788
17789 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17790 pull it in now and let DELTA benefit. */
17791 if (REG_P (this))
17792 this_reg = this;
17793 else if (vcall_offset)
17794 {
17795 /* Put the this parameter into %eax. */
17796 xops[0] = this;
17797 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17798 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17799 }
17800 else
17801 this_reg = NULL_RTX;
17802
17803 /* Adjust the this parameter by a fixed constant. */
17804 if (delta)
17805 {
17806 xops[0] = GEN_INT (delta);
17807 xops[1] = this_reg ? this_reg : this;
17808 if (TARGET_64BIT)
17809 {
17810 if (!x86_64_general_operand (xops[0], DImode))
17811 {
17812 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17813 xops[1] = tmp;
17814 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17815 xops[0] = tmp;
17816 xops[1] = this;
17817 }
17818 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17819 }
17820 else
17821 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17822 }
17823
17824 /* Adjust the this parameter by a value stored in the vtable. */
17825 if (vcall_offset)
17826 {
17827 if (TARGET_64BIT)
17828 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17829 else
17830 {
17831 int tmp_regno = 2 /* ECX */;
17832 if (lookup_attribute ("fastcall",
17833 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17834 tmp_regno = 0 /* EAX */;
17835 tmp = gen_rtx_REG (SImode, tmp_regno);
17836 }
17837
17838 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17839 xops[1] = tmp;
17840 if (TARGET_64BIT)
17841 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17842 else
17843 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17844
17845 /* Adjust the this parameter. */
17846 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17847 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17848 {
17849 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17850 xops[0] = GEN_INT (vcall_offset);
17851 xops[1] = tmp2;
17852 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17853 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17854 }
17855 xops[1] = this_reg;
17856 if (TARGET_64BIT)
17857 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17858 else
17859 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17860 }
17861
17862 /* If necessary, drop THIS back to its stack slot. */
17863 if (this_reg && this_reg != this)
17864 {
17865 xops[0] = this_reg;
17866 xops[1] = this;
17867 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17868 }
17869
17870 xops[0] = XEXP (DECL_RTL (function), 0);
17871 if (TARGET_64BIT)
17872 {
17873 if (!flag_pic || (*targetm.binds_local_p) (function))
17874 output_asm_insn ("jmp\t%P0", xops);
17875 else
17876 {
17877 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17878 tmp = gen_rtx_CONST (Pmode, tmp);
17879 tmp = gen_rtx_MEM (QImode, tmp);
17880 xops[0] = tmp;
17881 output_asm_insn ("jmp\t%A0", xops);
17882 }
17883 }
17884 else
17885 {
17886 if (!flag_pic || (*targetm.binds_local_p) (function))
17887 output_asm_insn ("jmp\t%P0", xops);
17888 else
17889 #if TARGET_MACHO
17890 if (TARGET_MACHO)
17891 {
17892 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17893 tmp = (gen_rtx_SYMBOL_REF
17894 (Pmode,
17895 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17896 tmp = gen_rtx_MEM (QImode, tmp);
17897 xops[0] = tmp;
17898 output_asm_insn ("jmp\t%0", xops);
17899 }
17900 else
17901 #endif /* TARGET_MACHO */
17902 {
17903 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17904 output_set_got (tmp, NULL_RTX);
17905
17906 xops[1] = tmp;
17907 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17908 output_asm_insn ("jmp\t{*}%1", xops);
17909 }
17910 }
17911 }
17912
17913 static void
17914 x86_file_start (void)
17915 {
17916 default_file_start ();
17917 #if TARGET_MACHO
17918 darwin_file_start ();
17919 #endif
17920 if (X86_FILE_START_VERSION_DIRECTIVE)
17921 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17922 if (X86_FILE_START_FLTUSED)
17923 fputs ("\t.global\t__fltused\n", asm_out_file);
17924 if (ix86_asm_dialect == ASM_INTEL)
17925 fputs ("\t.intel_syntax\n", asm_out_file);
17926 }
17927
17928 int
17929 x86_field_alignment (tree field, int computed)
17930 {
17931 enum machine_mode mode;
17932 tree type = TREE_TYPE (field);
17933
17934 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17935 return computed;
17936 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17937 ? get_inner_array_type (type) : type);
17938 if (mode == DFmode || mode == DCmode
17939 || GET_MODE_CLASS (mode) == MODE_INT
17940 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17941 return MIN (32, computed);
17942 return computed;
17943 }
17944
17945 /* Output assembler code to FILE to increment profiler label # LABELNO
17946 for profiling a function entry. */
17947 void
17948 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17949 {
17950 if (TARGET_64BIT)
17951 if (flag_pic)
17952 {
17953 #ifndef NO_PROFILE_COUNTERS
17954 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17955 #endif
17956 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17957 }
17958 else
17959 {
17960 #ifndef NO_PROFILE_COUNTERS
17961 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17962 #endif
17963 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17964 }
17965 else if (flag_pic)
17966 {
17967 #ifndef NO_PROFILE_COUNTERS
17968 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17969 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17970 #endif
17971 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17972 }
17973 else
17974 {
17975 #ifndef NO_PROFILE_COUNTERS
17976 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17977 PROFILE_COUNT_REGISTER);
17978 #endif
17979 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17980 }
17981 }
17982
17983 /* We don't have exact information about the insn sizes, but we may assume
17984 quite safely that we are informed about all 1 byte insns and memory
17985 address sizes. This is enough to eliminate unnecessary padding in
17986 99% of cases. */
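/* In practice (see below) the estimate is: 5 bytes for a direct call,
   1 byte for insns known to be one byte long, 1 plus the address size
   for other non-jump insns (at least 5 when a symbol is referenced),
   and 2 bytes otherwise.  */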
17987
17988 static int
17989 min_insn_size (rtx insn)
17990 {
17991 int l = 0;
17992
17993 if (!INSN_P (insn) || !active_insn_p (insn))
17994 return 0;
17995
17996 /* Discard alignments we've emitted, and jump instructions. */
17997 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17998 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17999 return 0;
18000 if (GET_CODE (insn) == JUMP_INSN
18001 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18002 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18003 return 0;
18004
18005 /* Important case - calls are always 5 bytes.
18006 It is common to have many calls in a row. */
18007 if (GET_CODE (insn) == CALL_INSN
18008 && symbolic_reference_mentioned_p (PATTERN (insn))
18009 && !SIBLING_CALL_P (insn))
18010 return 5;
18011 if (get_attr_length (insn) <= 1)
18012 return 1;
18013
18014 /* For normal instructions we may rely on the sizes of addresses
18015 and the presence of a symbol to require 4 bytes of encoding.
18016 This is not the case for jumps, where references are PC-relative. */
18017 if (GET_CODE (insn) != JUMP_INSN)
18018 {
18019 l = get_attr_length_address (insn);
18020 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18021 l = 4;
18022 }
18023 if (l)
18024 return 1+l;
18025 else
18026 return 2;
18027 }
18028
18029 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
18030 16-byte window. */
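/* The fix applied below: when a fourth jump or call would fall into the
   same 16-byte window as the preceding three, a p2align (see gen_align)
   is emitted in front of it so that no 16-byte window ends up holding
   more than three jumps.  */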
18031
18032 static void
18033 ix86_avoid_jump_misspredicts (void)
18034 {
18035 rtx insn, start = get_insns ();
18036 int nbytes = 0, njumps = 0;
18037 int isjump = 0;
18038
18039 /* Look for all minimal intervals of instructions containing 4 jumps.
18040 The intervals are bounded by START and INSN. NBYTES is the total
18041 size of instructions in the interval including INSN and not including
18042 START. When NBYTES is smaller than 16 bytes, it is possible that
18043 the ends of START and INSN fall into the same 16-byte page.
18044
18045 The smallest offset in the page at which INSN can start is the case
18046 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
18047 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
18048 */
18049 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18050 {
18051
18052 nbytes += min_insn_size (insn);
18053 if (dump_file)
18054 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18055 INSN_UID (insn), min_insn_size (insn));
18056 if ((GET_CODE (insn) == JUMP_INSN
18057 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18058 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18059 || GET_CODE (insn) == CALL_INSN)
18060 njumps++;
18061 else
18062 continue;
18063
18064 while (njumps > 3)
18065 {
18066 start = NEXT_INSN (start);
18067 if ((GET_CODE (start) == JUMP_INSN
18068 && GET_CODE (PATTERN (start)) != ADDR_VEC
18069 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18070 || GET_CODE (start) == CALL_INSN)
18071 njumps--, isjump = 1;
18072 else
18073 isjump = 0;
18074 nbytes -= min_insn_size (start);
18075 }
18076 gcc_assert (njumps >= 0);
18077 if (dump_file)
18078 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18079 INSN_UID (start), INSN_UID (insn), nbytes);
18080
18081 if (njumps == 3 && isjump && nbytes < 16)
18082 {
18083 int padsize = 15 - nbytes + min_insn_size (insn);
18084
18085 if (dump_file)
18086 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18087 INSN_UID (insn), padsize);
18088 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18089 }
18090 }
18091 }
18092
18093 /* AMD Athlon works faster
18094 when RET is not the destination of a conditional jump or directly preceded
18095 by another jump instruction. We avoid the penalty by inserting a NOP just
18096 before the RET instruction in such cases. */
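/* The "NOP" is realized below by replacing the plain return pattern with
   return_internal_long (via gen_return_internal_long), which emits the
   padded form of the return instruction.  */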
18097 static void
18098 ix86_pad_returns (void)
18099 {
18100 edge e;
18101 edge_iterator ei;
18102
18103 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18104 {
18105 basic_block bb = e->src;
18106 rtx ret = BB_END (bb);
18107 rtx prev;
18108 bool replace = false;
18109
18110 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18111 || !maybe_hot_bb_p (bb))
18112 continue;
18113 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18114 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18115 break;
18116 if (prev && GET_CODE (prev) == CODE_LABEL)
18117 {
18118 edge e;
18119 edge_iterator ei;
18120
18121 FOR_EACH_EDGE (e, ei, bb->preds)
18122 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18123 && !(e->flags & EDGE_FALLTHRU))
18124 replace = true;
18125 }
18126 if (!replace)
18127 {
18128 prev = prev_active_insn (ret);
18129 if (prev
18130 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18131 || GET_CODE (prev) == CALL_INSN))
18132 replace = true;
18133 /* Empty functions get a branch mispredict even when the jump destination
18134 is not visible to us. */
18135 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18136 replace = true;
18137 }
18138 if (replace)
18139 {
18140 emit_insn_before (gen_return_internal_long (), ret);
18141 delete_insn (ret);
18142 }
18143 }
18144 }
18145
18146 /* Implement machine specific optimizations. We implement padding of returns
18147 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
18148 static void
18149 ix86_reorg (void)
18150 {
18151 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18152 ix86_pad_returns ();
18153 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18154 ix86_avoid_jump_misspredicts ();
18155 }
18156
18157 /* Return nonzero when a QImode register that must be represented via a REX
18158 prefix is used. */
18159 bool
18160 x86_extended_QIreg_mentioned_p (rtx insn)
18161 {
18162 int i;
18163 extract_insn_cached (insn);
18164 for (i = 0; i < recog_data.n_operands; i++)
18165 if (REG_P (recog_data.operand[i])
18166 && REGNO (recog_data.operand[i]) >= 4)
18167 return true;
18168 return false;
18169 }
18170
18171 /* Return nonzero when P points to a register encoded via a REX prefix.
18172 Called via for_each_rtx. */
18173 static int
18174 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18175 {
18176 unsigned int regno;
18177 if (!REG_P (*p))
18178 return 0;
18179 regno = REGNO (*p);
18180 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18181 }
18182
18183 /* Return true when INSN mentions a register that must be encoded using a
18184 REX prefix. */
18185 bool
18186 x86_extended_reg_mentioned_p (rtx insn)
18187 {
18188 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18189 }
18190
18191 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18192 optabs would emit if we didn't have TFmode patterns. */
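/* Roughly, an illustrative C equivalent (assuming a 64-bit unsigned
   input x and a signed hardware int-to-FP conversion):

       if ((int64_t) x >= 0)
         result = (double) (int64_t) x;
       else
         result = 2.0 * (double) (int64_t) ((x >> 1) | (x & 1));

   The low bit is OR-ed back in as a sticky bit so that the halved value
   still rounds correctly once the result is doubled.  */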
18193
18194 void
18195 x86_emit_floatuns (rtx operands[2])
18196 {
18197 rtx neglab, donelab, i0, i1, f0, in, out;
18198 enum machine_mode mode, inmode;
18199
18200 inmode = GET_MODE (operands[1]);
18201 gcc_assert (inmode == SImode || inmode == DImode);
18202
18203 out = operands[0];
18204 in = force_reg (inmode, operands[1]);
18205 mode = GET_MODE (out);
18206 neglab = gen_label_rtx ();
18207 donelab = gen_label_rtx ();
18208 i1 = gen_reg_rtx (Pmode);
18209 f0 = gen_reg_rtx (mode);
18210
18211 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18212
18213 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18214 emit_jump_insn (gen_jump (donelab));
18215 emit_barrier ();
18216
18217 emit_label (neglab);
18218
18219 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18220 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18221 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18222 expand_float (f0, i0, 0);
18223 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18224
18225 emit_label (donelab);
18226 }
18227 \f
18228 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18229 with all elements equal to VAR. Return true if successful. */
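/* Rough outline of the strategy below: modes with a native way to splat
   a scalar use VEC_DUPLICATE or an SSE2 shuffle sequence directly; the
   remaining narrow integer modes replicate the value into a scalar twice
   as wide and recurse on the corresponding wider vector mode (the
   "widen" path).  */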
18230
18231 static bool
18232 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18233 rtx target, rtx val)
18234 {
18235 enum machine_mode smode, wsmode, wvmode;
18236 rtx x;
18237
18238 switch (mode)
18239 {
18240 case V2SImode:
18241 case V2SFmode:
18242 if (!mmx_ok)
18243 return false;
18244 /* FALLTHRU */
18245
18246 case V2DFmode:
18247 case V2DImode:
18248 case V4SFmode:
18249 case V4SImode:
18250 val = force_reg (GET_MODE_INNER (mode), val);
18251 x = gen_rtx_VEC_DUPLICATE (mode, val);
18252 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18253 return true;
18254
18255 case V4HImode:
18256 if (!mmx_ok)
18257 return false;
18258 if (TARGET_SSE || TARGET_3DNOW_A)
18259 {
18260 val = gen_lowpart (SImode, val);
18261 x = gen_rtx_TRUNCATE (HImode, val);
18262 x = gen_rtx_VEC_DUPLICATE (mode, x);
18263 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18264 return true;
18265 }
18266 else
18267 {
18268 smode = HImode;
18269 wsmode = SImode;
18270 wvmode = V2SImode;
18271 goto widen;
18272 }
18273
18274 case V8QImode:
18275 if (!mmx_ok)
18276 return false;
18277 smode = QImode;
18278 wsmode = HImode;
18279 wvmode = V4HImode;
18280 goto widen;
18281 case V8HImode:
18282 if (TARGET_SSE2)
18283 {
18284 rtx tmp1, tmp2;
18285 /* Extend HImode to SImode using a paradoxical SUBREG. */
18286 tmp1 = gen_reg_rtx (SImode);
18287 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18288 /* Insert the SImode value as low element of V4SImode vector. */
18289 tmp2 = gen_reg_rtx (V4SImode);
18290 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18291 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18292 CONST0_RTX (V4SImode),
18293 const1_rtx);
18294 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18295 /* Cast the V4SImode vector back to a V8HImode vector. */
18296 tmp1 = gen_reg_rtx (V8HImode);
18297 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18298 /* Duplicate the low short through the whole low SImode word. */
18299 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18300 /* Cast the V8HImode vector back to a V4SImode vector. */
18301 tmp2 = gen_reg_rtx (V4SImode);
18302 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18303 /* Replicate the low element of the V4SImode vector. */
18304 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18305 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18306 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18307 return true;
18308 }
18309 smode = HImode;
18310 wsmode = SImode;
18311 wvmode = V4SImode;
18312 goto widen;
18313 case V16QImode:
18314 if (TARGET_SSE2)
18315 {
18316 rtx tmp1, tmp2;
18317 /* Extend QImode to SImode using a paradoxical SUBREG. */
18318 tmp1 = gen_reg_rtx (SImode);
18319 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18320 /* Insert the SImode value as low element of V4SImode vector. */
18321 tmp2 = gen_reg_rtx (V4SImode);
18322 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18323 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18324 CONST0_RTX (V4SImode),
18325 const1_rtx);
18326 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18327 /* Cast the V4SImode vector back to a V16QImode vector. */
18328 tmp1 = gen_reg_rtx (V16QImode);
18329 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18330 /* Duplicate the low byte through the whole low SImode word. */
18331 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18332 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18333 /* Cast the V16QImode vector back to a V4SImode vector. */
18334 tmp2 = gen_reg_rtx (V4SImode);
18335 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18336 /* Replicate the low element of the V4SImode vector. */
18337 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18338 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18339 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18340 return true;
18341 }
18342 smode = QImode;
18343 wsmode = HImode;
18344 wvmode = V8HImode;
18345 goto widen;
18346 widen:
18347 /* Replicate the value once into the next wider mode and recurse. */
18348 val = convert_modes (wsmode, smode, val, true);
18349 x = expand_simple_binop (wsmode, ASHIFT, val,
18350 GEN_INT (GET_MODE_BITSIZE (smode)),
18351 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18352 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18353
18354 x = gen_reg_rtx (wvmode);
18355 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18356 gcc_unreachable ();
18357 emit_move_insn (target, gen_lowpart (mode, x));
18358 return true;
18359
18360 default:
18361 return false;
18362 }
18363 }
18364
18365 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18366 whose ONE_VAR element is VAR, and other elements are zero. Return true
18367 if successful. */
18368
18369 static bool
18370 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18371 rtx target, rtx var, int one_var)
18372 {
18373 enum machine_mode vsimode;
18374 rtx new_target;
18375 rtx x, tmp;
18376
18377 switch (mode)
18378 {
18379 case V2SFmode:
18380 case V2SImode:
18381 if (!mmx_ok)
18382 return false;
18383 /* FALLTHRU */
18384
18385 case V2DFmode:
18386 case V2DImode:
18387 if (one_var != 0)
18388 return false;
18389 var = force_reg (GET_MODE_INNER (mode), var);
18390 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18391 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18392 return true;
18393
18394 case V4SFmode:
18395 case V4SImode:
18396 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18397 new_target = gen_reg_rtx (mode);
18398 else
18399 new_target = target;
18400 var = force_reg (GET_MODE_INNER (mode), var);
18401 x = gen_rtx_VEC_DUPLICATE (mode, var);
18402 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18403 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18404 if (one_var != 0)
18405 {
18406 /* We need to shuffle the value to the correct position, so
18407 create a new pseudo to store the intermediate result. */
18408
18409 /* With SSE2, we can use the integer shuffle insns. */
18410 if (mode != V4SFmode && TARGET_SSE2)
18411 {
18412 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18413 GEN_INT (1),
18414 GEN_INT (one_var == 1 ? 0 : 1),
18415 GEN_INT (one_var == 2 ? 0 : 1),
18416 GEN_INT (one_var == 3 ? 0 : 1)));
18417 if (target != new_target)
18418 emit_move_insn (target, new_target);
18419 return true;
18420 }
18421
18422 /* Otherwise convert the intermediate result to V4SFmode and
18423 use the SSE1 shuffle instructions. */
18424 if (mode != V4SFmode)
18425 {
18426 tmp = gen_reg_rtx (V4SFmode);
18427 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18428 }
18429 else
18430 tmp = new_target;
18431
18432 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18433 GEN_INT (1),
18434 GEN_INT (one_var == 1 ? 0 : 1),
18435 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18436 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18437
18438 if (mode != V4SFmode)
18439 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18440 else if (tmp != target)
18441 emit_move_insn (target, tmp);
18442 }
18443 else if (target != new_target)
18444 emit_move_insn (target, new_target);
18445 return true;
18446
18447 case V8HImode:
18448 case V16QImode:
18449 vsimode = V4SImode;
18450 goto widen;
18451 case V4HImode:
18452 case V8QImode:
18453 if (!mmx_ok)
18454 return false;
18455 vsimode = V2SImode;
18456 goto widen;
18457 widen:
18458 if (one_var != 0)
18459 return false;
18460
18461 /* Zero extend the variable element to SImode and recurse. */
18462 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18463
18464 x = gen_reg_rtx (vsimode);
18465 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18466 var, one_var))
18467 gcc_unreachable ();
18468
18469 emit_move_insn (target, gen_lowpart (mode, x));
18470 return true;
18471
18472 default:
18473 return false;
18474 }
18475 }
18476
18477 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18478 consisting of the values in VALS. It is known that all elements
18479 except ONE_VAR are constants. Return true if successful. */
18480
18481 static bool
18482 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18483 rtx target, rtx vals, int one_var)
18484 {
18485 rtx var = XVECEXP (vals, 0, one_var);
18486 enum machine_mode wmode;
18487 rtx const_vec, x;
18488
18489 const_vec = copy_rtx (vals);
18490 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18491 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18492
18493 switch (mode)
18494 {
18495 case V2DFmode:
18496 case V2DImode:
18497 case V2SFmode:
18498 case V2SImode:
18499 /* For the two element vectors, it's just as easy to use
18500 the general case. */
18501 return false;
18502
18503 case V4SFmode:
18504 case V4SImode:
18505 case V8HImode:
18506 case V4HImode:
18507 break;
18508
18509 case V16QImode:
18510 wmode = V8HImode;
18511 goto widen;
18512 case V8QImode:
18513 wmode = V4HImode;
18514 goto widen;
18515 widen:
18516 /* There's no way to set one QImode entry easily. Combine
18517 the variable value with its adjacent constant value, and
18518 promote to an HImode set. */
18519 x = XVECEXP (vals, 0, one_var ^ 1);
18520 if (one_var & 1)
18521 {
18522 var = convert_modes (HImode, QImode, var, true);
18523 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18524 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18525 x = GEN_INT (INTVAL (x) & 0xff);
18526 }
18527 else
18528 {
18529 var = convert_modes (HImode, QImode, var, true);
18530 x = gen_int_mode (INTVAL (x) << 8, HImode);
18531 }
18532 if (x != const0_rtx)
18533 var = expand_simple_binop (HImode, IOR, var, x, var,
18534 1, OPTAB_LIB_WIDEN);
18535
18536 x = gen_reg_rtx (wmode);
18537 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18538 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18539
18540 emit_move_insn (target, gen_lowpart (mode, x));
18541 return true;
18542
18543 default:
18544 return false;
18545 }
18546
18547 emit_move_insn (target, const_vec);
18548 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18549 return true;
18550 }
18551
18552 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18553 all values variable, and none identical. */
18554
18555 static void
18556 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18557 rtx target, rtx vals)
18558 {
18559 enum machine_mode half_mode = GET_MODE_INNER (mode);
18560 rtx op0 = NULL, op1 = NULL;
18561 bool use_vec_concat = false;
18562
18563 switch (mode)
18564 {
18565 case V2SFmode:
18566 case V2SImode:
18567 if (!mmx_ok && !TARGET_SSE)
18568 break;
18569 /* FALLTHRU */
18570
18571 case V2DFmode:
18572 case V2DImode:
18573 /* For the two element vectors, we always implement VEC_CONCAT. */
18574 op0 = XVECEXP (vals, 0, 0);
18575 op1 = XVECEXP (vals, 0, 1);
18576 use_vec_concat = true;
18577 break;
18578
18579 case V4SFmode:
18580 half_mode = V2SFmode;
18581 goto half;
18582 case V4SImode:
18583 half_mode = V2SImode;
18584 goto half;
18585 half:
18586 {
18587 rtvec v;
18588
18589 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18590 Recurse to load the two halves. */
18591
18592 op0 = gen_reg_rtx (half_mode);
18593 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18594 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18595
18596 op1 = gen_reg_rtx (half_mode);
18597 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18598 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18599
18600 use_vec_concat = true;
18601 }
18602 break;
18603
18604 case V8HImode:
18605 case V16QImode:
18606 case V4HImode:
18607 case V8QImode:
18608 break;
18609
18610 default:
18611 gcc_unreachable ();
18612 }
18613
18614 if (use_vec_concat)
18615 {
18616 if (!register_operand (op0, half_mode))
18617 op0 = force_reg (half_mode, op0);
18618 if (!register_operand (op1, half_mode))
18619 op1 = force_reg (half_mode, op1);
18620
18621 emit_insn (gen_rtx_SET (VOIDmode, target,
18622 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18623 }
18624 else
18625 {
18626 int i, j, n_elts, n_words, n_elt_per_word;
18627 enum machine_mode inner_mode;
18628 rtx words[4], shift;
18629
18630 inner_mode = GET_MODE_INNER (mode);
18631 n_elts = GET_MODE_NUNITS (mode);
18632 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18633 n_elt_per_word = n_elts / n_words;
18634 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18635
18636 for (i = 0; i < n_words; ++i)
18637 {
18638 rtx word = NULL_RTX;
18639
18640 for (j = 0; j < n_elt_per_word; ++j)
18641 {
18642 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18643 elt = convert_modes (word_mode, inner_mode, elt, true);
18644
18645 if (j == 0)
18646 word = elt;
18647 else
18648 {
18649 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18650 word, 1, OPTAB_LIB_WIDEN);
18651 word = expand_simple_binop (word_mode, IOR, word, elt,
18652 word, 1, OPTAB_LIB_WIDEN);
18653 }
18654 }
18655
18656 words[i] = word;
18657 }
18658
18659 if (n_words == 1)
18660 emit_move_insn (target, gen_lowpart (mode, words[0]));
18661 else if (n_words == 2)
18662 {
18663 rtx tmp = gen_reg_rtx (mode);
18664 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18665 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18666 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18667 emit_move_insn (target, tmp);
18668 }
18669 else if (n_words == 4)
18670 {
18671 rtx tmp = gen_reg_rtx (V4SImode);
18672 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18673 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18674 emit_move_insn (target, gen_lowpart (mode, tmp));
18675 }
18676 else
18677 gcc_unreachable ();
18678 }
18679 }
18680
18681 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18682 instructions unless MMX_OK is true. */
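/* Strategy, in the order tried below: all-constant vectors are loaded
   from the constant pool; vectors whose elements are all identical are
   broadcast; vectors with a single variable element are built from a
   constant load plus a single element store; everything else goes
   through the general concat or word-at-a-time code.  */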
18683
18684 void
18685 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18686 {
18687 enum machine_mode mode = GET_MODE (target);
18688 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18689 int n_elts = GET_MODE_NUNITS (mode);
18690 int n_var = 0, one_var = -1;
18691 bool all_same = true, all_const_zero = true;
18692 int i;
18693 rtx x;
18694
18695 for (i = 0; i < n_elts; ++i)
18696 {
18697 x = XVECEXP (vals, 0, i);
18698 if (!CONSTANT_P (x))
18699 n_var++, one_var = i;
18700 else if (x != CONST0_RTX (inner_mode))
18701 all_const_zero = false;
18702 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18703 all_same = false;
18704 }
18705
18706 /* Constants are best loaded from the constant pool. */
18707 if (n_var == 0)
18708 {
18709 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18710 return;
18711 }
18712
18713 /* If all values are identical, broadcast the value. */
18714 if (all_same
18715 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18716 XVECEXP (vals, 0, 0)))
18717 return;
18718
18719 /* Values where only one field is non-constant are best loaded from
18720 the pool and overwritten via move later. */
18721 if (n_var == 1)
18722 {
18723 if (all_const_zero
18724 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18725 XVECEXP (vals, 0, one_var),
18726 one_var))
18727 return;
18728
18729 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18730 return;
18731 }
18732
18733 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18734 }
18735
18736 void
18737 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18738 {
18739 enum machine_mode mode = GET_MODE (target);
18740 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18741 bool use_vec_merge = false;
18742 rtx tmp;
18743
18744 switch (mode)
18745 {
18746 case V2SFmode:
18747 case V2SImode:
18748 if (mmx_ok)
18749 {
18750 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18751 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18752 if (elt == 0)
18753 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18754 else
18755 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18756 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18757 return;
18758 }
18759 break;
18760
18761 case V2DFmode:
18762 case V2DImode:
18763 {
18764 rtx op0, op1;
18765
18766 /* For the two element vectors, we implement a VEC_CONCAT with
18767 the extraction of the other element. */
18768
18769 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18770 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18771
18772 if (elt == 0)
18773 op0 = val, op1 = tmp;
18774 else
18775 op0 = tmp, op1 = val;
18776
18777 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18778 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18779 }
18780 return;
18781
18782 case V4SFmode:
18783 switch (elt)
18784 {
18785 case 0:
18786 use_vec_merge = true;
18787 break;
18788
18789 case 1:
18790 /* tmp = target = A B C D */
18791 tmp = copy_to_reg (target);
18792 /* target = A A B B */
18793 emit_insn (gen_sse_unpcklps (target, target, target));
18794 /* target = X A B B */
18795 ix86_expand_vector_set (false, target, val, 0);
18796 /* target = A X C D */
18797 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18798 GEN_INT (1), GEN_INT (0),
18799 GEN_INT (2+4), GEN_INT (3+4)));
18800 return;
18801
18802 case 2:
18803 /* tmp = target = A B C D */
18804 tmp = copy_to_reg (target);
18805 /* tmp = X B C D */
18806 ix86_expand_vector_set (false, tmp, val, 0);
18807 /* target = A B X D */
18808 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18809 GEN_INT (0), GEN_INT (1),
18810 GEN_INT (0+4), GEN_INT (3+4)));
18811 return;
18812
18813 case 3:
18814 /* tmp = target = A B C D */
18815 tmp = copy_to_reg (target);
18816 /* tmp = X B C D */
18817 ix86_expand_vector_set (false, tmp, val, 0);
18818 /* target = A B C X */
18819 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18820 GEN_INT (0), GEN_INT (1),
18821 GEN_INT (2+4), GEN_INT (0+4)));
18822 return;
18823
18824 default:
18825 gcc_unreachable ();
18826 }
18827 break;
18828
18829 case V4SImode:
18830 /* Element 0 handled by vec_merge below. */
18831 if (elt == 0)
18832 {
18833 use_vec_merge = true;
18834 break;
18835 }
18836
18837 if (TARGET_SSE2)
18838 {
18839 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18840 store into element 0, then shuffle them back. */
18841
18842 rtx order[4];
18843
18844 order[0] = GEN_INT (elt);
18845 order[1] = const1_rtx;
18846 order[2] = const2_rtx;
18847 order[3] = GEN_INT (3);
18848 order[elt] = const0_rtx;
18849
18850 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18851 order[1], order[2], order[3]));
18852
18853 ix86_expand_vector_set (false, target, val, 0);
18854
18855 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18856 order[1], order[2], order[3]));
18857 }
18858 else
18859 {
18860 /* For SSE1, we have to reuse the V4SF code. */
18861 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18862 gen_lowpart (SFmode, val), elt);
18863 }
18864 return;
18865
18866 case V8HImode:
18867 use_vec_merge = TARGET_SSE2;
18868 break;
18869 case V4HImode:
18870 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18871 break;
18872
18873 case V16QImode:
18874 case V8QImode:
18875 default:
18876 break;
18877 }
18878
18879 if (use_vec_merge)
18880 {
18881 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18882 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18883 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18884 }
18885 else
18886 {
18887 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18888
18889 emit_move_insn (mem, target);
18890
18891 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18892 emit_move_insn (tmp, val);
18893
18894 emit_move_insn (target, mem);
18895 }
18896 }
18897
18898 void
18899 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18900 {
18901 enum machine_mode mode = GET_MODE (vec);
18902 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18903 bool use_vec_extr = false;
18904 rtx tmp;
18905
18906 switch (mode)
18907 {
18908 case V2SImode:
18909 case V2SFmode:
18910 if (!mmx_ok)
18911 break;
18912 /* FALLTHRU */
18913
18914 case V2DFmode:
18915 case V2DImode:
18916 use_vec_extr = true;
18917 break;
18918
18919 case V4SFmode:
18920 switch (elt)
18921 {
18922 case 0:
18923 tmp = vec;
18924 break;
18925
18926 case 1:
18927 case 3:
18928 tmp = gen_reg_rtx (mode);
18929 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18930 GEN_INT (elt), GEN_INT (elt),
18931 GEN_INT (elt+4), GEN_INT (elt+4)));
18932 break;
18933
18934 case 2:
18935 tmp = gen_reg_rtx (mode);
18936 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18937 break;
18938
18939 default:
18940 gcc_unreachable ();
18941 }
18942 vec = tmp;
18943 use_vec_extr = true;
18944 elt = 0;
18945 break;
18946
18947 case V4SImode:
18948 if (TARGET_SSE2)
18949 {
18950 switch (elt)
18951 {
18952 case 0:
18953 tmp = vec;
18954 break;
18955
18956 case 1:
18957 case 3:
18958 tmp = gen_reg_rtx (mode);
18959 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18960 GEN_INT (elt), GEN_INT (elt),
18961 GEN_INT (elt), GEN_INT (elt)));
18962 break;
18963
18964 case 2:
18965 tmp = gen_reg_rtx (mode);
18966 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18967 break;
18968
18969 default:
18970 gcc_unreachable ();
18971 }
18972 vec = tmp;
18973 use_vec_extr = true;
18974 elt = 0;
18975 }
18976 else
18977 {
18978 /* For SSE1, we have to reuse the V4SF code. */
18979 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18980 gen_lowpart (V4SFmode, vec), elt);
18981 return;
18982 }
18983 break;
18984
18985 case V8HImode:
18986 use_vec_extr = TARGET_SSE2;
18987 break;
18988 case V4HImode:
18989 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18990 break;
18991
18992 case V16QImode:
18993 case V8QImode:
18994 /* ??? Could extract the appropriate HImode element and shift. */
18995 default:
18996 break;
18997 }
18998
18999 if (use_vec_extr)
19000 {
19001 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19002 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19003
19004 /* Let the rtl optimizers know about the zero extension performed. */
19005 if (inner_mode == HImode)
19006 {
19007 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19008 target = gen_lowpart (SImode, target);
19009 }
19010
19011 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19012 }
19013 else
19014 {
19015 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19016
19017 emit_move_insn (mem, vec);
19018
19019 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19020 emit_move_insn (target, tmp);
19021 }
19022 }
19023
19024 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19025 pattern to reduce; DEST is the destination; IN is the input vector. */
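/* Illustrative data flow of the sequence below (operand order aside):
       tmp2[0] = fn (in[0], in[2]),  tmp2[1] = fn (in[1], in[3])
       dest[0] = fn (tmp2[0], tmp2[1])
   so the fully reduced value ends up in element 0 of DEST.  */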
19026
19027 void
19028 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19029 {
19030 rtx tmp1, tmp2, tmp3;
19031
19032 tmp1 = gen_reg_rtx (V4SFmode);
19033 tmp2 = gen_reg_rtx (V4SFmode);
19034 tmp3 = gen_reg_rtx (V4SFmode);
19035
19036 emit_insn (gen_sse_movhlps (tmp1, in, in));
19037 emit_insn (fn (tmp2, tmp1, in));
19038
19039 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19040 GEN_INT (1), GEN_INT (1),
19041 GEN_INT (1+4), GEN_INT (1+4)));
19042 emit_insn (fn (dest, tmp2, tmp3));
19043 }
19044 \f
19045 /* Target hook for scalar_mode_supported_p. */
19046 static bool
19047 ix86_scalar_mode_supported_p (enum machine_mode mode)
19048 {
19049 if (DECIMAL_FLOAT_MODE_P (mode))
19050 return true;
19051 else
19052 return default_scalar_mode_supported_p (mode);
19053 }
19054
19055 /* Implements target hook vector_mode_supported_p. */
19056 static bool
19057 ix86_vector_mode_supported_p (enum machine_mode mode)
19058 {
19059 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19060 return true;
19061 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19062 return true;
19063 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19064 return true;
19065 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19066 return true;
19067 return false;
19068 }
19069
19070 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19071
19072 We do this in the new i386 backend to maintain source compatibility
19073 with the old cc0-based compiler. */
19074
19075 static tree
19076 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19077 tree inputs ATTRIBUTE_UNUSED,
19078 tree clobbers)
19079 {
19080 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19081 clobbers);
19082 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19083 clobbers);
19084 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19085 clobbers);
19086 return clobbers;
19087 }
19088
19089 /* Return true if this goes in large data/bss. */
19090
19091 static bool
19092 ix86_in_large_data_p (tree exp)
19093 {
19094 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19095 return false;
19096
19097 /* Functions are never large data. */
19098 if (TREE_CODE (exp) == FUNCTION_DECL)
19099 return false;
19100
19101 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19102 {
19103 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19104 if (strcmp (section, ".ldata") == 0
19105 || strcmp (section, ".lbss") == 0)
19106 return true;
19107 return false;
19108 }
19109 else
19110 {
19111 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19112
19113 /* If this is an incomplete type with size 0, then we can't put it
19114 in data because it might be too big when completed. */
19115 if (!size || size > ix86_section_threshold)
19116 return true;
19117 }
19118
19119 return false;
19120 }
19121 static void
19122 ix86_encode_section_info (tree decl, rtx rtl, int first)
19123 {
19124 default_encode_section_info (decl, rtl, first);
19125
19126 if (TREE_CODE (decl) == VAR_DECL
19127 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19128 && ix86_in_large_data_p (decl))
19129 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19130 }
19131
19132 /* Worker function for REVERSE_CONDITION. */
19133
19134 enum rtx_code
19135 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19136 {
19137 return (mode != CCFPmode && mode != CCFPUmode
19138 ? reverse_condition (code)
19139 : reverse_condition_maybe_unordered (code));
19140 }
19141
19142 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19143 to OPERANDS[0]. */
19144
19145 const char *
19146 output_387_reg_move (rtx insn, rtx *operands)
19147 {
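  /* If the source register dies here we can pop it instead of copying:
     use ffreep when the destination is %st(0) (only the pop is needed),
     otherwise store-and-pop with fstp.  A live source is copied with
     fld when the destination is the stack top, fst otherwise.  */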
19148 if (REG_P (operands[1])
19149 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19150 {
19151 if (REGNO (operands[0]) == FIRST_STACK_REG)
19152 return output_387_ffreep (operands, 0);
19153 return "fstp\t%y0";
19154 }
19155 if (STACK_TOP_P (operands[0]))
19156 return "fld%z1\t%y1";
19157 return "fst\t%y0";
19158 }
19159
19160 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19161 FP status register is set. */
19162
19163 void
19164 ix86_emit_fp_unordered_jump (rtx label)
19165 {
19166 rtx reg = gen_reg_rtx (HImode);
19167 rtx temp;
19168
19169 emit_insn (gen_x86_fnstsw_1 (reg));
19170
19171 if (TARGET_USE_SAHF)
19172 {
19173 emit_insn (gen_x86_sahf_1 (reg));
19174
19175 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19176 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19177 }
19178 else
19179 {
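      /* Without sahf, test the C2 flag directly: it is bit 2 of the
	 upper byte of the FP status word stored by fnstsw.  */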
19180 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19181
19182 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19183 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19184 }
19185
19186 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19187 gen_rtx_LABEL_REF (VOIDmode, label),
19188 pc_rtx);
19189 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19190 emit_jump_insn (temp);
19191 }
19192
19193 /* Output code to perform a log1p XFmode calculation. */
19194
19195 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19196 {
19197 rtx label1 = gen_label_rtx ();
19198 rtx label2 = gen_label_rtx ();
19199
19200 rtx tmp = gen_reg_rtx (XFmode);
19201 rtx tmp2 = gen_reg_rtx (XFmode);
19202
19203 emit_insn (gen_absxf2 (tmp, op1));
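  /* fyl2xp1 is only valid for |op1| < 1 - sqrt(2)/2 (about 0.29289);
     for larger |op1| fall back to fyl2x on 1.0 + op1.  */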
19204 emit_insn (gen_cmpxf (tmp,
19205 CONST_DOUBLE_FROM_REAL_VALUE (
19206 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19207 XFmode)));
19208 emit_jump_insn (gen_bge (label1));
19209
19210 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19211 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19212 emit_jump (label2);
19213
19214 emit_label (label1);
19215 emit_move_insn (tmp, CONST1_RTX (XFmode));
19216 emit_insn (gen_addxf3 (tmp, op1, tmp));
19217 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19218 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19219
19220 emit_label (label2);
19221 }
19222
19223 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19224
19225 static void
19226 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19227 tree decl)
19228 {
19229 /* With Binutils 2.15, the "@unwind" marker must be specified on
19230 every occurrence of the ".eh_frame" section, not just the first
19231 one. */
19232 if (TARGET_64BIT
19233 && strcmp (name, ".eh_frame") == 0)
19234 {
19235 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19236 flags & SECTION_WRITE ? "aw" : "a");
19237 return;
19238 }
19239 default_elf_asm_named_section (name, flags, decl);
19240 }
19241
19242 /* Return the mangling of TYPE if it is an extended fundamental type. */
19243
19244 static const char *
19245 ix86_mangle_fundamental_type (tree type)
19246 {
19247 switch (TYPE_MODE (type))
19248 {
19249 case TFmode:
19250 /* __float128 is "g". */
19251 return "g";
19252 case XFmode:
19253 /* "long double" or __float80 is "e". */
19254 return "e";
19255 default:
19256 return NULL;
19257 }
19258 }
19259
19260 /* For 32-bit code we can save PIC register setup by using
19261    the __stack_chk_fail_local hidden function instead of calling
19262    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19263 register, so it is better to call __stack_chk_fail directly. */
19264
19265 static tree
19266 ix86_stack_protect_fail (void)
19267 {
19268 return TARGET_64BIT
19269 ? default_external_stack_protect_fail ()
19270 : default_hidden_stack_protect_fail ();
19271 }
19272
19273 /* Select a format to encode pointers in exception handling data. CODE
19274 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19275 true if the symbol may be affected by dynamic relocations.
19276
19277 ??? All x86 object file formats are capable of representing this.
19278 After all, the relocation needed is the same as for the call insn.
19279 Whether or not a particular assembler allows us to enter such, I
19280 guess we'll have to see. */
19281 int
19282 asm_preferred_eh_data_format (int code, int global)
19283 {
19284 if (flag_pic)
19285 {
19286 int type = DW_EH_PE_sdata8;
19287 if (!TARGET_64BIT
19288 || ix86_cmodel == CM_SMALL_PIC
19289 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19290 type = DW_EH_PE_sdata4;
19291 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19292 }
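  /* Without PIC, DW_EH_PE_udata4 suffices whenever the referenced address
     is known to fit in 32 bits: everything in the small model, and code
     addresses in the medium model.  Otherwise emit a full absolute
     pointer.  */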
19293 if (ix86_cmodel == CM_SMALL
19294 || (ix86_cmodel == CM_MEDIUM && code))
19295 return DW_EH_PE_udata4;
19296 return DW_EH_PE_absptr;
19297 }
19298 \f
19299 /* Expand copysign from SIGN to the positive value ABS_VALUE
19300    storing in RESULT.  If MASK is nonnull, it is a mask that clears
19301    the sign bit, i.e. the complement of the sign-bit mask. */
19302 static void
19303 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19304 {
19305 enum machine_mode mode = GET_MODE (sign);
19306 rtx sgn = gen_reg_rtx (mode);
19307 if (mask == NULL_RTX)
19308 {
19309 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19310 if (!VECTOR_MODE_P (mode))
19311 {
19312 /* We need to generate a scalar mode mask in this case. */
19313 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19314 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19315 mask = gen_reg_rtx (mode);
19316 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19317 }
19318 }
19319 else
19320 mask = gen_rtx_NOT (mode, mask);
19321 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19322 gen_rtx_AND (mode, mask, sign)));
19323 emit_insn (gen_rtx_SET (VOIDmode, result,
19324 gen_rtx_IOR (mode, abs_value, sgn)));
19325 }
19326
19327 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19328 mask for masking out the sign-bit is stored in *SMASK, if that is
19329 non-null. */
19330 static rtx
19331 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19332 {
19333 enum machine_mode mode = GET_MODE (op0);
19334 rtx xa, mask;
19335
19336 xa = gen_reg_rtx (mode);
19337 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19338 if (!VECTOR_MODE_P (mode))
19339 {
19340 /* We need to generate a scalar mode mask in this case. */
19341 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19342 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19343 mask = gen_reg_rtx (mode);
19344 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19345 }
19346 emit_insn (gen_rtx_SET (VOIDmode, xa,
19347 gen_rtx_AND (mode, op0, mask)));
19348
19349 if (smask)
19350 *smask = mask;
19351
19352 return xa;
19353 }
19354
19355 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19356 swapping the operands if SWAP_OPERANDS is true. The expanded
19357 code is a forward jump to a newly created label in case the
19358 comparison is true. The generated label rtx is returned. */
19359 static rtx
19360 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19361 bool swap_operands)
19362 {
19363 rtx label, tmp;
19364
19365 if (swap_operands)
19366 {
19367 tmp = op0;
19368 op0 = op1;
19369 op1 = tmp;
19370 }
19371
19372 label = gen_label_rtx ();
19373 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19374 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19375 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19376 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19377 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19378 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19379 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19380 JUMP_LABEL (tmp) = label;
19381
19382 return label;
19383 }
19384
19385 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19386 using comparison code CODE. Operands are swapped for the comparison if
19387    SWAP_OPERANDS is true.  Returns an rtx for the generated mask. */
19388 static rtx
19389 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19390 bool swap_operands)
19391 {
19392 enum machine_mode mode = GET_MODE (op0);
19393 rtx mask = gen_reg_rtx (mode);
19394
19395 if (swap_operands)
19396 {
19397 rtx tmp = op0;
19398 op0 = op1;
19399 op1 = tmp;
19400 }
19401
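  /* The scalar cmpsd/cmpss patterns produce an all-ones bit mask in the
     low element when the comparison holds and zero otherwise; callers in
     this file AND the mask with 1.0 (or -1.0) to form a conditional
     adjustment.  */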
19402 if (mode == DFmode)
19403 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19404 gen_rtx_fmt_ee (code, mode, op0, op1)));
19405 else
19406 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19407 gen_rtx_fmt_ee (code, mode, op0, op1)));
19408
19409 return mask;
19410 }
19411
19412 /* Generate and return an rtx of mode MODE for 2**n, where n is the number
19413    of explicitly stored mantissa bits of MODE, which must be one of DFmode or SFmode. */
19414 static rtx
19415 ix86_gen_TWO52 (enum machine_mode mode)
19416 {
19417 REAL_VALUE_TYPE TWO52r;
19418 rtx TWO52;
19419
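  /* 2**52 (DFmode) resp. 2**23 (SFmode) is the smallest power of two at
     and above which every representable value is an integer; adding and
     then subtracting it rounds a nonnegative value below it to an
     integer using the current rounding mode.  */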
19420 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19421 TWO52 = const_double_from_real_value (TWO52r, mode);
19422 TWO52 = force_reg (mode, TWO52);
19423
19424 return TWO52;
19425 }
19426
19427 /* Expand SSE sequence for computing lround from OP1 storing
19428 into OP0. */
19429 void
19430 ix86_expand_lround (rtx op0, rtx op1)
19431 {
19432 /* C code for the stuff we're doing below:
19433 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19434 return (long)tmp;
19435 */
19436 enum machine_mode mode = GET_MODE (op1);
19437 const struct real_format *fmt;
19438 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19439 rtx adj;
19440
19441 /* load nextafter (0.5, 0.0) */
19442 fmt = REAL_MODE_FORMAT (mode);
19443 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19444 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
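  /* Adding exactly 0.5 would round inputs just below 0.5 (e.g. the
     largest value smaller than 0.5) up to 1; using the predecessor of
     0.5 avoids that.  */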
19445
19446 /* adj = copysign (0.5, op1) */
19447 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19448 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19449
19450 /* adj = op1 + adj */
19451 expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
19452
19453 /* op0 = (imode)adj */
19454 expand_fix (op0, adj, 0);
19455 }
19456
19457 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
19458    into OP0. */
19459 void
19460 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19461 {
19462 /* C code for the stuff we're doing below (for do_floor):
19463 xi = (long)op1;
19464 xi -= (double)xi > op1 ? 1 : 0;
19465 return xi;
19466 */
19467 enum machine_mode fmode = GET_MODE (op1);
19468 enum machine_mode imode = GET_MODE (op0);
19469 rtx ireg, freg, label;
19470
19471 /* reg = (long)op1 */
19472 ireg = gen_reg_rtx (imode);
19473 expand_fix (ireg, op1, 0);
19474
19475 /* freg = (double)reg */
19476 freg = gen_reg_rtx (fmode);
19477 expand_float (freg, ireg, 0);
19478
19479 /* ireg = (freg > op1) ? ireg - 1 : ireg for floor, (freg < op1) ? ireg + 1 : ireg for ceil */
19480 label = ix86_expand_sse_compare_and_jump (UNLE,
19481 freg, op1, !do_floor);
19482 expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19483 ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
19484 emit_label (label);
19485 LABEL_NUSES (label) = 1;
19486
19487 emit_move_insn (op0, ireg);
19488 }
19489
19490 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19491 result in OPERAND0. */
19492 void
19493 ix86_expand_rint (rtx operand0, rtx operand1)
19494 {
19495 /* C code for the stuff we're doing below:
19496 xa = fabs (operand1);
19497 if (!isless (xa, 2**52))
19498 return operand1;
19499 xa = xa + 2**52 - 2**52;
19500 return copysign (xa, operand1);
19501 */
19502 enum machine_mode mode = GET_MODE (operand0);
19503 rtx res, xa, label, TWO52, mask;
19504
19505 res = gen_reg_rtx (mode);
19506 emit_move_insn (res, operand1);
19507
19508 /* xa = abs (operand1) */
19509 xa = ix86_expand_sse_fabs (res, &mask);
19510
19511 /* if (!isless (xa, TWO52)) goto label; */
19512 TWO52 = ix86_gen_TWO52 (mode);
19513 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19514
19515 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19516 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19517
19518 ix86_sse_copysign_to_positive (res, xa, res, mask);
19519
19520 emit_label (label);
19521 LABEL_NUSES (label) = 1;
19522
19523 emit_move_insn (operand0, res);
19524 }
19525
19526 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19527    into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
19528 void
19529 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19530 {
19531 /* C code for the stuff we expand below.
19532 double xa = fabs (x), x2;
19533 if (!isless (xa, TWO52))
19534 return x;
19535 xa = xa + TWO52 - TWO52;
19536 x2 = copysign (xa, x);
19537 Compensate. Floor:
19538 if (x2 > x)
19539 x2 -= 1;
19540 Compensate. Ceil:
19541 if (x2 < x)
19542 x2 -= -1;
19543 return x2;
19544 */
19545 enum machine_mode mode = GET_MODE (operand0);
19546 rtx xa, TWO52, tmp, label, one, res, mask;
19547
19548 TWO52 = ix86_gen_TWO52 (mode);
19549
19550 /* Temporary for holding the result, initialized to the input
19551 operand to ease control flow. */
19552 res = gen_reg_rtx (mode);
19553 emit_move_insn (res, operand1);
19554
19555 /* xa = abs (operand1) */
19556 xa = ix86_expand_sse_fabs (res, &mask);
19557
19558 /* if (!isless (xa, TWO52)) goto label; */
19559 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19560
19561 /* xa = xa + TWO52 - TWO52; */
19562 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19563 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19564
19565 /* xa = copysign (xa, operand1) */
19566 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19567
19568 /* generate 1.0 or -1.0 */
19569 one = force_reg (mode,
19570 const_double_from_real_value (do_floor
19571 ? dconst1 : dconstm1, mode));
19572
19573 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor, xa -= (xa < operand1 ? -1 : 0) for ceil */
19574 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19575 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19576 gen_rtx_AND (mode, one, tmp)));
19577 /* We always need to subtract here to preserve signed zero. */
19578 expand_simple_binop (mode, MINUS,
19579 xa, tmp, res, 0, OPTAB_DIRECT);
19580
19581 emit_label (label);
19582 LABEL_NUSES (label) = 1;
19583
19584 emit_move_insn (operand0, res);
19585 }
19586
19587 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19588 into OPERAND0. */
19589 void
19590 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19591 {
19592 /* C code for the stuff we expand below.
19593 double xa = fabs (x), x2;
19594 if (!isless (xa, TWO52))
19595 return x;
19596 x2 = (double)(long)x;
19597 Compensate. Floor:
19598 if (x2 > x)
19599 x2 -= 1;
19600 Compensate. Ceil:
19601 if (x2 < x)
19602 x2 += 1;
19603 if (HONOR_SIGNED_ZEROS (mode))
19604 return copysign (x2, x);
19605 return x2;
19606 */
19607 enum machine_mode mode = GET_MODE (operand0);
19608 rtx xa, xi, TWO52, tmp, label, one, res, mask;
19609
19610 TWO52 = ix86_gen_TWO52 (mode);
19611
19612 /* Temporary for holding the result, initialized to the input
19613 operand to ease control flow. */
19614 res = gen_reg_rtx (mode);
19615 emit_move_insn (res, operand1);
19616
19617 /* xa = abs (operand1) */
19618 xa = ix86_expand_sse_fabs (res, &mask);
19619
19620 /* if (!isless (xa, TWO52)) goto label; */
19621 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19622
19623 /* xa = (double)(long)x */
19624 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19625 expand_fix (xi, res, 0);
19626 expand_float (xa, xi, 0);
19627
19628 /* generate 1.0 */
19629 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19630
19631 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor, xa += (xa < operand1 ? 1 : 0) for ceil */
19632 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19633 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19634 gen_rtx_AND (mode, one, tmp)));
19635 expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19636 xa, tmp, res, 0, OPTAB_DIRECT);
19637
19638 if (HONOR_SIGNED_ZEROS (mode))
19639 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19640
19641 emit_label (label);
19642 LABEL_NUSES (label) = 1;
19643
19644 emit_move_insn (operand0, res);
19645 }
19646
19647 /* Expand SSE sequence for computing round from OPERAND1 storing
19648    into OPERAND0.  The sequence works without relying on DImode truncation
19649    via cvttsd2siq, which is only available on 64-bit targets. */
19650 void
19651 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
19652 {
19653 /* C code for the stuff we expand below.
19654 double xa = fabs (x), xa2, x2;
19655 if (!isless (xa, TWO52))
19656 return x;
19657 Using the absolute value and copying back sign makes
19658 -0.0 -> -0.0 correct.
19659 xa2 = xa + TWO52 - TWO52;
19660 Compensate.
19661 dxa = xa2 - xa;
19662 if (dxa <= -0.5)
19663 xa2 += 1;
19664 else if (dxa > 0.5)
19665 xa2 -= 1;
19666 x2 = copysign (xa2, x);
19667 return x2;
19668 */
19669 enum machine_mode mode = GET_MODE (operand0);
19670 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
19671
19672 TWO52 = ix86_gen_TWO52 (mode);
19673
19674 /* Temporary for holding the result, initialized to the input
19675 operand to ease control flow. */
19676 res = gen_reg_rtx (mode);
19677 emit_move_insn (res, operand1);
19678
19679 /* xa = abs (operand1) */
19680 xa = ix86_expand_sse_fabs (res, &mask);
19681
19682 /* if (!isless (xa, TWO52)) goto label; */
19683 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19684
19685 /* xa2 = xa + TWO52 - TWO52; */
19686 xa2 = gen_reg_rtx (mode);
19687 expand_simple_binop (mode, PLUS, xa, TWO52, xa2, 0, OPTAB_DIRECT);
19688 expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
19689
19690 /* dxa = xa2 - xa; */
19691 dxa = gen_reg_rtx (mode);
19692 expand_simple_binop (mode, MINUS, xa2, xa, dxa, 0, OPTAB_DIRECT);
19693
19694 /* generate 0.5, 1.0 and -0.5 */
19695 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
19696 one = gen_reg_rtx (mode);
19697 expand_simple_binop (mode, PLUS, half, half, one, 0, OPTAB_DIRECT);
19698 mhalf = gen_reg_rtx (mode);
19699 expand_simple_binop (mode, MINUS, half, one, mhalf, 0, OPTAB_DIRECT);
19700
19701 /* Compensate. */
19702 tmp = gen_reg_rtx (mode);
19703 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19704 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
19705 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19706 gen_rtx_AND (mode, one, tmp)));
19707 expand_simple_binop (mode, MINUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19708 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19709 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
19710 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19711 gen_rtx_AND (mode, one, tmp)));
19712 expand_simple_binop (mode, PLUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19713
19714 /* res = copysign (xa2, operand1) */
19715 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
19716
19717 emit_label (label);
19718 LABEL_NUSES (label) = 1;
19719
19720 emit_move_insn (operand0, res);
19721 }
19722
19723 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19724 into OPERAND0. */
19725 void
19726 ix86_expand_trunc (rtx operand0, rtx operand1)
19727 {
19728 /* C code for SSE variant we expand below.
19729 double xa = fabs (x), x2;
19730 if (!isless (xa, TWO52))
19731 return x;
19732 x2 = (double)(long)x;
19733 if (HONOR_SIGNED_ZEROS (mode))
19734 return copysign (x2, x);
19735 return x2;
19736 */
19737 enum machine_mode mode = GET_MODE (operand0);
19738 rtx xa, xi, TWO52, label, res, mask;
19739
19740 TWO52 = ix86_gen_TWO52 (mode);
19741
19742 /* Temporary for holding the result, initialized to the input
19743 operand to ease control flow. */
19744 res = gen_reg_rtx (mode);
19745 emit_move_insn (res, operand1);
19746
19747 /* xa = abs (operand1) */
19748 xa = ix86_expand_sse_fabs (res, &mask);
19749
19750 /* if (!isless (xa, TWO52)) goto label; */
19751 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19752
19753 /* x = (double)(long)x */
19754 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19755 expand_fix (xi, res, 0);
19756 expand_float (res, xi, 0);
19757
19758 if (HONOR_SIGNED_ZEROS (mode))
19759 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19760
19761 emit_label (label);
19762 LABEL_NUSES (label) = 1;
19763
19764 emit_move_insn (operand0, res);
19765 }
19766
19767 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19768    into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
19769 void
19770 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
19771 {
19772 enum machine_mode mode = GET_MODE (operand0);
19773 rtx xa, mask, TWO52, label, one, res, smask;
19774
19775 /* C code for SSE variant we expand below.
19776 double xa = fabs (x), xa2, x2;
19777 if (!isless (xa, TWO52))
19778 return x;
19779 xa2 = xa + TWO52 - TWO52;
19780 Compensate:
19781 if (xa2 > xa)
19782 xa2 -= 1.0;
19783 x2 = copysign (xa2, x);
19784 return x2;
19785 */
19786
19787 TWO52 = ix86_gen_TWO52 (mode);
19788
19789 /* Temporary for holding the result, initialized to the input
19790 operand to ease control flow. */
19791 res = gen_reg_rtx (mode);
19792 emit_move_insn (res, operand1);
19793
19794 /* xa = abs (operand1) */
19795 xa = ix86_expand_sse_fabs (res, &smask);
19796
19797 /* if (!isless (xa, TWO52)) goto label; */
19798 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19799
19800 /* res = xa + TWO52 - TWO52; */
19801 expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
19802 expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
19803
19804 /* generate 1.0 */
19805 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19806
19807 /* Compensate: res = res - (res > xa ? 1 : 0) */
19808 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
19809 emit_insn (gen_rtx_SET (VOIDmode, mask,
19810 gen_rtx_AND (mode, mask, one)));
19811 expand_simple_binop (mode, MINUS,
19812 res, mask, res, 0, OPTAB_DIRECT);
19813
19814 /* res = copysign (res, operand1) */
19815 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
19816
19817 emit_label (label);
19818 LABEL_NUSES (label) = 1;
19819
19820 emit_move_insn (operand0, res);
19821 }
19822
19823 /* Expand SSE sequence for computing round from OPERAND1 storing
19824 into OPERAND0. */
19825 void
19826 ix86_expand_round (rtx operand0, rtx operand1)
19827 {
19828 /* C code for the stuff we're doing below:
19829 double xa = fabs (x);
19830 if (!isless (xa, TWO52))
19831 return x;
19832 xa = (double)(long)(xa + nextafter (0.5, 0.0));
19833 return copysign (xa, x);
19834 */
19835 enum machine_mode mode = GET_MODE (operand0);
19836 rtx res, TWO52, xa, label, xi, half, mask;
19837 const struct real_format *fmt;
19838 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19839
19840 /* Temporary for holding the result, initialized to the input
19841 operand to ease control flow. */
19842 res = gen_reg_rtx (mode);
19843 emit_move_insn (res, operand1);
19844
19845 TWO52 = ix86_gen_TWO52 (mode);
19846 xa = ix86_expand_sse_fabs (res, &mask);
19847 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19848
19849 /* load nextafter (0.5, 0.0) */
19850 fmt = REAL_MODE_FORMAT (mode);
19851 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19852 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19853
19854 /* xa = xa + 0.5 */
19855 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
19856 expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);
19857
19858 /* xa = (double)(int64_t)xa */
19859 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19860 expand_fix (xi, xa, 0);
19861 expand_float (xa, xi, 0);
19862
19863 /* res = copysign (xa, operand1) */
19864 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
19865
19866 emit_label (label);
19867 LABEL_NUSES (label) = 1;
19868
19869 emit_move_insn (operand0, res);
19870 }
19871
19872 #include "gt-i386.h"