1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
56 #endif
57
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
64 : 4)
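/* Editor's note: an illustrative sketch, not part of the original file.
   MODE_INDEX selects a row of the five-entry per-mode cost arrays in the
   processor_costs tables below; a multiply-cost lookup would look roughly
   like this (assuming the field is named mult_init, as in i386.h of this
   era):

     int cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   Any mode other than QI/HI/SI/DImode falls through to index 4, the
   "other" slot of each array.  */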
65
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
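/* Editor's note: a brief worked example, not part of the original file.
   Under the assumption above (COSTS_N_INSNS (N) == (N) * 4) and with a
   2-byte add, the two scales line up: an add costs COSTS_N_INSNS (1) == 4
   when tuning for speed and COSTS_N_BYTES (2) == 4 when tuning for size,
   so the size costs below are directly comparable to the speed costs in
   the per-processor tables that follow.  */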
69
70 static const
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
89 0, /* "large" insn */
90 2, /* MOVE_RATIO */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
114 2, /* Branch cost */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 };
122
123 /* Processor costs (relative to an add) */
124 static const
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
144 3, /* MOVE_RATIO */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
168 1, /* Branch cost */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
175 };
176
177 static const
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
197 3, /* MOVE_RATIO */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
221 1, /* Branch cost */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
228 };
229
230 static const
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
250 6, /* MOVE_RATIO */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
274 2, /* Branch cost */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
281 };
282
283 static const
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
303 6, /* MOVE_RATIO */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
327 2, /* Branch cost */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
334 };
335
336 static const
337 struct processor_costs geode_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (1), /* cost of a lea instruction */
340 COSTS_N_INSNS (2), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (4), /* HI */
344 COSTS_N_INSNS (7), /* SI */
345 COSTS_N_INSNS (7), /* DI */
346 COSTS_N_INSNS (7)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (23), /* HI */
350 COSTS_N_INSNS (39), /* SI */
351 COSTS_N_INSNS (39), /* DI */
352 COSTS_N_INSNS (39)}, /* other */
353 COSTS_N_INSNS (1), /* cost of movsx */
354 COSTS_N_INSNS (1), /* cost of movzx */
355 8, /* "large" insn */
356 4, /* MOVE_RATIO */
357 1, /* cost for loading QImode using movzbl */
358 {1, 1, 1}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {1, 1, 1}, /* cost of storing integer registers */
362 1, /* cost of reg,reg fld/fst */
363 {1, 1, 1}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 6, 6}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367
368 1, /* cost of moving MMX register */
369 {1, 1}, /* cost of loading MMX registers
370 in SImode and DImode */
371 {1, 1}, /* cost of storing MMX registers
372 in SImode and DImode */
373 1, /* cost of moving SSE register */
374 {1, 1, 1}, /* cost of loading SSE registers
375 in SImode, DImode and TImode */
376 {1, 1, 1}, /* cost of storing SSE registers
377 in SImode, DImode and TImode */
378 1, /* MMX or SSE register to integer */
379 32, /* size of prefetch block */
380 1, /* number of parallel prefetches */
381 1, /* Branch cost */
382 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
383 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
384 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
385 COSTS_N_INSNS (1), /* cost of FABS instruction. */
386 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
387 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
388 };
389
390 static const
391 struct processor_costs k6_cost = {
392 COSTS_N_INSNS (1), /* cost of an add instruction */
393 COSTS_N_INSNS (2), /* cost of a lea instruction */
394 COSTS_N_INSNS (1), /* variable shift costs */
395 COSTS_N_INSNS (1), /* constant shift costs */
396 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
397 COSTS_N_INSNS (3), /* HI */
398 COSTS_N_INSNS (3), /* SI */
399 COSTS_N_INSNS (3), /* DI */
400 COSTS_N_INSNS (3)}, /* other */
401 0, /* cost of multiply per each bit set */
402 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
403 COSTS_N_INSNS (18), /* HI */
404 COSTS_N_INSNS (18), /* SI */
405 COSTS_N_INSNS (18), /* DI */
406 COSTS_N_INSNS (18)}, /* other */
407 COSTS_N_INSNS (2), /* cost of movsx */
408 COSTS_N_INSNS (2), /* cost of movzx */
409 8, /* "large" insn */
410 4, /* MOVE_RATIO */
411 3, /* cost for loading QImode using movzbl */
412 {4, 5, 4}, /* cost of loading integer registers
413 in QImode, HImode and SImode.
414 Relative to reg-reg move (2). */
415 {2, 3, 2}, /* cost of storing integer registers */
416 4, /* cost of reg,reg fld/fst */
417 {6, 6, 6}, /* cost of loading fp registers
418 in SFmode, DFmode and XFmode */
419 {4, 4, 4}, /* cost of storing fp registers
420 in SFmode, DFmode and XFmode */
421 2, /* cost of moving MMX register */
422 {2, 2}, /* cost of loading MMX registers
423 in SImode and DImode */
424 {2, 2}, /* cost of storing MMX registers
425 in SImode and DImode */
426 2, /* cost of moving SSE register */
427 {2, 2, 8}, /* cost of loading SSE registers
428 in SImode, DImode and TImode */
429 {2, 2, 8}, /* cost of storing SSE registers
430 in SImode, DImode and TImode */
431 6, /* MMX or SSE register to integer */
432 32, /* size of prefetch block */
433 1, /* number of parallel prefetches */
434 1, /* Branch cost */
435 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
436 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
437 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
438 COSTS_N_INSNS (2), /* cost of FABS instruction. */
439 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
440 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
441 };
442
443 static const
444 struct processor_costs athlon_cost = {
445 COSTS_N_INSNS (1), /* cost of an add instruction */
446 COSTS_N_INSNS (2), /* cost of a lea instruction */
447 COSTS_N_INSNS (1), /* variable shift costs */
448 COSTS_N_INSNS (1), /* constant shift costs */
449 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
450 COSTS_N_INSNS (5), /* HI */
451 COSTS_N_INSNS (5), /* SI */
452 COSTS_N_INSNS (5), /* DI */
453 COSTS_N_INSNS (5)}, /* other */
454 0, /* cost of multiply per each bit set */
455 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
456 COSTS_N_INSNS (26), /* HI */
457 COSTS_N_INSNS (42), /* SI */
458 COSTS_N_INSNS (74), /* DI */
459 COSTS_N_INSNS (74)}, /* other */
460 COSTS_N_INSNS (1), /* cost of movsx */
461 COSTS_N_INSNS (1), /* cost of movzx */
462 8, /* "large" insn */
463 9, /* MOVE_RATIO */
464 4, /* cost for loading QImode using movzbl */
465 {3, 4, 3}, /* cost of loading integer registers
466 in QImode, HImode and SImode.
467 Relative to reg-reg move (2). */
468 {3, 4, 3}, /* cost of storing integer registers */
469 4, /* cost of reg,reg fld/fst */
470 {4, 4, 12}, /* cost of loading fp registers
471 in SFmode, DFmode and XFmode */
472 {6, 6, 8}, /* cost of storing fp registers
473 in SFmode, DFmode and XFmode */
474 2, /* cost of moving MMX register */
475 {4, 4}, /* cost of loading MMX registers
476 in SImode and DImode */
477 {4, 4}, /* cost of storing MMX registers
478 in SImode and DImode */
479 2, /* cost of moving SSE register */
480 {4, 4, 6}, /* cost of loading SSE registers
481 in SImode, DImode and TImode */
482 {4, 4, 5}, /* cost of storing SSE registers
483 in SImode, DImode and TImode */
484 5, /* MMX or SSE register to integer */
485 64, /* size of prefetch block */
486 6, /* number of parallel prefetches */
487 5, /* Branch cost */
488 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
494 };
495
496 static const
497 struct processor_costs k8_cost = {
498 COSTS_N_INSNS (1), /* cost of an add instruction */
499 COSTS_N_INSNS (2), /* cost of a lea instruction */
500 COSTS_N_INSNS (1), /* variable shift costs */
501 COSTS_N_INSNS (1), /* constant shift costs */
502 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
503 COSTS_N_INSNS (4), /* HI */
504 COSTS_N_INSNS (3), /* SI */
505 COSTS_N_INSNS (4), /* DI */
506 COSTS_N_INSNS (5)}, /* other */
507 0, /* cost of multiply per each bit set */
508 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
509 COSTS_N_INSNS (26), /* HI */
510 COSTS_N_INSNS (42), /* SI */
511 COSTS_N_INSNS (74), /* DI */
512 COSTS_N_INSNS (74)}, /* other */
513 COSTS_N_INSNS (1), /* cost of movsx */
514 COSTS_N_INSNS (1), /* cost of movzx */
515 8, /* "large" insn */
516 9, /* MOVE_RATIO */
517 4, /* cost for loading QImode using movzbl */
518 {3, 4, 3}, /* cost of loading integer registers
519 in QImode, HImode and SImode.
520 Relative to reg-reg move (2). */
521 {3, 4, 3}, /* cost of storing integer registers */
522 4, /* cost of reg,reg fld/fst */
523 {4, 4, 12}, /* cost of loading fp registers
524 in SFmode, DFmode and XFmode */
525 {6, 6, 8}, /* cost of storing fp registers
526 in SFmode, DFmode and XFmode */
527 2, /* cost of moving MMX register */
528 {3, 3}, /* cost of loading MMX registers
529 in SImode and DImode */
530 {4, 4}, /* cost of storing MMX registers
531 in SImode and DImode */
532 2, /* cost of moving SSE register */
533 {4, 3, 6}, /* cost of loading SSE registers
534 in SImode, DImode and TImode */
535 {4, 4, 5}, /* cost of storing SSE registers
536 in SImode, DImode and TImode */
537 5, /* MMX or SSE register to integer */
538 64, /* size of prefetch block */
539 6, /* number of parallel prefetches */
540 5, /* Branch cost */
541 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
542 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
543 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
544 COSTS_N_INSNS (2), /* cost of FABS instruction. */
545 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
546 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
547 };
548
549 static const
550 struct processor_costs pentium4_cost = {
551 COSTS_N_INSNS (1), /* cost of an add instruction */
552 COSTS_N_INSNS (3), /* cost of a lea instruction */
553 COSTS_N_INSNS (4), /* variable shift costs */
554 COSTS_N_INSNS (4), /* constant shift costs */
555 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
556 COSTS_N_INSNS (15), /* HI */
557 COSTS_N_INSNS (15), /* SI */
558 COSTS_N_INSNS (15), /* DI */
559 COSTS_N_INSNS (15)}, /* other */
560 0, /* cost of multiply per each bit set */
561 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
562 COSTS_N_INSNS (56), /* HI */
563 COSTS_N_INSNS (56), /* SI */
564 COSTS_N_INSNS (56), /* DI */
565 COSTS_N_INSNS (56)}, /* other */
566 COSTS_N_INSNS (1), /* cost of movsx */
567 COSTS_N_INSNS (1), /* cost of movzx */
568 16, /* "large" insn */
569 6, /* MOVE_RATIO */
570 2, /* cost for loading QImode using movzbl */
571 {4, 5, 4}, /* cost of loading integer registers
572 in QImode, HImode and SImode.
573 Relative to reg-reg move (2). */
574 {2, 3, 2}, /* cost of storing integer registers */
575 2, /* cost of reg,reg fld/fst */
576 {2, 2, 6}, /* cost of loading fp registers
577 in SFmode, DFmode and XFmode */
578 {4, 4, 6}, /* cost of storing fp registers
579 in SFmode, DFmode and XFmode */
580 2, /* cost of moving MMX register */
581 {2, 2}, /* cost of loading MMX registers
582 in SImode and DImode */
583 {2, 2}, /* cost of storing MMX registers
584 in SImode and DImode */
585 12, /* cost of moving SSE register */
586 {12, 12, 12}, /* cost of loading SSE registers
587 in SImode, DImode and TImode */
588 {2, 2, 8}, /* cost of storing SSE registers
589 in SImode, DImode and TImode */
590 10, /* MMX or SSE register to integer */
591 64, /* size of prefetch block */
592 6, /* number of parallel prefetches */
593 2, /* Branch cost */
594 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
595 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
596 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
597 COSTS_N_INSNS (2), /* cost of FABS instruction. */
598 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
599 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
600 };
601
602 static const
603 struct processor_costs nocona_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 COSTS_N_INSNS (1), /* cost of a lea instruction */
606 COSTS_N_INSNS (1), /* variable shift costs */
607 COSTS_N_INSNS (1), /* constant shift costs */
608 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
609 COSTS_N_INSNS (10), /* HI */
610 COSTS_N_INSNS (10), /* SI */
611 COSTS_N_INSNS (10), /* DI */
612 COSTS_N_INSNS (10)}, /* other */
613 0, /* cost of multiply per each bit set */
614 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
615 COSTS_N_INSNS (66), /* HI */
616 COSTS_N_INSNS (66), /* SI */
617 COSTS_N_INSNS (66), /* DI */
618 COSTS_N_INSNS (66)}, /* other */
619 COSTS_N_INSNS (1), /* cost of movsx */
620 COSTS_N_INSNS (1), /* cost of movzx */
621 16, /* "large" insn */
622 17, /* MOVE_RATIO */
623 4, /* cost for loading QImode using movzbl */
624 {4, 4, 4}, /* cost of loading integer registers
625 in QImode, HImode and SImode.
626 Relative to reg-reg move (2). */
627 {4, 4, 4}, /* cost of storing integer registers */
628 3, /* cost of reg,reg fld/fst */
629 {12, 12, 12}, /* cost of loading fp registers
630 in SFmode, DFmode and XFmode */
631 {4, 4, 4}, /* cost of storing fp registers
632 in SFmode, DFmode and XFmode */
633 6, /* cost of moving MMX register */
634 {12, 12}, /* cost of loading MMX registers
635 in SImode and DImode */
636 {12, 12}, /* cost of storing MMX registers
637 in SImode and DImode */
638 6, /* cost of moving SSE register */
639 {12, 12, 12}, /* cost of loading SSE registers
640 in SImode, DImode and TImode */
641 {12, 12, 12}, /* cost of storing SSE registers
642 in SImode, DImode and TImode */
643 8, /* MMX or SSE register to integer */
644 128, /* size of prefetch block */
645 8, /* number of parallel prefetches */
646 1, /* Branch cost */
647 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
648 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
649 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
650 COSTS_N_INSNS (3), /* cost of FABS instruction. */
651 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
652 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
653 };
654
655 /* Generic64 should produce code tuned for Nocona and K8. */
656 static const
657 struct processor_costs generic64_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 /* On all chips taken into consideration, lea takes 2 cycles or more. With
660 that higher cost, however, our current implementation of synth_mult
661 ends up using unnecessary temporary registers, causing regressions on
662 several SPECfp benchmarks. */
663 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
664 COSTS_N_INSNS (1), /* variable shift costs */
665 COSTS_N_INSNS (1), /* constant shift costs */
666 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
667 COSTS_N_INSNS (4), /* HI */
668 COSTS_N_INSNS (3), /* SI */
669 COSTS_N_INSNS (4), /* DI */
670 COSTS_N_INSNS (2)}, /* other */
671 0, /* cost of multiply per each bit set */
672 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
673 COSTS_N_INSNS (26), /* HI */
674 COSTS_N_INSNS (42), /* SI */
675 COSTS_N_INSNS (74), /* DI */
676 COSTS_N_INSNS (74)}, /* other */
677 COSTS_N_INSNS (1), /* cost of movsx */
678 COSTS_N_INSNS (1), /* cost of movzx */
679 8, /* "large" insn */
680 17, /* MOVE_RATIO */
681 4, /* cost for loading QImode using movzbl */
682 {4, 4, 4}, /* cost of loading integer registers
683 in QImode, HImode and SImode.
684 Relative to reg-reg move (2). */
685 {4, 4, 4}, /* cost of storing integer registers */
686 4, /* cost of reg,reg fld/fst */
687 {12, 12, 12}, /* cost of loading fp registers
688 in SFmode, DFmode and XFmode */
689 {6, 6, 8}, /* cost of storing fp registers
690 in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {8, 8}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {8, 8}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {8, 8, 8}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {8, 8, 8}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 5, /* MMX or SSE register to integer */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
705 value is increased to the perhaps more appropriate value of 5. */
706 3, /* Branch cost */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
713 };
714
715 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
716 static const
717 struct processor_costs generic32_cost = {
718 COSTS_N_INSNS (1), /* cost of an add instruction */
719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
720 COSTS_N_INSNS (1), /* variable shift costs */
721 COSTS_N_INSNS (1), /* constant shift costs */
722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
723 COSTS_N_INSNS (4), /* HI */
724 COSTS_N_INSNS (3), /* SI */
725 COSTS_N_INSNS (4), /* DI */
726 COSTS_N_INSNS (2)}, /* other */
727 0, /* cost of multiply per each bit set */
728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
729 COSTS_N_INSNS (26), /* HI */
730 COSTS_N_INSNS (42), /* SI */
731 COSTS_N_INSNS (74), /* DI */
732 COSTS_N_INSNS (74)}, /* other */
733 COSTS_N_INSNS (1), /* cost of movsx */
734 COSTS_N_INSNS (1), /* cost of movzx */
735 8, /* "large" insn */
736 17, /* MOVE_RATIO */
737 4, /* cost for loading QImode using movzbl */
738 {4, 4, 4}, /* cost of loading integer registers
739 in QImode, HImode and SImode.
740 Relative to reg-reg move (2). */
741 {4, 4, 4}, /* cost of storing integer registers */
742 4, /* cost of reg,reg fld/fst */
743 {12, 12, 12}, /* cost of loading fp registers
744 in SFmode, DFmode and XFmode */
745 {6, 6, 8}, /* cost of storing fp registers
746 in SFmode, DFmode and XFmode */
747 2, /* cost of moving MMX register */
748 {8, 8}, /* cost of loading MMX registers
749 in SImode and DImode */
750 {8, 8}, /* cost of storing MMX registers
751 in SImode and DImode */
752 2, /* cost of moving SSE register */
753 {8, 8, 8}, /* cost of loading SSE registers
754 in SImode, DImode and TImode */
755 {8, 8, 8}, /* cost of storing SSE registers
756 in SImode, DImode and TImode */
757 5, /* MMX or SSE register to integer */
758 64, /* size of prefetch block */
759 6, /* number of parallel prefetches */
760 3, /* Branch cost */
761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
764 COSTS_N_INSNS (8), /* cost of FABS instruction. */
765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
767 };
768
769 const struct processor_costs *ix86_cost = &pentium_cost;
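/* Editor's note: an illustrative sketch, not part of the original file.
   The &pentium_cost above is only the statically-initialized default;
   option handling later repoints ix86_cost at the table matching the
   selected -mtune CPU, roughly as follows (assuming the table and field
   names used by override_options in this era of the port):

     ix86_cost = processor_target_table[ix86_tune].cost;  */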
770
771 /* Processor feature/optimization bitmasks. */
772 #define m_386 (1<<PROCESSOR_I386)
773 #define m_486 (1<<PROCESSOR_I486)
774 #define m_PENT (1<<PROCESSOR_PENTIUM)
775 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
776 #define m_GEODE (1<<PROCESSOR_GEODE)
777 #define m_K6_GEODE (m_K6 | m_GEODE)
778 #define m_K6 (1<<PROCESSOR_K6)
779 #define m_ATHLON (1<<PROCESSOR_ATHLON)
780 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
781 #define m_K8 (1<<PROCESSOR_K8)
782 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
783 #define m_NOCONA (1<<PROCESSOR_NOCONA)
784 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
785 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
786 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
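/* Editor's note: an illustrative sketch, not part of the original file.
   Each x86_* tuning variable below is a bitmask over these m_* processor
   bits; i386.h tests it against the bit of the CPU selected by -mtune,
   roughly along these lines (macro names assumed from i386.h):

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)  */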
787
788 /* Generic instruction choice should be the common subset of supported CPUs
789 (PPro/PENT4/NOCONA/Athlon/K8). */
790
791 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
792 Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
793 generic because it does not work well with PPro based chips. */
794 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
795 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
796 const int x86_zero_extend_with_and = m_486 | m_PENT;
797 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
798 const int x86_double_with_add = ~m_386;
799 const int x86_use_bit_test = m_386;
800 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
801 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
802 const int x86_3dnow_a = m_ATHLON_K8;
803 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
804 /* Branch hints were put in P4 based on simulation results. But
805 after P4 was made, no performance benefit was observed with
806 branch hints. They also increase the code size. As a result,
807 icc never generates branch hints. */
808 const int x86_branch_hints = 0;
809 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
810 /* We probably ought to watch for partial register stalls on the Generic32
811 compilation setting as well. However, in the current implementation
812 partial register stalls are not eliminated very well - they can
813 be introduced via subregs synthesized by combine and can happen
814 in caller/callee saving sequences.
815 Because this option pays back little on PPro based chips and is in conflict
816 with the partial reg. dependencies used by Athlon/P4 based chips, it is better
817 to leave it off for generic32 for now. */
818 const int x86_partial_reg_stall = m_PPRO;
819 const int x86_partial_flag_reg_stall = m_GENERIC;
820 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
821 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
822 const int x86_use_mov0 = m_K6;
823 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
824 const int x86_read_modify_write = ~m_PENT;
825 const int x86_read_modify = ~(m_PENT | m_PPRO);
826 const int x86_split_long_moves = m_PPRO;
827 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
828 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
829 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
830 const int x86_qimode_math = ~(0);
831 const int x86_promote_qi_regs = 0;
832 /* On PPro this flag is meant to avoid partial register stalls. Just like
833 x86_partial_reg_stall, this option might be considered for Generic32
834 if our scheme for avoiding partial stalls were more effective. */
835 const int x86_himode_math = ~(m_PPRO);
836 const int x86_promote_hi_regs = m_PPRO;
837 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
838 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
839 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC;
840 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
841 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_GEODE);
842 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
843 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
844 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
845 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
846 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
847 const int x86_shift1 = ~m_486;
848 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
849 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
850 that treat 128bit SSE registers as single units and K8 based chips that
851 split SSE registers into two 64bit halves.
852 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
853 to allow register renaming on 128bit SSE units, but usually results in one
854 extra microop on 64bit SSE units. Experimental results show that disabling
855 this option on P4 brings over a 20% SPECfp regression, while enabling it on
856 K8 brings roughly a 2.4% regression that can be partly masked by careful
857 scheduling of moves. */
858 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
859 /* Set for machines where the type and dependencies are resolved on SSE
860 register parts instead of whole registers, so we may maintain just
861 lower part of scalar values in proper format leaving the upper part
862 undefined. */
863 const int x86_sse_split_regs = m_ATHLON_K8;
864 const int x86_sse_typeless_stores = m_ATHLON_K8;
865 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
866 const int x86_use_ffreep = m_ATHLON_K8;
867 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
868 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
869
870 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
871 integer data in xmm registers, which results in pretty abysmal code. */
872 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
873
874 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
875 /* Some CPU cores are not able to predict more than 4 branch instructions in
876 the 16 byte window. */
877 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
878 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_GENERIC;
879 const int x86_use_bt = m_ATHLON_K8;
880 /* Compare and exchange was added for 80486. */
881 const int x86_cmpxchg = ~m_386;
882 /* Compare and exchange 8 bytes was added for pentium. */
883 const int x86_cmpxchg8b = ~(m_386 | m_486);
884 /* Compare and exchange 16 bytes was added for nocona. */
885 const int x86_cmpxchg16b = m_NOCONA;
886 /* Exchange and add was added for 80486. */
887 const int x86_xadd = ~m_386;
888 /* Byteswap was added for 80486. */
889 const int x86_bswap = ~m_386;
890 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
891
892 /* In case the average insn count for a single function invocation is
893 lower than this constant, emit fast (but longer) prologue and
894 epilogue code. */
895 #define FAST_PROLOGUE_INSN_COUNT 20
896
897 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively. */
898 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
899 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
900 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
901
902 /* Array of the smallest class containing reg number REGNO, indexed by
903 REGNO. Used by REGNO_REG_CLASS in i386.h. */
904
905 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
906 {
907 /* ax, dx, cx, bx */
908 AREG, DREG, CREG, BREG,
909 /* si, di, bp, sp */
910 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
911 /* FP registers */
912 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
913 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
914 /* arg pointer */
915 NON_Q_REGS,
916 /* flags, fpsr, fpcr, dirflag, frame */
917 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
918 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
919 SSE_REGS, SSE_REGS,
920 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
921 MMX_REGS, MMX_REGS,
922 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
923 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
924 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
925 SSE_REGS, SSE_REGS,
926 };
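/* Editor's note: a small worked example, not part of the original file.
   regclass_map[0] is AREG, so REGNO_REG_CLASS (0) in i386.h resolves
   %eax to its own single-register class, while regclass_map[7] (%esp)
   is NON_Q_REGS, reflecting that the stack pointer has no 8-bit
   counterpart in 32-bit mode.  */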
927
928 /* The "default" register map used in 32bit mode. */
929
930 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
931 {
932 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
933 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
934 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
935 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
936 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
937 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
938 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
939 };
940
941 static int const x86_64_int_parameter_registers[6] =
942 {
943 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
944 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
945 };
946
947 static int const x86_64_int_return_registers[4] =
948 {
949 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
950 };
951
952 /* The "default" register map used in 64bit mode. */
953 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
954 {
955 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
956 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
957 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
958 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
959 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
960 8,9,10,11,12,13,14,15, /* extended integer registers */
961 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
962 };
963
964 /* Define the register numbers to be used in Dwarf debugging information.
965 The SVR4 reference port C compiler uses the following register numbers
966 in its Dwarf output code:
967 0 for %eax (gcc regno = 0)
968 1 for %ecx (gcc regno = 2)
969 2 for %edx (gcc regno = 1)
970 3 for %ebx (gcc regno = 3)
971 4 for %esp (gcc regno = 7)
972 5 for %ebp (gcc regno = 6)
973 6 for %esi (gcc regno = 4)
974 7 for %edi (gcc regno = 5)
975 The following three DWARF register numbers are never generated by
976 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
977 believes these numbers have these meanings.
978 8 for %eip (no gcc equivalent)
979 9 for %eflags (gcc regno = 17)
980 10 for %trapno (no gcc equivalent)
981 It is not at all clear how we should number the FP stack registers
982 for the x86 architecture. If the version of SDB on x86/svr4 were
983 a bit less brain dead with respect to floating-point then we would
984 have a precedent to follow with respect to DWARF register numbers
985 for x86 FP registers, but the SDB on x86/svr4 is so completely
986 broken with respect to FP registers that it is hardly worth thinking
987 of it as something to strive for compatibility with.
988 The version of x86/svr4 SDB I have at the moment does (partially)
989 seem to believe that DWARF register number 11 is associated with
990 the x86 register %st(0), but that's about all. Higher DWARF
991 register numbers don't seem to be associated with anything in
992 particular, and even for DWARF regno 11, SDB only seems to under-
993 stand that it should say that a variable lives in %st(0) (when
994 asked via an `=' command) if we said it was in DWARF regno 11,
995 but SDB still prints garbage when asked for the value of the
996 variable in question (via a `/' command).
997 (Also note that the labels SDB prints for various FP stack regs
998 when doing an `x' command are all wrong.)
999 Note that these problems generally don't affect the native SVR4
1000 C compiler because it doesn't allow the use of -O with -g and
1001 because when it is *not* optimizing, it allocates a memory
1002 location for each floating-point variable, and the memory
1003 location is what gets described in the DWARF AT_location
1004 attribute for the variable in question.
1005 Regardless of the severe mental illness of the x86/svr4 SDB, we
1006 do something sensible here and we use the following DWARF
1007 register numbers. Note that these are all stack-top-relative
1008 numbers.
1009 11 for %st(0) (gcc regno = 8)
1010 12 for %st(1) (gcc regno = 9)
1011 13 for %st(2) (gcc regno = 10)
1012 14 for %st(3) (gcc regno = 11)
1013 15 for %st(4) (gcc regno = 12)
1014 16 for %st(5) (gcc regno = 13)
1015 17 for %st(6) (gcc regno = 14)
1016 18 for %st(7) (gcc regno = 15)
1017 */
1018 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1019 {
1020 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1021 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1022 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1023 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1024 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1025 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1026 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1027 };
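/* Editor's note: a small worked example, not part of the original file.
   Reading the table together with the numbering comment above: gcc regno 1
   is %edx and svr4_dbx_register_map[1] == 2, matching "2 for %edx"; gcc
   regno 7 is %esp and maps to DWARF register 4, matching "4 for %esp".  */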
1028
1029 /* Test and compare insns in i386.md store the information needed to
1030 generate branch and scc insns here. */
1031
1032 rtx ix86_compare_op0 = NULL_RTX;
1033 rtx ix86_compare_op1 = NULL_RTX;
1034 rtx ix86_compare_emitted = NULL_RTX;
1035
1036 /* Size of the register save area. */
1037 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
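/* Editor's note: a brief worked example, not part of the original file.
   In 64-bit mode REGPARM_MAX is 6, SSE_REGPARM_MAX is 8 and UNITS_PER_WORD
   is 8, so X86_64_VARARGS_SIZE evaluates to 6*8 + 8*16 = 176 bytes, the
   register save area size prescribed by the x86-64 psABI.  */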
1038
1039 /* Define the structure for the machine field in struct function. */
1040
1041 struct stack_local_entry GTY(())
1042 {
1043 unsigned short mode;
1044 unsigned short n;
1045 rtx rtl;
1046 struct stack_local_entry *next;
1047 };
1048
1049 /* Structure describing stack frame layout.
1050 Stack grows downward:
1051
1052 [arguments]
1053 <- ARG_POINTER
1054 saved pc
1055
1056 saved frame pointer if frame_pointer_needed
1057 <- HARD_FRAME_POINTER
1058 [saved regs]
1059
1060 [padding1] \
1061 )
1062 [va_arg registers] (
1063 > to_allocate <- FRAME_POINTER
1064 [frame] (
1065 )
1066 [padding2] /
1067 */
1068 struct ix86_frame
1069 {
1070 int nregs;
1071 int padding1;
1072 int va_arg_size;
1073 HOST_WIDE_INT frame;
1074 int padding2;
1075 int outgoing_arguments_size;
1076 int red_zone_size;
1077
1078 HOST_WIDE_INT to_allocate;
1079 /* The offsets relative to ARG_POINTER. */
1080 HOST_WIDE_INT frame_pointer_offset;
1081 HOST_WIDE_INT hard_frame_pointer_offset;
1082 HOST_WIDE_INT stack_pointer_offset;
1083
1084 /* When save_regs_using_mov is set, emit prologue using
1085 move instead of push instructions. */
1086 bool save_regs_using_mov;
1087 };
1088
1089 /* Code model option. */
1090 enum cmodel ix86_cmodel;
1091 /* Asm dialect. */
1092 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1093 /* TLS dialects. */
1094 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1095
1096 /* Which unit we are generating floating point math for. */
1097 enum fpmath_unit ix86_fpmath;
1098
1099 /* Which cpu are we scheduling for. */
1100 enum processor_type ix86_tune;
1101 /* Which instruction set architecture to use. */
1102 enum processor_type ix86_arch;
1103
1104 /* true if sse prefetch instruction is not NOOP. */
1105 int x86_prefetch_sse;
1106
1107 /* ix86_regparm_string as a number */
1108 static int ix86_regparm;
1109
1110 /* -mstackrealign option */
1111 extern int ix86_force_align_arg_pointer;
1112 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1113
1114 /* Preferred alignment for stack boundary in bits. */
1115 unsigned int ix86_preferred_stack_boundary;
1116
1117 /* Values 1-5: see jump.c */
1118 int ix86_branch_cost;
1119
1120 /* Variables which are this size or smaller are put in the data/bss
1121 or ldata/lbss sections. */
1122
1123 int ix86_section_threshold = 65536;
1124
1125 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1126 char internal_label_prefix[16];
1127 int internal_label_prefix_len;
1128 \f
1129 static bool ix86_handle_option (size_t, const char *, int);
1130 static void output_pic_addr_const (FILE *, rtx, int);
1131 static void put_condition_code (enum rtx_code, enum machine_mode,
1132 int, int, FILE *);
1133 static const char *get_some_local_dynamic_name (void);
1134 static int get_some_local_dynamic_name_1 (rtx *, void *);
1135 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1136 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1137 rtx *);
1138 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1139 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1140 enum machine_mode);
1141 static rtx get_thread_pointer (int);
1142 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1143 static void get_pc_thunk_name (char [32], unsigned int);
1144 static rtx gen_push (rtx);
1145 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1146 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1147 static struct machine_function * ix86_init_machine_status (void);
1148 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1149 static int ix86_nsaved_regs (void);
1150 static void ix86_emit_save_regs (void);
1151 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1152 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1153 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1154 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1155 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1156 static rtx ix86_expand_aligntest (rtx, int);
1157 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1158 static int ix86_issue_rate (void);
1159 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1160 static int ia32_multipass_dfa_lookahead (void);
1161 static void ix86_init_mmx_sse_builtins (void);
1162 static rtx x86_this_parameter (tree);
1163 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1164 HOST_WIDE_INT, tree);
1165 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1166 static void x86_file_start (void);
1167 static void ix86_reorg (void);
1168 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1169 static tree ix86_build_builtin_va_list (void);
1170 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1171 tree, int *, int);
1172 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1173 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1174 static bool ix86_vector_mode_supported_p (enum machine_mode);
1175
1176 static int ix86_address_cost (rtx);
1177 static bool ix86_cannot_force_const_mem (rtx);
1178 static rtx ix86_delegitimize_address (rtx);
1179
1180 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1181
1182 struct builtin_description;
1183 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1184 tree, rtx);
1185 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1186 tree, rtx);
1187 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1188 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1189 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1190 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1191 static rtx safe_vector_operand (rtx, enum machine_mode);
1192 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1193 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1194 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1195 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1196 static int ix86_fp_comparison_cost (enum rtx_code code);
1197 static unsigned int ix86_select_alt_pic_regnum (void);
1198 static int ix86_save_reg (unsigned int, int);
1199 static void ix86_compute_frame_layout (struct ix86_frame *);
1200 static int ix86_comp_type_attributes (tree, tree);
1201 static int ix86_function_regparm (tree, tree);
1202 const struct attribute_spec ix86_attribute_table[];
1203 static bool ix86_function_ok_for_sibcall (tree, tree);
1204 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1205 static int ix86_value_regno (enum machine_mode, tree, tree);
1206 static bool contains_128bit_aligned_vector_p (tree);
1207 static rtx ix86_struct_value_rtx (tree, int);
1208 static bool ix86_ms_bitfield_layout_p (tree);
1209 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1210 static int extended_reg_mentioned_1 (rtx *, void *);
1211 static bool ix86_rtx_costs (rtx, int, int, int *);
1212 static int min_insn_size (rtx);
1213 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1214 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1215 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1216 tree, bool);
1217 static void ix86_init_builtins (void);
1218 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1219 static const char *ix86_mangle_fundamental_type (tree);
1220 static tree ix86_stack_protect_fail (void);
1221 static rtx ix86_internal_arg_pointer (void);
1222 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1223
1224 /* This function is only used on Solaris. */
1225 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1226 ATTRIBUTE_UNUSED;
1227
1228 /* Register class used for passing a given 64bit part of the argument.
1229 These represent classes as documented by the psABI, with the exception of
1230 the SSESF and SSEDF classes, which are basically the SSE class, except that
1231 gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
1232
1233 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1234 whenever possible (the upper half contains padding).
1235 */
1236 enum x86_64_reg_class
1237 {
1238 X86_64_NO_CLASS,
1239 X86_64_INTEGER_CLASS,
1240 X86_64_INTEGERSI_CLASS,
1241 X86_64_SSE_CLASS,
1242 X86_64_SSESF_CLASS,
1243 X86_64_SSEDF_CLASS,
1244 X86_64_SSEUP_CLASS,
1245 X86_64_X87_CLASS,
1246 X86_64_X87UP_CLASS,
1247 X86_64_COMPLEX_X87_CLASS,
1248 X86_64_MEMORY_CLASS
1249 };
1250 static const char * const x86_64_reg_class_name[] = {
1251 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1252 "sseup", "x87", "x87up", "cplx87", "no"
1253 };
1254
1255 #define MAX_CLASSES 4
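/* Editor's note: an illustrative example, not part of the original file.
   As one instance of the classification these enumerators describe, the
   x86-64 psABI splits an argument into 8-byte chunks and classifies each
   one: for struct { long l; double d; } the first chunk is
   X86_64_INTEGER_CLASS (passed in a general register) and the second is
   X86_64_SSE_CLASS (passed in an XMM register), well within the
   MAX_CLASSES limit of 4 chunks above.  */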
1256
1257 /* Table of constants used by fldpi, fldln2, etc.... */
1258 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1259 static bool ext_80387_constants_init = 0;
1260 static void init_ext_80387_constants (void);
1261 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1262 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1263 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1264 static section *x86_64_elf_select_section (tree decl, int reloc,
1265 unsigned HOST_WIDE_INT align)
1266 ATTRIBUTE_UNUSED;
1267 \f
1268 /* Initialize the GCC target structure. */
1269 #undef TARGET_ATTRIBUTE_TABLE
1270 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1271 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1272 # undef TARGET_MERGE_DECL_ATTRIBUTES
1273 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1274 #endif
1275
1276 #undef TARGET_COMP_TYPE_ATTRIBUTES
1277 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1278
1279 #undef TARGET_INIT_BUILTINS
1280 #define TARGET_INIT_BUILTINS ix86_init_builtins
1281 #undef TARGET_EXPAND_BUILTIN
1282 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1283
1284 #undef TARGET_ASM_FUNCTION_EPILOGUE
1285 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1286
1287 #undef TARGET_ENCODE_SECTION_INFO
1288 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1289 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1290 #else
1291 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1292 #endif
1293
1294 #undef TARGET_ASM_OPEN_PAREN
1295 #define TARGET_ASM_OPEN_PAREN ""
1296 #undef TARGET_ASM_CLOSE_PAREN
1297 #define TARGET_ASM_CLOSE_PAREN ""
1298
1299 #undef TARGET_ASM_ALIGNED_HI_OP
1300 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1301 #undef TARGET_ASM_ALIGNED_SI_OP
1302 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1303 #ifdef ASM_QUAD
1304 #undef TARGET_ASM_ALIGNED_DI_OP
1305 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1306 #endif
1307
1308 #undef TARGET_ASM_UNALIGNED_HI_OP
1309 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1310 #undef TARGET_ASM_UNALIGNED_SI_OP
1311 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1312 #undef TARGET_ASM_UNALIGNED_DI_OP
1313 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1314
1315 #undef TARGET_SCHED_ADJUST_COST
1316 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1317 #undef TARGET_SCHED_ISSUE_RATE
1318 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1319 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1320 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1321 ia32_multipass_dfa_lookahead
1322
1323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1324 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1325
1326 #ifdef HAVE_AS_TLS
1327 #undef TARGET_HAVE_TLS
1328 #define TARGET_HAVE_TLS true
1329 #endif
1330 #undef TARGET_CANNOT_FORCE_CONST_MEM
1331 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1332 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1333 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1334
1335 #undef TARGET_DELEGITIMIZE_ADDRESS
1336 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1337
1338 #undef TARGET_MS_BITFIELD_LAYOUT_P
1339 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1340
1341 #if TARGET_MACHO
1342 #undef TARGET_BINDS_LOCAL_P
1343 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1344 #endif
1345
1346 #undef TARGET_ASM_OUTPUT_MI_THUNK
1347 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1348 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1349 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1350
1351 #undef TARGET_ASM_FILE_START
1352 #define TARGET_ASM_FILE_START x86_file_start
1353
1354 #undef TARGET_DEFAULT_TARGET_FLAGS
1355 #define TARGET_DEFAULT_TARGET_FLAGS \
1356 (TARGET_DEFAULT \
1357 | TARGET_64BIT_DEFAULT \
1358 | TARGET_SUBTARGET_DEFAULT \
1359 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1360
1361 #undef TARGET_HANDLE_OPTION
1362 #define TARGET_HANDLE_OPTION ix86_handle_option
1363
1364 #undef TARGET_RTX_COSTS
1365 #define TARGET_RTX_COSTS ix86_rtx_costs
1366 #undef TARGET_ADDRESS_COST
1367 #define TARGET_ADDRESS_COST ix86_address_cost
1368
1369 #undef TARGET_FIXED_CONDITION_CODE_REGS
1370 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1371 #undef TARGET_CC_MODES_COMPATIBLE
1372 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1373
1374 #undef TARGET_MACHINE_DEPENDENT_REORG
1375 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1376
1377 #undef TARGET_BUILD_BUILTIN_VA_LIST
1378 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1379
1380 #undef TARGET_MD_ASM_CLOBBERS
1381 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1382
1383 #undef TARGET_PROMOTE_PROTOTYPES
1384 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1385 #undef TARGET_STRUCT_VALUE_RTX
1386 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1387 #undef TARGET_SETUP_INCOMING_VARARGS
1388 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1389 #undef TARGET_MUST_PASS_IN_STACK
1390 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1391 #undef TARGET_PASS_BY_REFERENCE
1392 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1393 #undef TARGET_INTERNAL_ARG_POINTER
1394 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1395 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1396 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1397
1398 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1399 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1400
1401 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1402 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1403
1404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1405 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1406
1407 #ifdef HAVE_AS_TLS
1408 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1409 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1410 #endif
1411
1412 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1413 #undef TARGET_INSERT_ATTRIBUTES
1414 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1415 #endif
1416
1417 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1418 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1419
1420 #undef TARGET_STACK_PROTECT_FAIL
1421 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1422
1423 #undef TARGET_FUNCTION_VALUE
1424 #define TARGET_FUNCTION_VALUE ix86_function_value
1425
1426 struct gcc_target targetm = TARGET_INITIALIZER;
1427
1428 \f
1429 /* The svr4 ABI for the i386 says that records and unions are returned
1430 in memory. */
1431 #ifndef DEFAULT_PCC_STRUCT_RETURN
1432 #define DEFAULT_PCC_STRUCT_RETURN 1
1433 #endif
1434
1435 /* Implement TARGET_HANDLE_OPTION. */
1436
1437 static bool
1438 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1439 {
1440 switch (code)
1441 {
1442 case OPT_m3dnow:
1443 if (!value)
1444 {
1445 target_flags &= ~MASK_3DNOW_A;
1446 target_flags_explicit |= MASK_3DNOW_A;
1447 }
1448 return true;
1449
1450 case OPT_mmmx:
1451 if (!value)
1452 {
1453 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1454 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1455 }
1456 return true;
1457
1458 case OPT_msse:
1459 if (!value)
1460 {
1461 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1462 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1463 }
1464 return true;
1465
1466 case OPT_msse2:
1467 if (!value)
1468 {
1469 target_flags &= ~MASK_SSE3;
1470 target_flags_explicit |= MASK_SSE3;
1471 }
1472 return true;
1473
1474 default:
1475 return true;
1476 }
1477 }
1478
1479 /* Sometimes certain combinations of command options do not make
1480 sense on a particular target machine. You can define a macro
1481 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1482 defined, is executed once just after all the command options have
1483 been parsed.
1484
1485 Don't use this macro to turn on various extra optimizations for
1486 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1487
1488 void
1489 override_options (void)
1490 {
1491 int i;
1492 int ix86_tune_defaulted = 0;
1493
1494 /* Comes from final.c -- no real reason to change it. */
1495 #define MAX_CODE_ALIGN 16
1496
1497 static struct ptt
1498 {
1499 const struct processor_costs *cost; /* Processor costs */
1500 const int target_enable; /* Target flags to enable. */
1501 const int target_disable; /* Target flags to disable. */
1502 const int align_loop; /* Default alignments. */
1503 const int align_loop_max_skip;
1504 const int align_jump;
1505 const int align_jump_max_skip;
1506 const int align_func;
1507 }
1508 const processor_target_table[PROCESSOR_max] =
1509 {
1510 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1511 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1512 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1513 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1514 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1515 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1516 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1517 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1518 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1519 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1520 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1521 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1522 };
1523
1524 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1525 static struct pta
1526 {
1527 const char *const name; /* processor name or nickname. */
1528 const enum processor_type processor;
1529 const enum pta_flags
1530 {
1531 PTA_SSE = 1,
1532 PTA_SSE2 = 2,
1533 PTA_SSE3 = 4,
1534 PTA_MMX = 8,
1535 PTA_PREFETCH_SSE = 16,
1536 PTA_3DNOW = 32,
1537 PTA_3DNOW_A = 64,
1538 PTA_64BIT = 128,
1539 PTA_SSSE3 = 256
1540 } flags;
1541 }
1542 const processor_alias_table[] =
1543 {
1544 {"i386", PROCESSOR_I386, 0},
1545 {"i486", PROCESSOR_I486, 0},
1546 {"i586", PROCESSOR_PENTIUM, 0},
1547 {"pentium", PROCESSOR_PENTIUM, 0},
1548 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1549 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1550 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1551 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1552 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1553 {"i686", PROCESSOR_PENTIUMPRO, 0},
1554 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1555 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1556 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1557 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1558 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1559 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1560 | PTA_MMX | PTA_PREFETCH_SSE},
1561 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1562 | PTA_MMX | PTA_PREFETCH_SSE},
1563 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1564 | PTA_MMX | PTA_PREFETCH_SSE},
1565 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1566 | PTA_MMX | PTA_PREFETCH_SSE},
1567 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1568 | PTA_3DNOW_A},
1569 {"k6", PROCESSOR_K6, PTA_MMX},
1570 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1571 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1572 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1573 | PTA_3DNOW_A},
1574 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1575 | PTA_3DNOW | PTA_3DNOW_A},
1576 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1577 | PTA_3DNOW_A | PTA_SSE},
1578 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1579 | PTA_3DNOW_A | PTA_SSE},
1580 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1581 | PTA_3DNOW_A | PTA_SSE},
1582 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1583 | PTA_SSE | PTA_SSE2 },
1584 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1585 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1586 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1587 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1588 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1589 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1590 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1591 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1592 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1593 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1594 };
1595
1596 int const pta_size = ARRAY_SIZE (processor_alias_table);
1597
1598 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1599 SUBTARGET_OVERRIDE_OPTIONS;
1600 #endif
1601
1602 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1603 SUBSUBTARGET_OVERRIDE_OPTIONS;
1604 #endif
1605
1606   /* -fPIC is the default for x86_64 on Darwin (Mach-O).  */
1607 if (TARGET_MACHO && TARGET_64BIT)
1608 flag_pic = 2;
1609
1610 /* Set the default values for switches whose default depends on TARGET_64BIT
1611 in case they weren't overwritten by command line options. */
1612 if (TARGET_64BIT)
1613 {
1614 /* Mach-O doesn't support omitting the frame pointer for now. */
1615 if (flag_omit_frame_pointer == 2)
1616 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1617 if (flag_asynchronous_unwind_tables == 2)
1618 flag_asynchronous_unwind_tables = 1;
1619 if (flag_pcc_struct_return == 2)
1620 flag_pcc_struct_return = 0;
1621 }
1622 else
1623 {
1624 if (flag_omit_frame_pointer == 2)
1625 flag_omit_frame_pointer = 0;
1626 if (flag_asynchronous_unwind_tables == 2)
1627 flag_asynchronous_unwind_tables = 0;
1628 if (flag_pcc_struct_return == 2)
1629 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1630 }
1631
1632 /* Need to check -mtune=generic first. */
1633 if (ix86_tune_string)
1634 {
1635 if (!strcmp (ix86_tune_string, "generic")
1636 || !strcmp (ix86_tune_string, "i686")
1637 	  /* As a special case for cross compilers we treat -mtune=native
1638 	     as -mtune=generic.  With native compilers we won't see
1639 	     -mtune=native, as the driver has already replaced it.  */
1640 || !strcmp (ix86_tune_string, "native"))
1641 {
1642 if (TARGET_64BIT)
1643 ix86_tune_string = "generic64";
1644 else
1645 ix86_tune_string = "generic32";
1646 }
1647 else if (!strncmp (ix86_tune_string, "generic", 7))
1648 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1649 }
1650 else
1651 {
1652 if (ix86_arch_string)
1653 ix86_tune_string = ix86_arch_string;
1654 if (!ix86_tune_string)
1655 {
1656 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1657 ix86_tune_defaulted = 1;
1658 }
1659
1660 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1661 need to use a sensible tune option. */
1662 if (!strcmp (ix86_tune_string, "generic")
1663 || !strcmp (ix86_tune_string, "x86-64")
1664 || !strcmp (ix86_tune_string, "i686"))
1665 {
1666 if (TARGET_64BIT)
1667 ix86_tune_string = "generic64";
1668 else
1669 ix86_tune_string = "generic32";
1670 }
1671 }
1672 if (!strcmp (ix86_tune_string, "x86-64"))
1673 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1674 "-mtune=generic instead as appropriate.");
1675
1676 if (!ix86_arch_string)
1677 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1678 if (!strcmp (ix86_arch_string, "generic"))
1679 error ("generic CPU can be used only for -mtune= switch");
1680 if (!strncmp (ix86_arch_string, "generic", 7))
1681 error ("bad value (%s) for -march= switch", ix86_arch_string);
1682
1683 if (ix86_cmodel_string != 0)
1684 {
1685 if (!strcmp (ix86_cmodel_string, "small"))
1686 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1687 else if (!strcmp (ix86_cmodel_string, "medium"))
1688 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1689 else if (flag_pic)
1690 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1691 else if (!strcmp (ix86_cmodel_string, "32"))
1692 ix86_cmodel = CM_32;
1693 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1694 ix86_cmodel = CM_KERNEL;
1695 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1696 ix86_cmodel = CM_LARGE;
1697 else
1698 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1699 }
1700 else
1701 {
1702 ix86_cmodel = CM_32;
1703 if (TARGET_64BIT)
1704 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1705 }
1706 if (ix86_asm_string != 0)
1707 {
1708 if (! TARGET_MACHO
1709 && !strcmp (ix86_asm_string, "intel"))
1710 ix86_asm_dialect = ASM_INTEL;
1711 else if (!strcmp (ix86_asm_string, "att"))
1712 ix86_asm_dialect = ASM_ATT;
1713 else
1714 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1715 }
1716 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1717 error ("code model %qs not supported in the %s bit mode",
1718 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1719 if (ix86_cmodel == CM_LARGE)
1720 sorry ("code model %<large%> not supported yet");
1721 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1722 sorry ("%i-bit mode not compiled in",
1723 (target_flags & MASK_64BIT) ? 64 : 32);
1724
1725 for (i = 0; i < pta_size; i++)
1726 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1727 {
1728 ix86_arch = processor_alias_table[i].processor;
1729 /* Default cpu tuning to the architecture. */
1730 ix86_tune = ix86_arch;
1731 if (processor_alias_table[i].flags & PTA_MMX
1732 && !(target_flags_explicit & MASK_MMX))
1733 target_flags |= MASK_MMX;
1734 if (processor_alias_table[i].flags & PTA_3DNOW
1735 && !(target_flags_explicit & MASK_3DNOW))
1736 target_flags |= MASK_3DNOW;
1737 if (processor_alias_table[i].flags & PTA_3DNOW_A
1738 && !(target_flags_explicit & MASK_3DNOW_A))
1739 target_flags |= MASK_3DNOW_A;
1740 if (processor_alias_table[i].flags & PTA_SSE
1741 && !(target_flags_explicit & MASK_SSE))
1742 target_flags |= MASK_SSE;
1743 if (processor_alias_table[i].flags & PTA_SSE2
1744 && !(target_flags_explicit & MASK_SSE2))
1745 target_flags |= MASK_SSE2;
1746 if (processor_alias_table[i].flags & PTA_SSE3
1747 && !(target_flags_explicit & MASK_SSE3))
1748 target_flags |= MASK_SSE3;
1749 if (processor_alias_table[i].flags & PTA_SSSE3
1750 && !(target_flags_explicit & MASK_SSSE3))
1751 target_flags |= MASK_SSSE3;
1752 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1753 x86_prefetch_sse = true;
1754 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1755 error ("CPU you selected does not support x86-64 "
1756 "instruction set");
1757 break;
1758 }
1759
1760 if (i == pta_size)
1761 error ("bad value (%s) for -march= switch", ix86_arch_string);
1762
1763 for (i = 0; i < pta_size; i++)
1764 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1765 {
1766 ix86_tune = processor_alias_table[i].processor;
1767 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1768 {
1769 if (ix86_tune_defaulted)
1770 {
1771 ix86_tune_string = "x86-64";
1772 for (i = 0; i < pta_size; i++)
1773 if (! strcmp (ix86_tune_string,
1774 processor_alias_table[i].name))
1775 break;
1776 ix86_tune = processor_alias_table[i].processor;
1777 }
1778 else
1779 error ("CPU you selected does not support x86-64 "
1780 "instruction set");
1781 }
1782 /* Intel CPUs have always interpreted SSE prefetch instructions as
1783 NOPs; so, we can enable SSE prefetch instructions even when
1784 -mtune (rather than -march) points us to a processor that has them.
1785 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1786 higher processors. */
1787 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1788 x86_prefetch_sse = true;
1789 break;
1790 }
1791 if (i == pta_size)
1792 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1793
1794 if (optimize_size)
1795 ix86_cost = &size_cost;
1796 else
1797 ix86_cost = processor_target_table[ix86_tune].cost;
1798 target_flags |= processor_target_table[ix86_tune].target_enable;
1799 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1800
1801 /* Arrange to set up i386_stack_locals for all functions. */
1802 init_machine_status = ix86_init_machine_status;
1803
1804 /* Validate -mregparm= value. */
1805 if (ix86_regparm_string)
1806 {
1807 i = atoi (ix86_regparm_string);
1808 if (i < 0 || i > REGPARM_MAX)
1809 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1810 else
1811 ix86_regparm = i;
1812 }
1813 else
1814 if (TARGET_64BIT)
1815 ix86_regparm = REGPARM_MAX;
1816
1817 /* If the user has provided any of the -malign-* options,
1818 warn and use that value only if -falign-* is not set.
1819 Remove this code in GCC 3.2 or later. */
1820 if (ix86_align_loops_string)
1821 {
1822 warning (0, "-malign-loops is obsolete, use -falign-loops");
1823 if (align_loops == 0)
1824 {
1825 i = atoi (ix86_align_loops_string);
1826 if (i < 0 || i > MAX_CODE_ALIGN)
1827 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1828 else
1829 align_loops = 1 << i;
1830 }
1831 }
1832
1833 if (ix86_align_jumps_string)
1834 {
1835 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1836 if (align_jumps == 0)
1837 {
1838 i = atoi (ix86_align_jumps_string);
1839 if (i < 0 || i > MAX_CODE_ALIGN)
1840 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1841 else
1842 align_jumps = 1 << i;
1843 }
1844 }
1845
1846 if (ix86_align_funcs_string)
1847 {
1848 warning (0, "-malign-functions is obsolete, use -falign-functions");
1849 if (align_functions == 0)
1850 {
1851 i = atoi (ix86_align_funcs_string);
1852 if (i < 0 || i > MAX_CODE_ALIGN)
1853 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1854 else
1855 align_functions = 1 << i;
1856 }
1857 }
1858
1859 /* Default align_* from the processor table. */
1860 if (align_loops == 0)
1861 {
1862 align_loops = processor_target_table[ix86_tune].align_loop;
1863 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1864 }
1865 if (align_jumps == 0)
1866 {
1867 align_jumps = processor_target_table[ix86_tune].align_jump;
1868 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1869 }
1870 if (align_functions == 0)
1871 {
1872 align_functions = processor_target_table[ix86_tune].align_func;
1873 }
1874
1875 /* Validate -mbranch-cost= value, or provide default. */
1876 ix86_branch_cost = ix86_cost->branch_cost;
1877 if (ix86_branch_cost_string)
1878 {
1879 i = atoi (ix86_branch_cost_string);
1880 if (i < 0 || i > 5)
1881 error ("-mbranch-cost=%d is not between 0 and 5", i);
1882 else
1883 ix86_branch_cost = i;
1884 }
1885 if (ix86_section_threshold_string)
1886 {
1887 i = atoi (ix86_section_threshold_string);
1888 if (i < 0)
1889 error ("-mlarge-data-threshold=%d is negative", i);
1890 else
1891 ix86_section_threshold = i;
1892 }
1893
1894 if (ix86_tls_dialect_string)
1895 {
1896 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1897 ix86_tls_dialect = TLS_DIALECT_GNU;
1898 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1899 ix86_tls_dialect = TLS_DIALECT_GNU2;
1900 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1901 ix86_tls_dialect = TLS_DIALECT_SUN;
1902 else
1903 error ("bad value (%s) for -mtls-dialect= switch",
1904 ix86_tls_dialect_string);
1905 }
1906
1907 /* Keep nonleaf frame pointers. */
1908 if (flag_omit_frame_pointer)
1909 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1910 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1911 flag_omit_frame_pointer = 1;
1912
1913 /* If we're doing fast math, we don't care about comparison order
1914 wrt NaNs. This lets us use a shorter comparison sequence. */
1915 if (flag_finite_math_only)
1916 target_flags &= ~MASK_IEEE_FP;
1917
1918 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1919 since the insns won't need emulation. */
1920 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1921 target_flags &= ~MASK_NO_FANCY_MATH_387;
1922
1923 /* Likewise, if the target doesn't have a 387, or we've specified
1924 software floating point, don't use 387 inline intrinsics. */
1925 if (!TARGET_80387)
1926 target_flags |= MASK_NO_FANCY_MATH_387;
1927
1928 /* Turn on SSE3 builtins for -mssse3. */
1929 if (TARGET_SSSE3)
1930 target_flags |= MASK_SSE3;
1931
1932 /* Turn on SSE2 builtins for -msse3. */
1933 if (TARGET_SSE3)
1934 target_flags |= MASK_SSE2;
1935
1936 /* Turn on SSE builtins for -msse2. */
1937 if (TARGET_SSE2)
1938 target_flags |= MASK_SSE;
1939
1940 /* Turn on MMX builtins for -msse. */
1941 if (TARGET_SSE)
1942 {
1943 target_flags |= MASK_MMX & ~target_flags_explicit;
1944 x86_prefetch_sse = true;
1945 }
1946
1947 /* Turn on MMX builtins for 3Dnow. */
1948 if (TARGET_3DNOW)
1949 target_flags |= MASK_MMX;
1950
1951 if (TARGET_64BIT)
1952 {
1953 if (TARGET_ALIGN_DOUBLE)
1954 error ("-malign-double makes no sense in the 64bit mode");
1955 if (TARGET_RTD)
1956 error ("-mrtd calling convention not supported in the 64bit mode");
1957
1958 /* Enable by default the SSE and MMX builtins. Do allow the user to
1959 explicitly disable any of these. In particular, disabling SSE and
1960 MMX for kernel code is extremely useful. */
1961 target_flags
1962 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1963 & ~target_flags_explicit);
1964 }
1965 else
1966 {
1967       /* The i386 ABI does not specify a red zone.  It still makes sense to use it
1968          when the programmer takes care to keep the stack from being destroyed.  */
1969 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1970 target_flags |= MASK_NO_RED_ZONE;
1971 }
1972
1973 /* Validate -mpreferred-stack-boundary= value, or provide default.
1974 The default of 128 bits is for Pentium III's SSE __m128. We can't
1975      lower it just because of optimize_size, since otherwise we couldn't mix object
1976      files compiled with -Os and -On.  */
1977 ix86_preferred_stack_boundary = 128;
1978 if (ix86_preferred_stack_boundary_string)
1979 {
1980 i = atoi (ix86_preferred_stack_boundary_string);
1981 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1982 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1983 TARGET_64BIT ? 4 : 2);
1984 else
1985 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1986 }
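   /* Illustrative note (added for clarity; not part of the original source):
      the option value is an exponent, so e.g. -mpreferred-stack-boundary=4
      yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, matching the
      128-bit default chosen above for SSE __m128 alignment.  */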
1987
1988 /* Accept -msseregparm only if at least SSE support is enabled. */
1989 if (TARGET_SSEREGPARM
1990 && ! TARGET_SSE)
1991 error ("-msseregparm used without SSE enabled");
1992
1993 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1994
1995 if (ix86_fpmath_string != 0)
1996 {
1997 if (! strcmp (ix86_fpmath_string, "387"))
1998 ix86_fpmath = FPMATH_387;
1999 else if (! strcmp (ix86_fpmath_string, "sse"))
2000 {
2001 if (!TARGET_SSE)
2002 {
2003 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2004 ix86_fpmath = FPMATH_387;
2005 }
2006 else
2007 ix86_fpmath = FPMATH_SSE;
2008 }
2009 else if (! strcmp (ix86_fpmath_string, "387,sse")
2010 || ! strcmp (ix86_fpmath_string, "sse,387"))
2011 {
2012 if (!TARGET_SSE)
2013 {
2014 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2015 ix86_fpmath = FPMATH_387;
2016 }
2017 else if (!TARGET_80387)
2018 {
2019 warning (0, "387 instruction set disabled, using SSE arithmetics");
2020 ix86_fpmath = FPMATH_SSE;
2021 }
2022 else
2023 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2024 }
2025 else
2026 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2027 }
2028
2029 /* If the i387 is disabled, then do not return values in it. */
2030 if (!TARGET_80387)
2031 target_flags &= ~MASK_FLOAT_RETURNS;
2032
2033 if ((x86_accumulate_outgoing_args & TUNEMASK)
2034 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2035 && !optimize_size)
2036 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2037
2038 /* ??? Unwind info is not correct around the CFG unless either a frame
2039 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2040 unwind info generation to be aware of the CFG and propagating states
2041 around edges. */
2042 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2043 || flag_exceptions || flag_non_call_exceptions)
2044 && flag_omit_frame_pointer
2045 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2046 {
2047 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2048 warning (0, "unwind tables currently require either a frame pointer "
2049 "or -maccumulate-outgoing-args for correctness");
2050 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2051 }
2052
2053 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2054 {
2055 char *p;
2056 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2057 p = strchr (internal_label_prefix, 'X');
2058 internal_label_prefix_len = p - internal_label_prefix;
2059 *p = '\0';
2060 }
2061
2062 /* When scheduling description is not available, disable scheduler pass
2063 so it won't slow down the compilation and make x87 code slower. */
2064 if (!TARGET_SCHEDULE)
2065 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2066 }
2067 \f
2068 /* Switch to the appropriate section for output of DECL.
2069 DECL is either a `VAR_DECL' node or a constant of some sort.
2070 RELOC indicates whether forming the initial value of DECL requires
2071 link-time relocations. */
2072
2073 static section *
2074 x86_64_elf_select_section (tree decl, int reloc,
2075 unsigned HOST_WIDE_INT align)
2076 {
2077 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2078 && ix86_in_large_data_p (decl))
2079 {
2080 const char *sname = NULL;
2081 unsigned int flags = SECTION_WRITE;
2082 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2083 {
2084 case SECCAT_DATA:
2085 sname = ".ldata";
2086 break;
2087 case SECCAT_DATA_REL:
2088 sname = ".ldata.rel";
2089 break;
2090 case SECCAT_DATA_REL_LOCAL:
2091 sname = ".ldata.rel.local";
2092 break;
2093 case SECCAT_DATA_REL_RO:
2094 sname = ".ldata.rel.ro";
2095 break;
2096 case SECCAT_DATA_REL_RO_LOCAL:
2097 sname = ".ldata.rel.ro.local";
2098 break;
2099 case SECCAT_BSS:
2100 sname = ".lbss";
2101 flags |= SECTION_BSS;
2102 break;
2103 case SECCAT_RODATA:
2104 case SECCAT_RODATA_MERGE_STR:
2105 case SECCAT_RODATA_MERGE_STR_INIT:
2106 case SECCAT_RODATA_MERGE_CONST:
2107 sname = ".lrodata";
2108 flags = 0;
2109 break;
2110 case SECCAT_SRODATA:
2111 case SECCAT_SDATA:
2112 case SECCAT_SBSS:
2113 gcc_unreachable ();
2114 case SECCAT_TEXT:
2115 case SECCAT_TDATA:
2116 case SECCAT_TBSS:
2117 	  /* We don't split these for the medium model.  Place them into
2118 	     default sections and hope for the best.  */
2119 break;
2120 }
2121 if (sname)
2122 {
2123 /* We might get called with string constants, but get_named_section
2124 doesn't like them as they are not DECLs. Also, we need to set
2125 flags in that case. */
2126 if (!DECL_P (decl))
2127 return get_section (sname, flags, NULL);
2128 return get_named_section (decl, sname, reloc);
2129 }
2130 }
2131 return default_elf_select_section (decl, reloc, align);
2132 }
2133
2134 /* Build up a unique section name, expressed as a
2135 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2136 RELOC indicates whether the initial value of EXP requires
2137 link-time relocations. */
2138
2139 static void
2140 x86_64_elf_unique_section (tree decl, int reloc)
2141 {
2142 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2143 && ix86_in_large_data_p (decl))
2144 {
2145 const char *prefix = NULL;
2146 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2147 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2148
2149 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2150 {
2151 case SECCAT_DATA:
2152 case SECCAT_DATA_REL:
2153 case SECCAT_DATA_REL_LOCAL:
2154 case SECCAT_DATA_REL_RO:
2155 case SECCAT_DATA_REL_RO_LOCAL:
2156 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2157 break;
2158 case SECCAT_BSS:
2159 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2160 break;
2161 case SECCAT_RODATA:
2162 case SECCAT_RODATA_MERGE_STR:
2163 case SECCAT_RODATA_MERGE_STR_INIT:
2164 case SECCAT_RODATA_MERGE_CONST:
2165 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2166 break;
2167 case SECCAT_SRODATA:
2168 case SECCAT_SDATA:
2169 case SECCAT_SBSS:
2170 gcc_unreachable ();
2171 case SECCAT_TEXT:
2172 case SECCAT_TDATA:
2173 case SECCAT_TBSS:
2174 	  /* We don't split these for the medium model.  Place them into
2175 	     default sections and hope for the best.  */
2176 break;
2177 }
2178 if (prefix)
2179 {
2180 const char *name;
2181 size_t nlen, plen;
2182 char *string;
2183 plen = strlen (prefix);
2184
2185 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2186 name = targetm.strip_name_encoding (name);
2187 nlen = strlen (name);
2188
2189 string = alloca (nlen + plen + 1);
2190 memcpy (string, prefix, plen);
2191 memcpy (string + plen, name, nlen + 1);
2192
2193 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2194 return;
2195 }
2196 }
2197 default_unique_section (decl, reloc);
2198 }
2199
2200 #ifdef COMMON_ASM_OP
2201 /* This says how to output assembler code to declare an
2202 uninitialized external linkage data object.
2203
2204    For medium-model x86-64 we need to use the .largecomm directive for
2205    large objects.  */
2206 void
2207 x86_elf_aligned_common (FILE *file,
2208 const char *name, unsigned HOST_WIDE_INT size,
2209 int align)
2210 {
2211 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2212 && size > (unsigned int)ix86_section_threshold)
2213 fprintf (file, ".largecomm\t");
2214 else
2215 fprintf (file, "%s", COMMON_ASM_OP);
2216 assemble_name (file, name);
2217 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2218 size, align / BITS_PER_UNIT);
2219 }
2220
2221 /* Utility function for targets to use in implementing
2222 ASM_OUTPUT_ALIGNED_BSS. */
2223
2224 void
2225 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2226 const char *name, unsigned HOST_WIDE_INT size,
2227 int align)
2228 {
2229 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2230 && size > (unsigned int)ix86_section_threshold)
2231 switch_to_section (get_named_section (decl, ".lbss", 0));
2232 else
2233 switch_to_section (bss_section);
2234 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2235 #ifdef ASM_DECLARE_OBJECT_NAME
2236 last_assemble_variable_decl = decl;
2237 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2238 #else
2239 /* Standard thing is just output label for the object. */
2240 ASM_OUTPUT_LABEL (file, name);
2241 #endif /* ASM_DECLARE_OBJECT_NAME */
2242 ASM_OUTPUT_SKIP (file, size ? size : 1);
2243 }
2244 #endif
2245 \f
2246 void
2247 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2248 {
2249 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2250 make the problem with not enough registers even worse. */
2251 #ifdef INSN_SCHEDULING
2252 if (level > 1)
2253 flag_schedule_insns = 0;
2254 #endif
2255
2256 if (TARGET_MACHO)
2257 /* The Darwin libraries never set errno, so we might as well
2258 avoid calling them when that's the only reason we would. */
2259 flag_errno_math = 0;
2260
2261   /* The default values of these switches depend on TARGET_64BIT, which is
2262      not yet known at this point.  Mark these values with 2 and let the
2263      user override them.  If no command line option specifies them, we
2264      will set the defaults in override_options.  */
2265 if (optimize >= 1)
2266 flag_omit_frame_pointer = 2;
2267 flag_pcc_struct_return = 2;
2268 flag_asynchronous_unwind_tables = 2;
2269 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2270 SUBTARGET_OPTIMIZATION_OPTIONS;
2271 #endif
2272 }
2273 \f
2274 /* Table of valid machine attributes. */
2275 const struct attribute_spec ix86_attribute_table[] =
2276 {
2277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2278 /* Stdcall attribute says callee is responsible for popping arguments
2279 if they are not variable. */
2280 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2281 /* Fastcall attribute says callee is responsible for popping arguments
2282 if they are not variable. */
2283 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2284 /* Cdecl attribute says the callee is a normal C declaration */
2285 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2286 /* Regparm attribute specifies how many integer arguments are to be
2287 passed in registers. */
2288 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2289 /* Sseregparm attribute says we are using x86_64 calling conventions
2290 for FP arguments. */
2291 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2292 /* force_align_arg_pointer says this function realigns the stack at entry. */
2293 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2294 false, true, true, ix86_handle_cconv_attribute },
2295 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2296 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2297 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2298 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2299 #endif
2300 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2301 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2302 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2303 SUBTARGET_ATTRIBUTE_TABLE,
2304 #endif
2305 { NULL, 0, 0, false, false, false, NULL }
2306 };
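/* Illustrative sketch (added for clarity; not part of the original source):
   user code selects these calling conventions with declarations such as

       int __attribute__((stdcall))    s (int a, int b);
       int __attribute__((fastcall))   f (int a, int b);
       int __attribute__((regparm(3))) r (int a, int b, int c);

   ix86_handle_cconv_attribute below validates such uses and rejects
   incompatible combinations, e.g. fastcall together with regparm.  */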
2307
2308 /* Decide whether we can make a sibling call to a function. DECL is the
2309 declaration of the function being targeted by the call and EXP is the
2310 CALL_EXPR representing the call. */
2311
2312 static bool
2313 ix86_function_ok_for_sibcall (tree decl, tree exp)
2314 {
2315 tree func;
2316 rtx a, b;
2317
2318 /* If we are generating position-independent code, we cannot sibcall
2319 optimize any indirect call, or a direct call to a global function,
2320 as the PLT requires %ebx be live. */
2321 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2322 return false;
2323
2324 if (decl)
2325 func = decl;
2326 else
2327 {
2328 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2329 if (POINTER_TYPE_P (func))
2330 func = TREE_TYPE (func);
2331 }
2332
2333   /* Check that the return value locations are the same.  For example,
2334 if we are returning floats on the 80387 register stack, we cannot
2335 make a sibcall from a function that doesn't return a float to a
2336 function that does or, conversely, from a function that does return
2337 a float to a function that doesn't; the necessary stack adjustment
2338 would not be executed. This is also the place we notice
2339 differences in the return value ABI. Note that it is ok for one
2340 of the functions to have void return type as long as the return
2341 value of the other is passed in a register. */
2342 a = ix86_function_value (TREE_TYPE (exp), func, false);
2343 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2344 cfun->decl, false);
2345 if (STACK_REG_P (a) || STACK_REG_P (b))
2346 {
2347 if (!rtx_equal_p (a, b))
2348 return false;
2349 }
2350 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2351 ;
2352 else if (!rtx_equal_p (a, b))
2353 return false;
2354
2355 /* If this call is indirect, we'll need to be able to use a call-clobbered
2356 register for the address of the target function. Make sure that all
2357 such registers are not used for passing parameters. */
2358 if (!decl && !TARGET_64BIT)
2359 {
2360 tree type;
2361
2362 /* We're looking at the CALL_EXPR, we need the type of the function. */
2363 type = TREE_OPERAND (exp, 0); /* pointer expression */
2364 type = TREE_TYPE (type); /* pointer type */
2365 type = TREE_TYPE (type); /* function type */
2366
2367 if (ix86_function_regparm (type, NULL) >= 3)
2368 {
2369 /* ??? Need to count the actual number of registers to be used,
2370 not the possible number of registers. Fix later. */
2371 return false;
2372 }
2373 }
2374
2375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2376 /* Dllimport'd functions are also called indirectly. */
2377 if (decl && DECL_DLLIMPORT_P (decl)
2378 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2379 return false;
2380 #endif
2381
2382   /* If we force-aligned the stack, then sibcalling would unalign the
2383 stack, which may break the called function. */
2384 if (cfun->machine->force_align_arg_pointer)
2385 return false;
2386
2387 /* Otherwise okay. That also includes certain types of indirect calls. */
2388 return true;
2389 }
2390
2391 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2392 calling convention attributes;
2393 arguments as in struct attribute_spec.handler. */
2394
2395 static tree
2396 ix86_handle_cconv_attribute (tree *node, tree name,
2397 tree args,
2398 int flags ATTRIBUTE_UNUSED,
2399 bool *no_add_attrs)
2400 {
2401 if (TREE_CODE (*node) != FUNCTION_TYPE
2402 && TREE_CODE (*node) != METHOD_TYPE
2403 && TREE_CODE (*node) != FIELD_DECL
2404 && TREE_CODE (*node) != TYPE_DECL)
2405 {
2406 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2407 IDENTIFIER_POINTER (name));
2408 *no_add_attrs = true;
2409 return NULL_TREE;
2410 }
2411
2412 /* Can combine regparm with all attributes but fastcall. */
2413 if (is_attribute_p ("regparm", name))
2414 {
2415 tree cst;
2416
2417 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2418 {
2419 error ("fastcall and regparm attributes are not compatible");
2420 }
2421
2422 cst = TREE_VALUE (args);
2423 if (TREE_CODE (cst) != INTEGER_CST)
2424 {
2425 warning (OPT_Wattributes,
2426 "%qs attribute requires an integer constant argument",
2427 IDENTIFIER_POINTER (name));
2428 *no_add_attrs = true;
2429 }
2430 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2431 {
2432 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2433 IDENTIFIER_POINTER (name), REGPARM_MAX);
2434 *no_add_attrs = true;
2435 }
2436
2437 if (!TARGET_64BIT
2438 && lookup_attribute (ix86_force_align_arg_pointer_string,
2439 TYPE_ATTRIBUTES (*node))
2440 && compare_tree_int (cst, REGPARM_MAX-1))
2441 {
2442 error ("%s functions limited to %d register parameters",
2443 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2444 }
2445
2446 return NULL_TREE;
2447 }
2448
2449 if (TARGET_64BIT)
2450 {
2451 warning (OPT_Wattributes, "%qs attribute ignored",
2452 IDENTIFIER_POINTER (name));
2453 *no_add_attrs = true;
2454 return NULL_TREE;
2455 }
2456
2457 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2458 if (is_attribute_p ("fastcall", name))
2459 {
2460 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2461 {
2462 error ("fastcall and cdecl attributes are not compatible");
2463 }
2464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2465 {
2466 error ("fastcall and stdcall attributes are not compatible");
2467 }
2468 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2469 {
2470 error ("fastcall and regparm attributes are not compatible");
2471 }
2472 }
2473
2474 /* Can combine stdcall with fastcall (redundant), regparm and
2475 sseregparm. */
2476 else if (is_attribute_p ("stdcall", name))
2477 {
2478 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2479 {
2480 error ("stdcall and cdecl attributes are not compatible");
2481 }
2482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2483 {
2484 error ("stdcall and fastcall attributes are not compatible");
2485 }
2486 }
2487
2488 /* Can combine cdecl with regparm and sseregparm. */
2489 else if (is_attribute_p ("cdecl", name))
2490 {
2491 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2492 {
2493 error ("stdcall and cdecl attributes are not compatible");
2494 }
2495 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2496 {
2497 error ("fastcall and cdecl attributes are not compatible");
2498 }
2499 }
2500
2501 /* Can combine sseregparm with all attributes. */
2502
2503 return NULL_TREE;
2504 }
2505
2506 /* Return 0 if the attributes for two types are incompatible, 1 if they
2507 are compatible, and 2 if they are nearly compatible (which causes a
2508 warning to be generated). */
2509
2510 static int
2511 ix86_comp_type_attributes (tree type1, tree type2)
2512 {
2513 /* Check for mismatch of non-default calling convention. */
2514 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2515
2516 if (TREE_CODE (type1) != FUNCTION_TYPE)
2517 return 1;
2518
2519 /* Check for mismatched fastcall/regparm types. */
2520 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2521 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2522 || (ix86_function_regparm (type1, NULL)
2523 != ix86_function_regparm (type2, NULL)))
2524 return 0;
2525
2526 /* Check for mismatched sseregparm types. */
2527 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2528 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2529 return 0;
2530
2531 /* Check for mismatched return types (cdecl vs stdcall). */
2532 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2533 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2534 return 0;
2535
2536 return 1;
2537 }
2538 \f
2539 /* Return the regparm value for a function with the indicated TYPE and DECL.
2540 DECL may be NULL when calling function indirectly
2541 or considering a libcall. */
2542
2543 static int
2544 ix86_function_regparm (tree type, tree decl)
2545 {
2546 tree attr;
2547 int regparm = ix86_regparm;
2548 bool user_convention = false;
2549
2550 if (!TARGET_64BIT)
2551 {
2552 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2553 if (attr)
2554 {
2555 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2556 user_convention = true;
2557 }
2558
2559 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2560 {
2561 regparm = 2;
2562 user_convention = true;
2563 }
2564
2565 /* Use register calling convention for local functions when possible. */
2566 if (!TARGET_64BIT && !user_convention && decl
2567 && flag_unit_at_a_time && !profile_flag)
2568 {
2569 struct cgraph_local_info *i = cgraph_local_info (decl);
2570 if (i && i->local)
2571 {
2572 int local_regparm, globals = 0, regno;
2573
2574 /* Make sure no regparm register is taken by a global register
2575 variable. */
2576 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2577 if (global_regs[local_regparm])
2578 break;
2579 /* We can't use regparm(3) for nested functions as these use
2580 	     the static chain pointer in the third argument.  */
2581 if (local_regparm == 3
2582 && decl_function_context (decl)
2583 && !DECL_NO_STATIC_CHAIN (decl))
2584 local_regparm = 2;
2585 	  /* If the function realigns its stack pointer, the
2586 prologue will clobber %ecx. If we've already
2587 generated code for the callee, the callee
2588 DECL_STRUCT_FUNCTION is gone, so we fall back to
2589 scanning the attributes for the self-realigning
2590 property. */
2591 if ((DECL_STRUCT_FUNCTION (decl)
2592 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2593 || (!DECL_STRUCT_FUNCTION (decl)
2594 && lookup_attribute (ix86_force_align_arg_pointer_string,
2595 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2596 local_regparm = 2;
2597 	  /* Each global register variable increases register pressure,
2598 	     so the more global reg vars there are, the less the regparm
2599 	     optimization helps, unless requested by the user explicitly.  */
2600 for (regno = 0; regno < 6; regno++)
2601 if (global_regs[regno])
2602 globals++;
2603 local_regparm
2604 = globals < local_regparm ? local_regparm - globals : 0;
2605
2606 if (local_regparm > regparm)
2607 regparm = local_regparm;
2608 }
2609 }
2610 }
2611 return regparm;
2612 }
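/* Illustrative note (added for clarity; not part of the original source):
   a regparm value of N means the first N (up to 3) integer arguments are
   passed in %eax, %edx and %ecx instead of on the stack.  For instance,
   assuming a hypothetical declaration

       int __attribute__((regparm(2))) add (int a, int b);

   a would arrive in %eax and b in %edx, whereas the fastcall case handled
   above fixes regparm at 2 but uses %ecx and %edx.  */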
2613
2614 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2615 in SSE registers for a function with the indicated TYPE and DECL.
2616 DECL may be NULL when calling function indirectly
2617 or considering a libcall. Otherwise return 0. */
2618
2619 static int
2620 ix86_function_sseregparm (tree type, tree decl)
2621 {
2622 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2623 by the sseregparm attribute. */
2624 if (TARGET_SSEREGPARM
2625 || (type
2626 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2627 {
2628 if (!TARGET_SSE)
2629 {
2630 if (decl)
2631 error ("Calling %qD with attribute sseregparm without "
2632 "SSE/SSE2 enabled", decl);
2633 else
2634 error ("Calling %qT with attribute sseregparm without "
2635 "SSE/SSE2 enabled", type);
2636 return 0;
2637 }
2638
2639 return 2;
2640 }
2641
2642 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2643 in SSE registers even for 32-bit mode and not just 3, but up to
2644 8 SSE arguments in registers. */
2645 if (!TARGET_64BIT && decl
2646 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2647 {
2648 struct cgraph_local_info *i = cgraph_local_info (decl);
2649 if (i && i->local)
2650 return TARGET_SSE2 ? 2 : 1;
2651 }
2652
2653 return 0;
2654 }
2655
2656 /* Return true if EAX is live at the start of the function. Used by
2657 ix86_expand_prologue to determine if we need special help before
2658 calling allocate_stack_worker. */
2659
2660 static bool
2661 ix86_eax_live_at_start_p (void)
2662 {
2663 /* Cheat. Don't bother working forward from ix86_function_regparm
2664 to the function type to whether an actual argument is located in
2665 eax. Instead just look at cfg info, which is still close enough
2666 to correct at this point. This gives false positives for broken
2667 functions that might use uninitialized data that happens to be
2668 allocated in eax, but who cares? */
2669 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2670 }
2671
2672 /* Value is the number of bytes of arguments automatically
2673 popped when returning from a subroutine call.
2674 FUNDECL is the declaration node of the function (as a tree),
2675 FUNTYPE is the data type of the function (as a tree),
2676 or for a library call it is an identifier node for the subroutine name.
2677 SIZE is the number of bytes of arguments passed on the stack.
2678
2679 On the 80386, the RTD insn may be used to pop them if the number
2680 of args is fixed, but if the number is variable then the caller
2681 must pop them all. RTD can't be used for library calls now
2682 because the library is compiled with the Unix compiler.
2683 Use of RTD is a selectable option, since it is incompatible with
2684 standard Unix calling sequences. If the option is not selected,
2685 the caller must always pop the args.
2686
2687 The attribute stdcall is equivalent to RTD on a per module basis. */
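/* Illustrative example (added for clarity; not part of the original source):
   for a hypothetical fixed-argument declaration

       void __attribute__((stdcall)) f (int a, int b);

   this hook returns 8 (two 4-byte stack slots), so the callee pops its own
   arguments with "ret 8"; a plain cdecl function returns 0 and leaves the
   cleanup to the caller.  */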
2688
2689 int
2690 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2691 {
2692 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2693
2694 /* Cdecl functions override -mrtd, and never pop the stack. */
2695 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2696
2697 /* Stdcall and fastcall functions will pop the stack if not
2698 variable args. */
2699 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2700 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2701 rtd = 1;
2702
2703 if (rtd
2704 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2705 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2706 == void_type_node)))
2707 return size;
2708 }
2709
2710 /* Lose any fake structure return argument if it is passed on the stack. */
2711 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2712 && !TARGET_64BIT
2713 && !KEEP_AGGREGATE_RETURN_POINTER)
2714 {
2715 int nregs = ix86_function_regparm (funtype, fundecl);
2716
2717 if (!nregs)
2718 return GET_MODE_SIZE (Pmode);
2719 }
2720
2721 return 0;
2722 }
2723 \f
2724 /* Argument support functions. */
2725
2726 /* Return true when register may be used to pass function parameters. */
2727 bool
2728 ix86_function_arg_regno_p (int regno)
2729 {
2730 int i;
2731 if (!TARGET_64BIT)
2732 return (regno < REGPARM_MAX
2733 || (TARGET_MMX && MMX_REGNO_P (regno)
2734 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2735 || (TARGET_SSE && SSE_REGNO_P (regno)
2736 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2737
2738 if (TARGET_SSE && SSE_REGNO_P (regno)
2739 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2740 return true;
2741 /* RAX is used as hidden argument to va_arg functions. */
2742 if (!regno)
2743 return true;
2744 for (i = 0; i < REGPARM_MAX; i++)
2745 if (regno == x86_64_int_parameter_registers[i])
2746 return true;
2747 return false;
2748 }
2749
2750 /* Return true if we do not know how to pass TYPE solely in registers.  */
2751
2752 static bool
2753 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2754 {
2755 if (must_pass_in_stack_var_size_or_pad (mode, type))
2756 return true;
2757
2758 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2759 The layout_type routine is crafty and tries to trick us into passing
2760 currently unsupported vector types on the stack by using TImode. */
2761 return (!TARGET_64BIT && mode == TImode
2762 && type && TREE_CODE (type) != VECTOR_TYPE);
2763 }
2764
2765 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2766 for a call to a function whose data type is FNTYPE.
2767 For a library call, FNTYPE is 0. */
2768
2769 void
2770 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2771 tree fntype, /* tree ptr for function decl */
2772 rtx libname, /* SYMBOL_REF of library name or 0 */
2773 tree fndecl)
2774 {
2775 static CUMULATIVE_ARGS zero_cum;
2776 tree param, next_param;
2777
2778 if (TARGET_DEBUG_ARG)
2779 {
2780 fprintf (stderr, "\ninit_cumulative_args (");
2781 if (fntype)
2782 fprintf (stderr, "fntype code = %s, ret code = %s",
2783 tree_code_name[(int) TREE_CODE (fntype)],
2784 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2785 else
2786 fprintf (stderr, "no fntype");
2787
2788 if (libname)
2789 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2790 }
2791
2792 *cum = zero_cum;
2793
2794 /* Set up the number of registers to use for passing arguments. */
2795 cum->nregs = ix86_regparm;
2796 if (TARGET_SSE)
2797 cum->sse_nregs = SSE_REGPARM_MAX;
2798 if (TARGET_MMX)
2799 cum->mmx_nregs = MMX_REGPARM_MAX;
2800 cum->warn_sse = true;
2801 cum->warn_mmx = true;
2802 cum->maybe_vaarg = false;
2803
2804 /* Use ecx and edx registers if function has fastcall attribute,
2805 else look for regparm information. */
2806 if (fntype && !TARGET_64BIT)
2807 {
2808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2809 {
2810 cum->nregs = 2;
2811 cum->fastcall = 1;
2812 }
2813 else
2814 cum->nregs = ix86_function_regparm (fntype, fndecl);
2815 }
2816
2817 /* Set up the number of SSE registers used for passing SFmode
2818 and DFmode arguments. Warn for mismatching ABI. */
2819 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2820
2821 /* Determine if this function has variable arguments. This is
2822      indicated by the last argument being 'void_type_node' if there
2823 are no variable arguments. If there are variable arguments, then
2824 we won't pass anything in registers in 32-bit mode. */
2825
2826 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2827 {
2828 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2829 param != 0; param = next_param)
2830 {
2831 next_param = TREE_CHAIN (param);
2832 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2833 {
2834 if (!TARGET_64BIT)
2835 {
2836 cum->nregs = 0;
2837 cum->sse_nregs = 0;
2838 cum->mmx_nregs = 0;
2839 cum->warn_sse = 0;
2840 cum->warn_mmx = 0;
2841 cum->fastcall = 0;
2842 cum->float_in_sse = 0;
2843 }
2844 cum->maybe_vaarg = true;
2845 }
2846 }
2847 }
2848 if ((!fntype && !libname)
2849 || (fntype && !TYPE_ARG_TYPES (fntype)))
2850 cum->maybe_vaarg = true;
2851
2852 if (TARGET_DEBUG_ARG)
2853 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2854
2855 return;
2856 }
2857
2858 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2859 But in the case of vector types, it is some vector mode.
2860
2861 When we have only some of our vector isa extensions enabled, then there
2862 are some modes for which vector_mode_supported_p is false. For these
2863 modes, the generic vector support in gcc will choose some non-vector mode
2864 in order to implement the type. By computing the natural mode, we'll
2865 select the proper ABI location for the operand and not depend on whatever
2866 the middle-end decides to do with these vector types. */
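/* Illustrative example (added for clarity; not part of the original source):
   for a generic vector type such as

       typedef float v4sf __attribute__ ((vector_size (16)));

   the natural mode computed below is V4SFmode (four SFmode units in 16
   bytes), even if the front end fell back to a non-vector mode because the
   required SSE support was not enabled.  */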
2867
2868 static enum machine_mode
2869 type_natural_mode (tree type)
2870 {
2871 enum machine_mode mode = TYPE_MODE (type);
2872
2873 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2874 {
2875 HOST_WIDE_INT size = int_size_in_bytes (type);
2876 if ((size == 8 || size == 16)
2877 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2878 && TYPE_VECTOR_SUBPARTS (type) > 1)
2879 {
2880 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2881
2882 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2883 mode = MIN_MODE_VECTOR_FLOAT;
2884 else
2885 mode = MIN_MODE_VECTOR_INT;
2886
2887 /* Get the mode which has this inner mode and number of units. */
2888 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2889 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2890 && GET_MODE_INNER (mode) == innermode)
2891 return mode;
2892
2893 gcc_unreachable ();
2894 }
2895 }
2896
2897 return mode;
2898 }
2899
2900 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2901 this may not agree with the mode that the type system has chosen for the
2902 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2903 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2904
2905 static rtx
2906 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2907 unsigned int regno)
2908 {
2909 rtx tmp;
2910
2911 if (orig_mode != BLKmode)
2912 tmp = gen_rtx_REG (orig_mode, regno);
2913 else
2914 {
2915 tmp = gen_rtx_REG (mode, regno);
2916 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2917 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2918 }
2919
2920 return tmp;
2921 }
2922
2923 /* x86-64 register passing implementation.  See the x86-64 psABI for details.
2924    The goal of this code is to classify each eightbyte of an incoming argument
2925    by register class and assign registers accordingly.  */
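/* Illustrative example (added for clarity; not part of the original source):
   under this scheme a hypothetical 16-byte aggregate

       struct s { double d; int a; int b; };

   splits into two eightbytes: the first (the double) classifies as SSE and
   is passed in an XMM register, while the second (the two ints) classifies
   as INTEGER and is passed in a general-purpose register.  */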
2926
2927 /* Return the union class of CLASS1 and CLASS2.
2928 See the x86-64 PS ABI for details. */
2929
2930 static enum x86_64_reg_class
2931 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2932 {
2933 /* Rule #1: If both classes are equal, this is the resulting class. */
2934 if (class1 == class2)
2935 return class1;
2936
2937 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2938 the other class. */
2939 if (class1 == X86_64_NO_CLASS)
2940 return class2;
2941 if (class2 == X86_64_NO_CLASS)
2942 return class1;
2943
2944 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2945 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2946 return X86_64_MEMORY_CLASS;
2947
2948 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2949 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2950 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2951 return X86_64_INTEGERSI_CLASS;
2952 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2953 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2954 return X86_64_INTEGER_CLASS;
2955
2956 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2957 MEMORY is used. */
2958 if (class1 == X86_64_X87_CLASS
2959 || class1 == X86_64_X87UP_CLASS
2960 || class1 == X86_64_COMPLEX_X87_CLASS
2961 || class2 == X86_64_X87_CLASS
2962 || class2 == X86_64_X87UP_CLASS
2963 || class2 == X86_64_COMPLEX_X87_CLASS)
2964 return X86_64_MEMORY_CLASS;
2965
2966 /* Rule #6: Otherwise class SSE is used. */
2967 return X86_64_SSE_CLASS;
2968 }
2969
2970 /* Classify the argument of type TYPE and mode MODE.
2971 CLASSES will be filled with the register class used to pass each word
2972 of the operand. The number of words is returned. If the parameter
2973 should be passed in memory, 0 is returned. As a special case for zero
2974 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2975
2976 BIT_OFFSET is used internally for handling records and specifies the offset
2977 of the field in bits modulo 256 to avoid overflow cases.
2978
2979 See the x86-64 PS ABI for details.
2980 */
2981
2982 static int
2983 classify_argument (enum machine_mode mode, tree type,
2984 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2985 {
2986 HOST_WIDE_INT bytes =
2987 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2988 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2989
2990 /* Variable sized entities are always passed/returned in memory. */
2991 if (bytes < 0)
2992 return 0;
2993
2994 if (mode != VOIDmode
2995 && targetm.calls.must_pass_in_stack (mode, type))
2996 return 0;
2997
2998 if (type && AGGREGATE_TYPE_P (type))
2999 {
3000 int i;
3001 tree field;
3002 enum x86_64_reg_class subclasses[MAX_CLASSES];
3003
3004 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3005 if (bytes > 16)
3006 return 0;
3007
3008 for (i = 0; i < words; i++)
3009 classes[i] = X86_64_NO_CLASS;
3010
3011 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3012 signal the memory class, so handle this as a special case. */
3013 if (!words)
3014 {
3015 classes[0] = X86_64_NO_CLASS;
3016 return 1;
3017 }
3018
3019 /* Classify each field of record and merge classes. */
3020 switch (TREE_CODE (type))
3021 {
3022 case RECORD_TYPE:
3023 /* And now merge the fields of structure. */
3024 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3025 {
3026 if (TREE_CODE (field) == FIELD_DECL)
3027 {
3028 int num;
3029
3030 if (TREE_TYPE (field) == error_mark_node)
3031 continue;
3032
3033 /* Bitfields are always classified as integer. Handle them
3034 early, since later code would consider them to be
3035 misaligned integers. */
3036 if (DECL_BIT_FIELD (field))
3037 {
3038 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3039 i < ((int_bit_position (field) + (bit_offset % 64))
3040 + tree_low_cst (DECL_SIZE (field), 0)
3041 + 63) / 8 / 8; i++)
3042 classes[i] =
3043 merge_classes (X86_64_INTEGER_CLASS,
3044 classes[i]);
3045 }
3046 else
3047 {
3048 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3049 TREE_TYPE (field), subclasses,
3050 (int_bit_position (field)
3051 + bit_offset) % 256);
3052 if (!num)
3053 return 0;
3054 for (i = 0; i < num; i++)
3055 {
3056 int pos =
3057 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3058 classes[i + pos] =
3059 merge_classes (subclasses[i], classes[i + pos]);
3060 }
3061 }
3062 }
3063 }
3064 break;
3065
3066 case ARRAY_TYPE:
3067 /* Arrays are handled as small records. */
3068 {
3069 int num;
3070 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3071 TREE_TYPE (type), subclasses, bit_offset);
3072 if (!num)
3073 return 0;
3074
3075 /* The partial classes are now full classes. */
3076 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3077 subclasses[0] = X86_64_SSE_CLASS;
3078 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3079 subclasses[0] = X86_64_INTEGER_CLASS;
3080
3081 for (i = 0; i < words; i++)
3082 classes[i] = subclasses[i % num];
3083
3084 break;
3085 }
3086 case UNION_TYPE:
3087 case QUAL_UNION_TYPE:
3088 /* Unions are similar to RECORD_TYPE but the offset is always 0.
3089 */
3090 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3091 {
3092 if (TREE_CODE (field) == FIELD_DECL)
3093 {
3094 int num;
3095
3096 if (TREE_TYPE (field) == error_mark_node)
3097 continue;
3098
3099 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3100 TREE_TYPE (field), subclasses,
3101 bit_offset);
3102 if (!num)
3103 return 0;
3104 for (i = 0; i < num; i++)
3105 classes[i] = merge_classes (subclasses[i], classes[i]);
3106 }
3107 }
3108 break;
3109
3110 default:
3111 gcc_unreachable ();
3112 }
3113
3114 /* Final merger cleanup. */
3115 for (i = 0; i < words; i++)
3116 {
3117 /* If one class is MEMORY, everything should be passed in
3118 memory. */
3119 if (classes[i] == X86_64_MEMORY_CLASS)
3120 return 0;
3121
3122 /* X86_64_SSEUP_CLASS should always be preceded by
3123 X86_64_SSE_CLASS. */
3124 if (classes[i] == X86_64_SSEUP_CLASS
3125 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3126 classes[i] = X86_64_SSE_CLASS;
3127
3128 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3129 if (classes[i] == X86_64_X87UP_CLASS
3130 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3131 classes[i] = X86_64_SSE_CLASS;
3132 }
3133 return words;
3134 }
3135
3136 /* Compute the alignment needed. We align all types to their natural boundaries,
3137 with the exception of XFmode, which is aligned to 64 bits. */
3138 if (mode != VOIDmode && mode != BLKmode)
3139 {
3140 int mode_alignment = GET_MODE_BITSIZE (mode);
3141
3142 if (mode == XFmode)
3143 mode_alignment = 128;
3144 else if (mode == XCmode)
3145 mode_alignment = 256;
3146 if (COMPLEX_MODE_P (mode))
3147 mode_alignment /= 2;
3148 /* Misaligned fields are always returned in memory. */
3149 if (bit_offset % mode_alignment)
3150 return 0;
3151 }
3152
3153 /* For V1xx modes, just use the base mode. */
3154 if (VECTOR_MODE_P (mode)
3155 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3156 mode = GET_MODE_INNER (mode);
3157
3158 /* Classification of atomic types. */
3159 switch (mode)
3160 {
3161 case SDmode:
3162 case DDmode:
3163 classes[0] = X86_64_SSE_CLASS;
3164 return 1;
3165 case TDmode:
3166 classes[0] = X86_64_SSE_CLASS;
3167 classes[1] = X86_64_SSEUP_CLASS;
3168 return 2;
3169 case DImode:
3170 case SImode:
3171 case HImode:
3172 case QImode:
3173 case CSImode:
3174 case CHImode:
3175 case CQImode:
3176 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3177 classes[0] = X86_64_INTEGERSI_CLASS;
3178 else
3179 classes[0] = X86_64_INTEGER_CLASS;
3180 return 1;
3181 case CDImode:
3182 case TImode:
3183 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3184 return 2;
3185 case CTImode:
3186 return 0;
3187 case SFmode:
3188 if (!(bit_offset % 64))
3189 classes[0] = X86_64_SSESF_CLASS;
3190 else
3191 classes[0] = X86_64_SSE_CLASS;
3192 return 1;
3193 case DFmode:
3194 classes[0] = X86_64_SSEDF_CLASS;
3195 return 1;
3196 case XFmode:
3197 classes[0] = X86_64_X87_CLASS;
3198 classes[1] = X86_64_X87UP_CLASS;
3199 return 2;
3200 case TFmode:
3201 classes[0] = X86_64_SSE_CLASS;
3202 classes[1] = X86_64_SSEUP_CLASS;
3203 return 2;
3204 case SCmode:
3205 classes[0] = X86_64_SSE_CLASS;
3206 return 1;
3207 case DCmode:
3208 classes[0] = X86_64_SSEDF_CLASS;
3209 classes[1] = X86_64_SSEDF_CLASS;
3210 return 2;
3211 case XCmode:
3212 classes[0] = X86_64_COMPLEX_X87_CLASS;
3213 return 1;
3214 case TCmode:
3215 /* This mode is larger than 16 bytes. */
3216 return 0;
3217 case V4SFmode:
3218 case V4SImode:
3219 case V16QImode:
3220 case V8HImode:
3221 case V2DFmode:
3222 case V2DImode:
3223 classes[0] = X86_64_SSE_CLASS;
3224 classes[1] = X86_64_SSEUP_CLASS;
3225 return 2;
3226 case V2SFmode:
3227 case V2SImode:
3228 case V4HImode:
3229 case V8QImode:
3230 classes[0] = X86_64_SSE_CLASS;
3231 return 1;
3232 case BLKmode:
3233 case VOIDmode:
3234 return 0;
3235 default:
3236 gcc_assert (VECTOR_MODE_P (mode));
3237
3238 if (bytes > 16)
3239 return 0;
3240
3241 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3242
3243 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3244 classes[0] = X86_64_INTEGERSI_CLASS;
3245 else
3246 classes[0] = X86_64_INTEGER_CLASS;
3247 classes[1] = X86_64_INTEGER_CLASS;
3248 return 1 + (bytes > 8);
3249 }
3250 }
3251
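/* Worked example, for illustration only: a struct { double d; long l; }
   occupies two eightbytes.  The first classifies as SSEDF (the double at
   bit offset 0) and the second as INTEGER, so classify_argument returns 2
   and the value is passed with the double in an SSE register and the long
   in a general purpose register.  */
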
3252 /* Examine the argument and return the number of registers required in each
3253 class. Return 0 iff the parameter should be passed in memory. */
3254 static int
3255 examine_argument (enum machine_mode mode, tree type, int in_return,
3256 int *int_nregs, int *sse_nregs)
3257 {
3258 enum x86_64_reg_class class[MAX_CLASSES];
3259 int n = classify_argument (mode, type, class, 0);
3260
3261 *int_nregs = 0;
3262 *sse_nregs = 0;
3263 if (!n)
3264 return 0;
3265 for (n--; n >= 0; n--)
3266 switch (class[n])
3267 {
3268 case X86_64_INTEGER_CLASS:
3269 case X86_64_INTEGERSI_CLASS:
3270 (*int_nregs)++;
3271 break;
3272 case X86_64_SSE_CLASS:
3273 case X86_64_SSESF_CLASS:
3274 case X86_64_SSEDF_CLASS:
3275 (*sse_nregs)++;
3276 break;
3277 case X86_64_NO_CLASS:
3278 case X86_64_SSEUP_CLASS:
3279 break;
3280 case X86_64_X87_CLASS:
3281 case X86_64_X87UP_CLASS:
3282 if (!in_return)
3283 return 0;
3284 break;
3285 case X86_64_COMPLEX_X87_CLASS:
3286 return in_return ? 2 : 0;
3287 case X86_64_MEMORY_CLASS:
3288 gcc_unreachable ();
3289 }
3290 return 1;
3291 }
3292
3293 /* Construct container for the argument used by GCC interface. See
3294 FUNCTION_ARG for the detailed description. */
3295
3296 static rtx
3297 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3298 tree type, int in_return, int nintregs, int nsseregs,
3299 const int *intreg, int sse_regno)
3300 {
3301 /* The following variables hold the static issued_error state. */
3302 static bool issued_sse_arg_error;
3303 static bool issued_sse_ret_error;
3304 static bool issued_x87_ret_error;
3305
3306 enum machine_mode tmpmode;
3307 int bytes =
3308 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3309 enum x86_64_reg_class class[MAX_CLASSES];
3310 int n;
3311 int i;
3312 int nexps = 0;
3313 int needed_sseregs, needed_intregs;
3314 rtx exp[MAX_CLASSES];
3315 rtx ret;
3316
3317 n = classify_argument (mode, type, class, 0);
3318 if (TARGET_DEBUG_ARG)
3319 {
3320 if (!n)
3321 fprintf (stderr, "Memory class\n");
3322 else
3323 {
3324 fprintf (stderr, "Classes:");
3325 for (i = 0; i < n; i++)
3326 {
3327 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3328 }
3329 fprintf (stderr, "\n");
3330 }
3331 }
3332 if (!n)
3333 return NULL;
3334 if (!examine_argument (mode, type, in_return, &needed_intregs,
3335 &needed_sseregs))
3336 return NULL;
3337 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3338 return NULL;
3339
3340 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3341 some less clueful developer tries to use floating-point anyway. */
3342 if (needed_sseregs && !TARGET_SSE)
3343 {
3344 if (in_return)
3345 {
3346 if (!issued_sse_ret_error)
3347 {
3348 error ("SSE register return with SSE disabled");
3349 issued_sse_ret_error = true;
3350 }
3351 }
3352 else if (!issued_sse_arg_error)
3353 {
3354 error ("SSE register argument with SSE disabled");
3355 issued_sse_arg_error = true;
3356 }
3357 return NULL;
3358 }
3359
3360 /* Likewise, error if the ABI requires us to return values in the
3361 x87 registers and the user specified -mno-80387. */
3362 if (!TARGET_80387 && in_return)
3363 for (i = 0; i < n; i++)
3364 if (class[i] == X86_64_X87_CLASS
3365 || class[i] == X86_64_X87UP_CLASS
3366 || class[i] == X86_64_COMPLEX_X87_CLASS)
3367 {
3368 if (!issued_x87_ret_error)
3369 {
3370 error ("x87 register return with x87 disabled");
3371 issued_x87_ret_error = true;
3372 }
3373 return NULL;
3374 }
3375
3376 /* First construct simple cases. Avoid SCmode, since we want to use a
3377 single register to pass this type. */
3378 if (n == 1 && mode != SCmode)
3379 switch (class[0])
3380 {
3381 case X86_64_INTEGER_CLASS:
3382 case X86_64_INTEGERSI_CLASS:
3383 return gen_rtx_REG (mode, intreg[0]);
3384 case X86_64_SSE_CLASS:
3385 case X86_64_SSESF_CLASS:
3386 case X86_64_SSEDF_CLASS:
3387 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3388 case X86_64_X87_CLASS:
3389 case X86_64_COMPLEX_X87_CLASS:
3390 return gen_rtx_REG (mode, FIRST_STACK_REG);
3391 case X86_64_NO_CLASS:
3392 /* Zero sized array, struct or class. */
3393 return NULL;
3394 default:
3395 gcc_unreachable ();
3396 }
3397 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3398 && mode != BLKmode)
3399 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3400 if (n == 2
3401 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3402 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3403 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3404 && class[1] == X86_64_INTEGER_CLASS
3405 && (mode == CDImode || mode == TImode || mode == TFmode)
3406 && intreg[0] + 1 == intreg[1])
3407 return gen_rtx_REG (mode, intreg[0]);
3408
3409 /* Otherwise figure out the entries of the PARALLEL. */
3410 for (i = 0; i < n; i++)
3411 {
3412 switch (class[i])
3413 {
3414 case X86_64_NO_CLASS:
3415 break;
3416 case X86_64_INTEGER_CLASS:
3417 case X86_64_INTEGERSI_CLASS:
3418 /* Merge TImodes on aligned occasions here too. */
3419 if (i * 8 + 8 > bytes)
3420 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3421 else if (class[i] == X86_64_INTEGERSI_CLASS)
3422 tmpmode = SImode;
3423 else
3424 tmpmode = DImode;
3425 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3426 if (tmpmode == BLKmode)
3427 tmpmode = DImode;
3428 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3429 gen_rtx_REG (tmpmode, *intreg),
3430 GEN_INT (i*8));
3431 intreg++;
3432 break;
3433 case X86_64_SSESF_CLASS:
3434 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3435 gen_rtx_REG (SFmode,
3436 SSE_REGNO (sse_regno)),
3437 GEN_INT (i*8));
3438 sse_regno++;
3439 break;
3440 case X86_64_SSEDF_CLASS:
3441 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3442 gen_rtx_REG (DFmode,
3443 SSE_REGNO (sse_regno)),
3444 GEN_INT (i*8));
3445 sse_regno++;
3446 break;
3447 case X86_64_SSE_CLASS:
3448 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3449 tmpmode = TImode;
3450 else
3451 tmpmode = DImode;
3452 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3453 gen_rtx_REG (tmpmode,
3454 SSE_REGNO (sse_regno)),
3455 GEN_INT (i*8));
3456 if (tmpmode == TImode)
3457 i++;
3458 sse_regno++;
3459 break;
3460 default:
3461 gcc_unreachable ();
3462 }
3463 }
3464
3465 /* Empty aligned struct, union or class. */
3466 if (nexps == 0)
3467 return NULL;
3468
3469 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3470 for (i = 0; i < nexps; i++)
3471 XVECEXP (ret, 0, i) = exp [i];
3472 return ret;
3473 }
3474
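/* Continuing the illustrative struct { double d; long l; } example above,
   construct_container builds roughly the following PARALLEL (the exact
   register numbers depend on how many arguments precede it):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI rdi)  (const_int 8))])

   i.e. the double at byte offset 0 in an SSE register and the long at byte
   offset 8 in an integer register.  */
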
3475 /* Update the data in CUM to advance over an argument
3476 of mode MODE and data type TYPE.
3477 (TYPE is null for libcalls where that information may not be available.) */
3478
3479 void
3480 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3481 tree type, int named)
3482 {
3483 int bytes =
3484 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3485 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3486
3487 if (type)
3488 mode = type_natural_mode (type);
3489
3490 if (TARGET_DEBUG_ARG)
3491 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3492 "mode=%s, named=%d)\n\n",
3493 words, cum->words, cum->nregs, cum->sse_nregs,
3494 GET_MODE_NAME (mode), named);
3495
3496 if (TARGET_64BIT)
3497 {
3498 int int_nregs, sse_nregs;
3499 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3500 cum->words += words;
3501 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3502 {
3503 cum->nregs -= int_nregs;
3504 cum->sse_nregs -= sse_nregs;
3505 cum->regno += int_nregs;
3506 cum->sse_regno += sse_nregs;
3507 }
3508 else
3509 cum->words += words;
3510 }
3511 else
3512 {
3513 switch (mode)
3514 {
3515 default:
3516 break;
3517
3518 case BLKmode:
3519 if (bytes < 0)
3520 break;
3521 /* FALLTHRU */
3522
3523 case DImode:
3524 case SImode:
3525 case HImode:
3526 case QImode:
3527 cum->words += words;
3528 cum->nregs -= words;
3529 cum->regno += words;
3530
3531 if (cum->nregs <= 0)
3532 {
3533 cum->nregs = 0;
3534 cum->regno = 0;
3535 }
3536 break;
3537
3538 case DFmode:
3539 if (cum->float_in_sse < 2)
3540 break;
3541 case SFmode:
3542 if (cum->float_in_sse < 1)
3543 break;
3544 /* FALLTHRU */
3545
3546 case TImode:
3547 case V16QImode:
3548 case V8HImode:
3549 case V4SImode:
3550 case V2DImode:
3551 case V4SFmode:
3552 case V2DFmode:
3553 if (!type || !AGGREGATE_TYPE_P (type))
3554 {
3555 cum->sse_words += words;
3556 cum->sse_nregs -= 1;
3557 cum->sse_regno += 1;
3558 if (cum->sse_nregs <= 0)
3559 {
3560 cum->sse_nregs = 0;
3561 cum->sse_regno = 0;
3562 }
3563 }
3564 break;
3565
3566 case V8QImode:
3567 case V4HImode:
3568 case V2SImode:
3569 case V2SFmode:
3570 if (!type || !AGGREGATE_TYPE_P (type))
3571 {
3572 cum->mmx_words += words;
3573 cum->mmx_nregs -= 1;
3574 cum->mmx_regno += 1;
3575 if (cum->mmx_nregs <= 0)
3576 {
3577 cum->mmx_nregs = 0;
3578 cum->mmx_regno = 0;
3579 }
3580 }
3581 break;
3582 }
3583 }
3584 }
3585
3586 /* Define where to put the arguments to a function.
3587 Value is zero to push the argument on the stack,
3588 or a hard register in which to store the argument.
3589
3590 MODE is the argument's machine mode.
3591 TYPE is the data type of the argument (as a tree).
3592 This is null for libcalls where that information may
3593 not be available.
3594 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3595 the preceding args and about the function being called.
3596 NAMED is nonzero if this argument is a named parameter
3597 (otherwise it is an extra parameter matching an ellipsis). */
3598
3599 rtx
3600 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3601 tree type, int named)
3602 {
3603 enum machine_mode mode = orig_mode;
3604 rtx ret = NULL_RTX;
3605 int bytes =
3606 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3607 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3608 static bool warnedsse, warnedmmx;
3609
3610 /* To simplify the code below, represent vector types with a vector mode
3611 even if MMX/SSE are not active. */
3612 if (type && TREE_CODE (type) == VECTOR_TYPE)
3613 mode = type_natural_mode (type);
3614
3615 /* Handle a hidden AL argument containing the number of SSE registers used
3616 for varargs x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
3617 any AL settings. */
3618 if (mode == VOIDmode)
3619 {
3620 if (TARGET_64BIT)
3621 return GEN_INT (cum->maybe_vaarg
3622 ? (cum->sse_nregs < 0
3623 ? SSE_REGPARM_MAX
3624 : cum->sse_regno)
3625 : -1);
3626 else
3627 return constm1_rtx;
3628 }
3629 if (TARGET_64BIT)
3630 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3631 cum->sse_nregs,
3632 &x86_64_int_parameter_registers [cum->regno],
3633 cum->sse_regno);
3634 else
3635 switch (mode)
3636 {
3637 /* For now, pass fp/complex values on the stack. */
3638 default:
3639 break;
3640
3641 case BLKmode:
3642 if (bytes < 0)
3643 break;
3644 /* FALLTHRU */
3645 case DImode:
3646 case SImode:
3647 case HImode:
3648 case QImode:
3649 if (words <= cum->nregs)
3650 {
3651 int regno = cum->regno;
3652
3653 /* Fastcall allocates the first two DWORD (SImode) or
3654 smaller arguments to ECX and EDX. */
3655 if (cum->fastcall)
3656 {
3657 if (mode == BLKmode || mode == DImode)
3658 break;
3659
3660 /* ECX, not EAX, is the first allocated register. */
3661 if (regno == 0)
3662 regno = 2;
3663 }
3664 ret = gen_rtx_REG (mode, regno);
3665 }
3666 break;
3667 case DFmode:
3668 if (cum->float_in_sse < 2)
3669 break;
3670 case SFmode:
3671 if (cum->float_in_sse < 1)
3672 break;
3673 /* FALLTHRU */
3674 case TImode:
3675 case V16QImode:
3676 case V8HImode:
3677 case V4SImode:
3678 case V2DImode:
3679 case V4SFmode:
3680 case V2DFmode:
3681 if (!type || !AGGREGATE_TYPE_P (type))
3682 {
3683 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3684 {
3685 warnedsse = true;
3686 warning (0, "SSE vector argument without SSE enabled "
3687 "changes the ABI");
3688 }
3689 if (cum->sse_nregs)
3690 ret = gen_reg_or_parallel (mode, orig_mode,
3691 cum->sse_regno + FIRST_SSE_REG);
3692 }
3693 break;
3694 case V8QImode:
3695 case V4HImode:
3696 case V2SImode:
3697 case V2SFmode:
3698 if (!type || !AGGREGATE_TYPE_P (type))
3699 {
3700 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3701 {
3702 warnedmmx = true;
3703 warning (0, "MMX vector argument without MMX enabled "
3704 "changes the ABI");
3705 }
3706 if (cum->mmx_nregs)
3707 ret = gen_reg_or_parallel (mode, orig_mode,
3708 cum->mmx_regno + FIRST_MMX_REG);
3709 }
3710 break;
3711 }
3712
3713 if (TARGET_DEBUG_ARG)
3714 {
3715 fprintf (stderr,
3716 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3717 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3718
3719 if (ret)
3720 print_simple_rtl (stderr, ret);
3721 else
3722 fprintf (stderr, ", stack");
3723
3724 fprintf (stderr, " )\n");
3725 }
3726
3727 return ret;
3728 }
3729
3730 /* A C expression that indicates when an argument must be passed by
3731 reference. If nonzero for an argument, a copy of that argument is
3732 made in memory and a pointer to the argument is passed instead of
3733 the argument itself. The pointer is passed in whatever way is
3734 appropriate for passing a pointer to that type. */
3735
3736 static bool
3737 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3738 enum machine_mode mode ATTRIBUTE_UNUSED,
3739 tree type, bool named ATTRIBUTE_UNUSED)
3740 {
3741 if (!TARGET_64BIT)
3742 return 0;
3743
3744 if (type && int_size_in_bytes (type) == -1)
3745 {
3746 if (TARGET_DEBUG_ARG)
3747 fprintf (stderr, "function_arg_pass_by_reference\n");
3748 return 1;
3749 }
3750
3751 return 0;
3752 }
3753
3754 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3755 passing ABI. Only called if TARGET_SSE. */
3756 static bool
3757 contains_128bit_aligned_vector_p (tree type)
3758 {
3759 enum machine_mode mode = TYPE_MODE (type);
3760 if (SSE_REG_MODE_P (mode)
3761 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3762 return true;
3763 if (TYPE_ALIGN (type) < 128)
3764 return false;
3765
3766 if (AGGREGATE_TYPE_P (type))
3767 {
3768 /* Walk the aggregates recursively. */
3769 switch (TREE_CODE (type))
3770 {
3771 case RECORD_TYPE:
3772 case UNION_TYPE:
3773 case QUAL_UNION_TYPE:
3774 {
3775 tree field;
3776
3777 /* Walk all the structure fields. */
3778 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3779 {
3780 if (TREE_CODE (field) == FIELD_DECL
3781 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3782 return true;
3783 }
3784 break;
3785 }
3786
3787 case ARRAY_TYPE:
3788 /* Just for use if some languages pass arrays by value. */
3789 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3790 return true;
3791 break;
3792
3793 default:
3794 gcc_unreachable ();
3795 }
3796 }
3797 return false;
3798 }
3799
3800 /* Gives the alignment boundary, in bits, of an argument with the
3801 specified mode and type. */
3802
3803 int
3804 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3805 {
3806 int align;
3807 if (type)
3808 align = TYPE_ALIGN (type);
3809 else
3810 align = GET_MODE_ALIGNMENT (mode);
3811 if (align < PARM_BOUNDARY)
3812 align = PARM_BOUNDARY;
3813 if (!TARGET_64BIT)
3814 {
3815 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3816 make an exception for SSE modes since these require 128bit
3817 alignment.
3818
3819 The handling here differs from field_alignment. ICC aligns MMX
3820 arguments to 4 byte boundaries, while structure fields are aligned
3821 to 8 byte boundaries. */
3822 if (!TARGET_SSE)
3823 align = PARM_BOUNDARY;
3824 else if (!type)
3825 {
3826 if (!SSE_REG_MODE_P (mode))
3827 align = PARM_BOUNDARY;
3828 }
3829 else
3830 {
3831 if (!contains_128bit_aligned_vector_p (type))
3832 align = PARM_BOUNDARY;
3833 }
3834 }
3835 if (align > 128)
3836 align = 128;
3837 return align;
3838 }
3839
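/* For illustration (32-bit, SSE enabled): a plain int argument keeps the
   default PARM_BOUNDARY of 32 bits, while an __m128 argument, or a struct
   containing a 128-bit aligned vector, is placed on a 128-bit boundary.
   With -mno-sse everything falls back to PARM_BOUNDARY.  */
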
3840 /* Return true if REGNO is a possible register number for a function value. */
3841 bool
3842 ix86_function_value_regno_p (int regno)
3843 {
3844 if (regno == 0
3845 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3846 || (regno == FIRST_SSE_REG && TARGET_SSE))
3847 return true;
3848
3849 if (!TARGET_64BIT
3850 && (regno == FIRST_MMX_REG && TARGET_MMX))
3851 return true;
3852
3853 return false;
3854 }
3855
3856 /* Define how to find the value returned by a function.
3857 VALTYPE is the data type of the value (as a tree).
3858 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3859 otherwise, FUNC is 0. */
3860 rtx
3861 ix86_function_value (tree valtype, tree fntype_or_decl,
3862 bool outgoing ATTRIBUTE_UNUSED)
3863 {
3864 enum machine_mode natmode = type_natural_mode (valtype);
3865
3866 if (TARGET_64BIT)
3867 {
3868 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3869 1, REGPARM_MAX, SSE_REGPARM_MAX,
3870 x86_64_int_return_registers, 0);
3871 /* For zero-sized structures, construct_container returns NULL, but we
3872 need to keep the rest of the compiler happy by returning a meaningful value. */
3873 if (!ret)
3874 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3875 return ret;
3876 }
3877 else
3878 {
3879 tree fn = NULL_TREE, fntype;
3880 if (fntype_or_decl
3881 && DECL_P (fntype_or_decl))
3882 fn = fntype_or_decl;
3883 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3884 return gen_rtx_REG (TYPE_MODE (valtype),
3885 ix86_value_regno (natmode, fn, fntype));
3886 }
3887 }
3888
3889 /* Return true iff type is returned in memory. */
3890 int
3891 ix86_return_in_memory (tree type)
3892 {
3893 int needed_intregs, needed_sseregs, size;
3894 enum machine_mode mode = type_natural_mode (type);
3895
3896 if (TARGET_64BIT)
3897 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3898
3899 if (mode == BLKmode)
3900 return 1;
3901
3902 size = int_size_in_bytes (type);
3903
3904 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3905 return 0;
3906
3907 if (VECTOR_MODE_P (mode) || mode == TImode)
3908 {
3909 /* User-created vectors small enough to fit in EAX. */
3910 if (size < 8)
3911 return 0;
3912
3913 /* MMX/3dNow values are returned in MM0,
3914 except when it doesn't exist. */
3915 if (size == 8)
3916 return (TARGET_MMX ? 0 : 1);
3917
3918 /* SSE values are returned in XMM0, except when it doesn't exist. */
3919 if (size == 16)
3920 return (TARGET_SSE ? 0 : 1);
3921 }
3922
3923 if (mode == XFmode)
3924 return 0;
3925
3926 if (mode == TDmode)
3927 return 1;
3928
3929 if (size > 12)
3930 return 1;
3931 return 0;
3932 }
3933
3934 /* When returning SSE vector types, we have a choice of either
3935 (1) being ABI incompatible with a -march switch, or
3936 (2) generating an error.
3937 Given no good solution, I think the safest thing is one warning.
3938 The user won't be able to use -Werror, but....
3939
3940 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3941 called in response to actually generating a caller or callee that
3942 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3943 via aggregate_value_p for general type probing from tree-ssa. */
3944
3945 static rtx
3946 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3947 {
3948 static bool warnedsse, warnedmmx;
3949
3950 if (type)
3951 {
3952 /* Look at the return type of the function, not the function type. */
3953 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3954
3955 if (!TARGET_SSE && !warnedsse)
3956 {
3957 if (mode == TImode
3958 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3959 {
3960 warnedsse = true;
3961 warning (0, "SSE vector return without SSE enabled "
3962 "changes the ABI");
3963 }
3964 }
3965
3966 if (!TARGET_MMX && !warnedmmx)
3967 {
3968 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3969 {
3970 warnedmmx = true;
3971 warning (0, "MMX vector return without MMX enabled "
3972 "changes the ABI");
3973 }
3974 }
3975 }
3976
3977 return NULL;
3978 }
3979
3980 /* Define how to find the value returned by a library function
3981 assuming the value has mode MODE. */
3982 rtx
3983 ix86_libcall_value (enum machine_mode mode)
3984 {
3985 if (TARGET_64BIT)
3986 {
3987 switch (mode)
3988 {
3989 case SFmode:
3990 case SCmode:
3991 case DFmode:
3992 case DCmode:
3993 case TFmode:
3994 case SDmode:
3995 case DDmode:
3996 case TDmode:
3997 return gen_rtx_REG (mode, FIRST_SSE_REG);
3998 case XFmode:
3999 case XCmode:
4000 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4001 case TCmode:
4002 return NULL;
4003 default:
4004 return gen_rtx_REG (mode, 0);
4005 }
4006 }
4007 else
4008 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4009 }
4010
4011 /* Given a mode, return the register to use for a return value. */
4012
4013 static int
4014 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4015 {
4016 gcc_assert (!TARGET_64BIT);
4017
4018 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4019 we normally prevent this case when mmx is not available. However
4020 some ABIs may require the result to be returned like DImode. */
4021 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4022 return TARGET_MMX ? FIRST_MMX_REG : 0;
4023
4024 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4025 we prevent this case when sse is not available. However some ABIs
4026 may require the result to be returned like integer TImode. */
4027 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4028 return TARGET_SSE ? FIRST_SSE_REG : 0;
4029
4030 /* Decimal floating point values can go in %eax, unlike other float modes. */
4031 if (DECIMAL_FLOAT_MODE_P (mode))
4032 return 0;
4033
4034 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4035 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4036 return 0;
4037
4038 /* Floating point return values in %st(0), except for local functions when
4039 SSE math is enabled or for functions with sseregparm attribute. */
4040 if ((func || fntype)
4041 && (mode == SFmode || mode == DFmode))
4042 {
4043 int sse_level = ix86_function_sseregparm (fntype, func);
4044 if ((sse_level >= 1 && mode == SFmode)
4045 || (sse_level == 2 && mode == DFmode))
4046 return FIRST_SSE_REG;
4047 }
4048
4049 return FIRST_FLOAT_REG;
4050 }
4051 \f
4052 /* Create the va_list data type. */
4053
4054 static tree
4055 ix86_build_builtin_va_list (void)
4056 {
4057 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4058
4059 /* For i386 we use a plain pointer to the argument area. */
4060 if (!TARGET_64BIT)
4061 return build_pointer_type (char_type_node);
4062
4063 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4064 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4065
4066 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4067 unsigned_type_node);
4068 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4069 unsigned_type_node);
4070 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4071 ptr_type_node);
4072 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4073 ptr_type_node);
4074
4075 va_list_gpr_counter_field = f_gpr;
4076 va_list_fpr_counter_field = f_fpr;
4077
4078 DECL_FIELD_CONTEXT (f_gpr) = record;
4079 DECL_FIELD_CONTEXT (f_fpr) = record;
4080 DECL_FIELD_CONTEXT (f_ovf) = record;
4081 DECL_FIELD_CONTEXT (f_sav) = record;
4082
4083 TREE_CHAIN (record) = type_decl;
4084 TYPE_NAME (record) = type_decl;
4085 TYPE_FIELDS (record) = f_gpr;
4086 TREE_CHAIN (f_gpr) = f_fpr;
4087 TREE_CHAIN (f_fpr) = f_ovf;
4088 TREE_CHAIN (f_ovf) = f_sav;
4089
4090 layout_type (record);
4091
4092 /* The correct type is an array type of one element. */
4093 return build_array_type (record, build_index_type (size_zero_node));
4094 }
4095
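/* For reference, the record built above corresponds roughly to the
   following C declaration (the layout mandated by the x86-64 ABI):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/
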
4096 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4097
4098 static void
4099 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4100 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4101 int no_rtl)
4102 {
4103 CUMULATIVE_ARGS next_cum;
4104 rtx save_area = NULL_RTX, mem;
4105 rtx label;
4106 rtx label_ref;
4107 rtx tmp_reg;
4108 rtx nsse_reg;
4109 int set;
4110 tree fntype;
4111 int stdarg_p;
4112 int i;
4113
4114 if (!TARGET_64BIT)
4115 return;
4116
4117 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4118 return;
4119
4120 /* Indicate to allocate space on the stack for varargs save area. */
4121 ix86_save_varrargs_registers = 1;
4122
4123 cfun->stack_alignment_needed = 128;
4124
4125 fntype = TREE_TYPE (current_function_decl);
4126 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4127 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4128 != void_type_node));
4129
4130 /* For varargs, we do not want to skip the dummy va_dcl argument.
4131 For stdargs, we do want to skip the last named argument. */
4132 next_cum = *cum;
4133 if (stdarg_p)
4134 function_arg_advance (&next_cum, mode, type, 1);
4135
4136 if (!no_rtl)
4137 save_area = frame_pointer_rtx;
4138
4139 set = get_varargs_alias_set ();
4140
4141 for (i = next_cum.regno;
4142 i < ix86_regparm
4143 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4144 i++)
4145 {
4146 mem = gen_rtx_MEM (Pmode,
4147 plus_constant (save_area, i * UNITS_PER_WORD));
4148 MEM_NOTRAP_P (mem) = 1;
4149 set_mem_alias_set (mem, set);
4150 emit_move_insn (mem, gen_rtx_REG (Pmode,
4151 x86_64_int_parameter_registers[i]));
4152 }
4153
4154 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4155 {
4156 /* Now emit code to save SSE registers. The AX parameter contains the number
4157 of SSE parameter registers used to call this function. We use the
4158 sse_prologue_save insn template, which produces a computed jump across
4159 the SSE saves. We need some preparation work to get this working. */
4160
4161 label = gen_label_rtx ();
4162 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4163
4164 /* Compute address to jump to :
4165 label - 5*eax + nnamed_sse_arguments*5 */
4166 tmp_reg = gen_reg_rtx (Pmode);
4167 nsse_reg = gen_reg_rtx (Pmode);
4168 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4169 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4170 gen_rtx_MULT (Pmode, nsse_reg,
4171 GEN_INT (4))));
4172 if (next_cum.sse_regno)
4173 emit_move_insn
4174 (nsse_reg,
4175 gen_rtx_CONST (DImode,
4176 gen_rtx_PLUS (DImode,
4177 label_ref,
4178 GEN_INT (next_cum.sse_regno * 4))));
4179 else
4180 emit_move_insn (nsse_reg, label_ref);
4181 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4182
4183 /* Compute the address of the memory block we save into. We always use a pointer
4184 pointing 127 bytes after the first byte to store - this is needed to keep
4185 the instruction size limited to 4 bytes. */
4186 tmp_reg = gen_reg_rtx (Pmode);
4187 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4188 plus_constant (save_area,
4189 8 * REGPARM_MAX + 127)));
4190 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4191 MEM_NOTRAP_P (mem) = 1;
4192 set_mem_alias_set (mem, set);
4193 set_mem_align (mem, BITS_PER_WORD);
4194
4195 /* And finally do the dirty job! */
4196 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4197 GEN_INT (next_cum.sse_regno), label));
4198 }
4199
4200 }
4201
4202 /* Implement va_start. */
4203
4204 void
4205 ix86_va_start (tree valist, rtx nextarg)
4206 {
4207 HOST_WIDE_INT words, n_gpr, n_fpr;
4208 tree f_gpr, f_fpr, f_ovf, f_sav;
4209 tree gpr, fpr, ovf, sav, t;
4210 tree type;
4211
4212 /* Only 64bit target needs something special. */
4213 if (!TARGET_64BIT)
4214 {
4215 std_expand_builtin_va_start (valist, nextarg);
4216 return;
4217 }
4218
4219 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4220 f_fpr = TREE_CHAIN (f_gpr);
4221 f_ovf = TREE_CHAIN (f_fpr);
4222 f_sav = TREE_CHAIN (f_ovf);
4223
4224 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4225 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4226 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4227 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4228 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4229
4230 /* Count number of gp and fp argument registers used. */
4231 words = current_function_args_info.words;
4232 n_gpr = current_function_args_info.regno;
4233 n_fpr = current_function_args_info.sse_regno;
4234
4235 if (TARGET_DEBUG_ARG)
4236 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4237 (int) words, (int) n_gpr, (int) n_fpr);
4238
4239 if (cfun->va_list_gpr_size)
4240 {
4241 type = TREE_TYPE (gpr);
4242 t = build2 (MODIFY_EXPR, type, gpr,
4243 build_int_cst (type, n_gpr * 8));
4244 TREE_SIDE_EFFECTS (t) = 1;
4245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4246 }
4247
4248 if (cfun->va_list_fpr_size)
4249 {
4250 type = TREE_TYPE (fpr);
4251 t = build2 (MODIFY_EXPR, type, fpr,
4252 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4253 TREE_SIDE_EFFECTS (t) = 1;
4254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4255 }
4256
4257 /* Find the overflow area. */
4258 type = TREE_TYPE (ovf);
4259 t = make_tree (type, virtual_incoming_args_rtx);
4260 if (words != 0)
4261 t = build2 (PLUS_EXPR, type, t,
4262 build_int_cst (type, words * UNITS_PER_WORD));
4263 t = build2 (MODIFY_EXPR, type, ovf, t);
4264 TREE_SIDE_EFFECTS (t) = 1;
4265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4266
4267 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4268 {
4269 /* Find the register save area.
4270 The prologue of the function saves it right above the stack frame. */
4271 type = TREE_TYPE (sav);
4272 t = make_tree (type, frame_pointer_rtx);
4273 t = build2 (MODIFY_EXPR, type, sav, t);
4274 TREE_SIDE_EFFECTS (t) = 1;
4275 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4276 }
4277 }
4278
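/* Sketch of what the statements built above amount to, using the field
   names of the va_list record (REGPARM_MAX is the six integer registers):

     gp_offset         = n_gpr * 8;
     fp_offset         = REGPARM_MAX * 8 + n_fpr * 16;
     overflow_arg_area = <incoming args pointer> + words * UNITS_PER_WORD;
     reg_save_area     = <register save area set up by the prologue>;
*/
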
4279 /* Implement va_arg. */
4280
4281 tree
4282 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4283 {
4284 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4285 tree f_gpr, f_fpr, f_ovf, f_sav;
4286 tree gpr, fpr, ovf, sav, t;
4287 int size, rsize;
4288 tree lab_false, lab_over = NULL_TREE;
4289 tree addr, t2;
4290 rtx container;
4291 int indirect_p = 0;
4292 tree ptrtype;
4293 enum machine_mode nat_mode;
4294
4295 /* Only 64bit target needs something special. */
4296 if (!TARGET_64BIT)
4297 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4298
4299 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4300 f_fpr = TREE_CHAIN (f_gpr);
4301 f_ovf = TREE_CHAIN (f_fpr);
4302 f_sav = TREE_CHAIN (f_ovf);
4303
4304 valist = build_va_arg_indirect_ref (valist);
4305 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4306 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4307 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4308 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4309
4310 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4311 if (indirect_p)
4312 type = build_pointer_type (type);
4313 size = int_size_in_bytes (type);
4314 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4315
4316 nat_mode = type_natural_mode (type);
4317 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4318 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4319
4320 /* Pull the value out of the saved registers. */
4321
4322 addr = create_tmp_var (ptr_type_node, "addr");
4323 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4324
4325 if (container)
4326 {
4327 int needed_intregs, needed_sseregs;
4328 bool need_temp;
4329 tree int_addr, sse_addr;
4330
4331 lab_false = create_artificial_label ();
4332 lab_over = create_artificial_label ();
4333
4334 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4335
4336 need_temp = (!REG_P (container)
4337 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4338 || TYPE_ALIGN (type) > 128));
4339
4340 /* In case we are passing a structure, verify that it is a consecutive block
4341 in the register save area. If not, we need to do moves. */
4342 if (!need_temp && !REG_P (container))
4343 {
4344 /* Verify that all registers are strictly consecutive */
4345 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4346 {
4347 int i;
4348
4349 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4350 {
4351 rtx slot = XVECEXP (container, 0, i);
4352 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4353 || INTVAL (XEXP (slot, 1)) != i * 16)
4354 need_temp = 1;
4355 }
4356 }
4357 else
4358 {
4359 int i;
4360
4361 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4362 {
4363 rtx slot = XVECEXP (container, 0, i);
4364 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4365 || INTVAL (XEXP (slot, 1)) != i * 8)
4366 need_temp = 1;
4367 }
4368 }
4369 }
4370 if (!need_temp)
4371 {
4372 int_addr = addr;
4373 sse_addr = addr;
4374 }
4375 else
4376 {
4377 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4378 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4379 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4380 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4381 }
4382
4383 /* First ensure that we fit completely in registers. */
4384 if (needed_intregs)
4385 {
4386 t = build_int_cst (TREE_TYPE (gpr),
4387 (REGPARM_MAX - needed_intregs + 1) * 8);
4388 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4389 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4390 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4391 gimplify_and_add (t, pre_p);
4392 }
4393 if (needed_sseregs)
4394 {
4395 t = build_int_cst (TREE_TYPE (fpr),
4396 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4397 + REGPARM_MAX * 8);
4398 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4399 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4400 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4401 gimplify_and_add (t, pre_p);
4402 }
4403
4404 /* Compute index to start of area used for integer regs. */
4405 if (needed_intregs)
4406 {
4407 /* int_addr = gpr + sav; */
4408 t = fold_convert (ptr_type_node, gpr);
4409 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4410 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4411 gimplify_and_add (t, pre_p);
4412 }
4413 if (needed_sseregs)
4414 {
4415 /* sse_addr = fpr + sav; */
4416 t = fold_convert (ptr_type_node, fpr);
4417 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4418 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4419 gimplify_and_add (t, pre_p);
4420 }
4421 if (need_temp)
4422 {
4423 int i;
4424 tree temp = create_tmp_var (type, "va_arg_tmp");
4425
4426 /* addr = &temp; */
4427 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4428 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4429 gimplify_and_add (t, pre_p);
4430
4431 for (i = 0; i < XVECLEN (container, 0); i++)
4432 {
4433 rtx slot = XVECEXP (container, 0, i);
4434 rtx reg = XEXP (slot, 0);
4435 enum machine_mode mode = GET_MODE (reg);
4436 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4437 tree addr_type = build_pointer_type (piece_type);
4438 tree src_addr, src;
4439 int src_offset;
4440 tree dest_addr, dest;
4441
4442 if (SSE_REGNO_P (REGNO (reg)))
4443 {
4444 src_addr = sse_addr;
4445 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4446 }
4447 else
4448 {
4449 src_addr = int_addr;
4450 src_offset = REGNO (reg) * 8;
4451 }
4452 src_addr = fold_convert (addr_type, src_addr);
4453 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4454 size_int (src_offset)));
4455 src = build_va_arg_indirect_ref (src_addr);
4456
4457 dest_addr = fold_convert (addr_type, addr);
4458 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4459 size_int (INTVAL (XEXP (slot, 1)))));
4460 dest = build_va_arg_indirect_ref (dest_addr);
4461
4462 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4463 gimplify_and_add (t, pre_p);
4464 }
4465 }
4466
4467 if (needed_intregs)
4468 {
4469 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4470 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4471 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4472 gimplify_and_add (t, pre_p);
4473 }
4474 if (needed_sseregs)
4475 {
4476 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4477 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4478 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4479 gimplify_and_add (t, pre_p);
4480 }
4481
4482 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4483 gimplify_and_add (t, pre_p);
4484
4485 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4486 append_to_statement_list (t, pre_p);
4487 }
4488
4489 /* ... otherwise out of the overflow area. */
4490
4491 /* Care for on-stack alignment if needed. */
4492 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4493 || integer_zerop (TYPE_SIZE (type)))
4494 t = ovf;
4495 else
4496 {
4497 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4498 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4499 build_int_cst (TREE_TYPE (ovf), align - 1));
4500 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4501 build_int_cst (TREE_TYPE (t), -align));
4502 }
4503 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4504
4505 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4506 gimplify_and_add (t2, pre_p);
4507
4508 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4509 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4510 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4511 gimplify_and_add (t, pre_p);
4512
4513 if (container)
4514 {
4515 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4516 append_to_statement_list (t, pre_p);
4517 }
4518
4519 ptrtype = build_pointer_type (type);
4520 addr = fold_convert (ptrtype, addr);
4521
4522 if (indirect_p)
4523 addr = build_va_arg_indirect_ref (addr);
4524 return build_va_arg_indirect_ref (addr);
4525 }
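
/* Roughly, for a scalar integer argument the GIMPLE built above behaves
   like the following pseudo code (illustrative only):

     if (gp_offset >= 6 * 8)                // all six GPRs already used
       goto overflow;
     addr = reg_save_area + gp_offset;
     gp_offset += 8;
     goto done;
   overflow:
     addr = overflow_arg_area;              // plus any on-stack alignment
     overflow_arg_area += 8;
   done:
     value = *(int *) addr;
*/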
4526 \f
4527 /* Return nonzero if OPNUM's MEM should be matched
4528 in movabs* patterns. */
4529
4530 int
4531 ix86_check_movabs (rtx insn, int opnum)
4532 {
4533 rtx set, mem;
4534
4535 set = PATTERN (insn);
4536 if (GET_CODE (set) == PARALLEL)
4537 set = XVECEXP (set, 0, 0);
4538 gcc_assert (GET_CODE (set) == SET);
4539 mem = XEXP (set, opnum);
4540 while (GET_CODE (mem) == SUBREG)
4541 mem = SUBREG_REG (mem);
4542 gcc_assert (GET_CODE (mem) == MEM);
4543 return (volatile_ok || !MEM_VOLATILE_P (mem));
4544 }
4545 \f
4546 /* Initialize the table of extra 80387 mathematical constants. */
4547
4548 static void
4549 init_ext_80387_constants (void)
4550 {
4551 static const char * cst[5] =
4552 {
4553 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4554 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4555 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4556 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4557 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4558 };
4559 int i;
4560
4561 for (i = 0; i < 5; i++)
4562 {
4563 real_from_string (&ext_80387_constants_table[i], cst[i]);
4564 /* Ensure each constant is rounded to XFmode precision. */
4565 real_convert (&ext_80387_constants_table[i],
4566 XFmode, &ext_80387_constants_table[i]);
4567 }
4568
4569 ext_80387_constants_init = 1;
4570 }
4571
4572 /* Return true if the constant is something that can be loaded with
4573 a special instruction. */
4574
4575 int
4576 standard_80387_constant_p (rtx x)
4577 {
4578 REAL_VALUE_TYPE r;
4579
4580 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4581 return -1;
4582
4583 if (x == CONST0_RTX (GET_MODE (x)))
4584 return 1;
4585 if (x == CONST1_RTX (GET_MODE (x)))
4586 return 2;
4587
4588 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4589
4590 /* For XFmode constants, try to find a special 80387 instruction when
4591 optimizing for size or on those CPUs that benefit from them. */
4592 if (GET_MODE (x) == XFmode
4593 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4594 {
4595 int i;
4596
4597 if (! ext_80387_constants_init)
4598 init_ext_80387_constants ();
4599
4600 for (i = 0; i < 5; i++)
4601 if (real_identical (&r, &ext_80387_constants_table[i]))
4602 return i + 3;
4603 }
4604
4605 /* A load of the constant -0.0 or -1.0 will be split into an
4606 fldz;fchs or fld1;fchs sequence. */
4607 if (real_isnegzero (&r))
4608 return 8;
4609 if (real_identical (&r, &dconstm1))
4610 return 9;
4611
4612 return 0;
4613 }
4614
4615 /* Return the opcode of the special instruction to be used to load
4616 the constant X. */
4617
4618 const char *
4619 standard_80387_constant_opcode (rtx x)
4620 {
4621 switch (standard_80387_constant_p (x))
4622 {
4623 case 1:
4624 return "fldz";
4625 case 2:
4626 return "fld1";
4627 case 3:
4628 return "fldlg2";
4629 case 4:
4630 return "fldln2";
4631 case 5:
4632 return "fldl2e";
4633 case 6:
4634 return "fldl2t";
4635 case 7:
4636 return "fldpi";
4637 case 8:
4638 case 9:
4639 return "#";
4640 default:
4641 gcc_unreachable ();
4642 }
4643 }
4644
4645 /* Return the CONST_DOUBLE representing the 80387 constant that is
4646 loaded by the specified special instruction. The argument IDX
4647 matches the return value from standard_80387_constant_p. */
4648
4649 rtx
4650 standard_80387_constant_rtx (int idx)
4651 {
4652 int i;
4653
4654 if (! ext_80387_constants_init)
4655 init_ext_80387_constants ();
4656
4657 switch (idx)
4658 {
4659 case 3:
4660 case 4:
4661 case 5:
4662 case 6:
4663 case 7:
4664 i = idx - 3;
4665 break;
4666
4667 default:
4668 gcc_unreachable ();
4669 }
4670
4671 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4672 XFmode);
4673 }
4674
4675 /* Return 1 if MODE is a valid mode for SSE. */
4676 static int
4677 standard_sse_mode_p (enum machine_mode mode)
4678 {
4679 switch (mode)
4680 {
4681 case V16QImode:
4682 case V8HImode:
4683 case V4SImode:
4684 case V2DImode:
4685 case V4SFmode:
4686 case V2DFmode:
4687 return 1;
4688
4689 default:
4690 return 0;
4691 }
4692 }
4693
4694 /* Return 1 if X is an FP constant we can load into an SSE register
4695 without using memory. */
4696 int
4697 standard_sse_constant_p (rtx x)
4698 {
4699 enum machine_mode mode = GET_MODE (x);
4700
4701 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4702 return 1;
4703 if (vector_all_ones_operand (x, mode)
4704 && standard_sse_mode_p (mode))
4705 return TARGET_SSE2 ? 2 : -1;
4706
4707 return 0;
4708 }
4709
4710 /* Return the opcode of the special instruction to be used to load
4711 the constant X. */
4712
4713 const char *
4714 standard_sse_constant_opcode (rtx insn, rtx x)
4715 {
4716 switch (standard_sse_constant_p (x))
4717 {
4718 case 1:
4719 if (get_attr_mode (insn) == MODE_V4SF)
4720 return "xorps\t%0, %0";
4721 else if (get_attr_mode (insn) == MODE_V2DF)
4722 return "xorpd\t%0, %0";
4723 else
4724 return "pxor\t%0, %0";
4725 case 2:
4726 return "pcmpeqd\t%0, %0";
4727 }
4728 gcc_unreachable ();
4729 }
4730
4731 /* Return 1 if OP contains a symbol reference. */
4732
4733 int
4734 symbolic_reference_mentioned_p (rtx op)
4735 {
4736 const char *fmt;
4737 int i;
4738
4739 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4740 return 1;
4741
4742 fmt = GET_RTX_FORMAT (GET_CODE (op));
4743 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4744 {
4745 if (fmt[i] == 'E')
4746 {
4747 int j;
4748
4749 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4750 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4751 return 1;
4752 }
4753
4754 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4755 return 1;
4756 }
4757
4758 return 0;
4759 }
4760
4761 /* Return 1 if it is appropriate to emit `ret' instructions in the
4762 body of a function. Do this only if the epilogue is simple, needing a
4763 couple of insns. Prior to reloading, we can't tell how many registers
4764 must be saved, so return 0 then. Return 0 if there is no frame
4765 marker to de-allocate. */
4766
4767 int
4768 ix86_can_use_return_insn_p (void)
4769 {
4770 struct ix86_frame frame;
4771
4772 if (! reload_completed || frame_pointer_needed)
4773 return 0;
4774
4775 /* Don't allow more than 32K bytes of popped arguments, since that's all
4776 we handle with one instruction. */
4777 if (current_function_pops_args
4778 && current_function_args_size >= 32768)
4779 return 0;
4780
4781 ix86_compute_frame_layout (&frame);
4782 return frame.to_allocate == 0 && frame.nregs == 0;
4783 }
4784 \f
4785 /* Value should be nonzero if functions must have frame pointers.
4786 Zero means the frame pointer need not be set up (and parms may
4787 be accessed via the stack pointer) in functions that seem suitable. */
4788
4789 int
4790 ix86_frame_pointer_required (void)
4791 {
4792 /* If we accessed previous frames, then the generated code expects
4793 to be able to access the saved ebp value in our frame. */
4794 if (cfun->machine->accesses_prev_frame)
4795 return 1;
4796
4797 /* Several x86 OSes need a frame pointer for other reasons,
4798 usually pertaining to setjmp. */
4799 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4800 return 1;
4801
4802 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4803 the frame pointer by default. Turn it back on now if we've not
4804 got a leaf function. */
4805 if (TARGET_OMIT_LEAF_FRAME_POINTER
4806 && (!current_function_is_leaf
4807 || ix86_current_function_calls_tls_descriptor))
4808 return 1;
4809
4810 if (current_function_profile)
4811 return 1;
4812
4813 return 0;
4814 }
4815
4816 /* Record that the current function accesses previous call frames. */
4817
4818 void
4819 ix86_setup_frame_addresses (void)
4820 {
4821 cfun->machine->accesses_prev_frame = 1;
4822 }
4823 \f
4824 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4825 # define USE_HIDDEN_LINKONCE 1
4826 #else
4827 # define USE_HIDDEN_LINKONCE 0
4828 #endif
4829
4830 static int pic_labels_used;
4831
4832 /* Fills in the label name that should be used for a pc thunk for
4833 the given register. */
4834
4835 static void
4836 get_pc_thunk_name (char name[32], unsigned int regno)
4837 {
4838 gcc_assert (!TARGET_64BIT);
4839
4840 if (USE_HIDDEN_LINKONCE)
4841 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4842 else
4843 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4844 }
4845
4846
4847 /* Emit the get-PC thunks used for -fpic: each thunk loads its register
4848 with the return address of the caller and then returns. */
4849
4850 void
4851 ix86_file_end (void)
4852 {
4853 rtx xops[2];
4854 int regno;
4855
4856 for (regno = 0; regno < 8; ++regno)
4857 {
4858 char name[32];
4859
4860 if (! ((pic_labels_used >> regno) & 1))
4861 continue;
4862
4863 get_pc_thunk_name (name, regno);
4864
4865 #if TARGET_MACHO
4866 if (TARGET_MACHO)
4867 {
4868 switch_to_section (darwin_sections[text_coal_section]);
4869 fputs ("\t.weak_definition\t", asm_out_file);
4870 assemble_name (asm_out_file, name);
4871 fputs ("\n\t.private_extern\t", asm_out_file);
4872 assemble_name (asm_out_file, name);
4873 fputs ("\n", asm_out_file);
4874 ASM_OUTPUT_LABEL (asm_out_file, name);
4875 }
4876 else
4877 #endif
4878 if (USE_HIDDEN_LINKONCE)
4879 {
4880 tree decl;
4881
4882 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4883 error_mark_node);
4884 TREE_PUBLIC (decl) = 1;
4885 TREE_STATIC (decl) = 1;
4886 DECL_ONE_ONLY (decl) = 1;
4887
4888 (*targetm.asm_out.unique_section) (decl, 0);
4889 switch_to_section (get_named_section (decl, NULL, 0));
4890
4891 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4892 fputs ("\t.hidden\t", asm_out_file);
4893 assemble_name (asm_out_file, name);
4894 fputc ('\n', asm_out_file);
4895 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4896 }
4897 else
4898 {
4899 switch_to_section (text_section);
4900 ASM_OUTPUT_LABEL (asm_out_file, name);
4901 }
4902
4903 xops[0] = gen_rtx_REG (SImode, regno);
4904 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4905 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4906 output_asm_insn ("ret", xops);
4907 }
4908
4909 if (NEED_INDICATE_EXEC_STACK)
4910 file_end_indicate_exec_stack ();
4911 }
4912
4913 /* Emit code for the SET_GOT patterns. */
4914
4915 const char *
4916 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4917 {
4918 rtx xops[3];
4919
4920 xops[0] = dest;
4921 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4922
4923 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4924 {
4925 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4926
4927 if (!flag_pic)
4928 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4929 else
4930 output_asm_insn ("call\t%a2", xops);
4931
4932 #if TARGET_MACHO
4933 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4934 is what will be referenced by the Mach-O PIC subsystem. */
4935 if (!label)
4936 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4937 #endif
4938
4939 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4940 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4941
4942 if (flag_pic)
4943 output_asm_insn ("pop{l}\t%0", xops);
4944 }
4945 else
4946 {
4947 char name[32];
4948 get_pc_thunk_name (name, REGNO (dest));
4949 pic_labels_used |= 1 << REGNO (dest);
4950
4951 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4952 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4953 output_asm_insn ("call\t%X2", xops);
4954 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4955 is what will be referenced by the Mach-O PIC subsystem. */
4956 #if TARGET_MACHO
4957 if (!label)
4958 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4959 else
4960 targetm.asm_out.internal_label (asm_out_file, "L",
4961 CODE_LABEL_NUMBER (label));
4962 #endif
4963 }
4964
4965 if (TARGET_MACHO)
4966 return "";
4967
4968 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4969 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4970 else
4971 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4972
4973 return "";
4974 }
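/* Illustrative sketch, not part of the original source: for a SET_GOT of
   %ebx with -fpic, the deep-branch-prediction path above emits roughly

       call __i686.get_pc_thunk.bx
       addl $_GLOBAL_OFFSET_TABLE_, %ebx

   while the other path obtains the pc with a call to a local label
   followed by a pop:

       call .L1
   .L1: popl %ebx
        addl $_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx

   The local label name ".L1" is only a placeholder here.  */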
4975
4976 /* Generate a "push" pattern for input ARG. */
4977
4978 static rtx
4979 gen_push (rtx arg)
4980 {
4981 return gen_rtx_SET (VOIDmode,
4982 gen_rtx_MEM (Pmode,
4983 gen_rtx_PRE_DEC (Pmode,
4984 stack_pointer_rtx)),
4985 arg);
4986 }
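/* For reference (illustrative, not from the original source), gen_push
   for %ebp on ia32 builds the RTL

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   which the backend later emits as a plain "push" instruction.  */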
4987
4988 /* Return the number of an unused call-clobbered register that is available
4989 for the entire function, or INVALID_REGNUM if there is none. */
4990
4991 static unsigned int
4992 ix86_select_alt_pic_regnum (void)
4993 {
4994 if (current_function_is_leaf && !current_function_profile
4995 && !ix86_current_function_calls_tls_descriptor)
4996 {
4997 int i;
4998 for (i = 2; i >= 0; --i)
4999 if (!regs_ever_live[i])
5000 return i;
5001 }
5002
5003 return INVALID_REGNUM;
5004 }
5005
5006 /* Return 1 if we need to save REGNO. */
5007 static int
5008 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5009 {
5010 if (pic_offset_table_rtx
5011 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5012 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5013 || current_function_profile
5014 || current_function_calls_eh_return
5015 || current_function_uses_const_pool))
5016 {
5017 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5018 return 0;
5019 return 1;
5020 }
5021
5022 if (current_function_calls_eh_return && maybe_eh_return)
5023 {
5024 unsigned i;
5025 for (i = 0; ; i++)
5026 {
5027 unsigned test = EH_RETURN_DATA_REGNO (i);
5028 if (test == INVALID_REGNUM)
5029 break;
5030 if (test == regno)
5031 return 1;
5032 }
5033 }
5034
5035 if (cfun->machine->force_align_arg_pointer
5036 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5037 return 1;
5038
5039 return (regs_ever_live[regno]
5040 && !call_used_regs[regno]
5041 && !fixed_regs[regno]
5042 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5043 }
5044
5045 /* Return number of registers to be saved on the stack. */
5046
5047 static int
5048 ix86_nsaved_regs (void)
5049 {
5050 int nregs = 0;
5051 int regno;
5052
5053 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5054 if (ix86_save_reg (regno, true))
5055 nregs++;
5056 return nregs;
5057 }
5058
5059 /* Return the offset between two registers, one to be eliminated, and the other
5060 its replacement, at the start of a routine. */
5061
5062 HOST_WIDE_INT
5063 ix86_initial_elimination_offset (int from, int to)
5064 {
5065 struct ix86_frame frame;
5066 ix86_compute_frame_layout (&frame);
5067
5068 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5069 return frame.hard_frame_pointer_offset;
5070 else if (from == FRAME_POINTER_REGNUM
5071 && to == HARD_FRAME_POINTER_REGNUM)
5072 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5073 else
5074 {
5075 gcc_assert (to == STACK_POINTER_REGNUM);
5076
5077 if (from == ARG_POINTER_REGNUM)
5078 return frame.stack_pointer_offset;
5079
5080 gcc_assert (from == FRAME_POINTER_REGNUM);
5081 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5082 }
5083 }
5084
5085 /* Fill structure ix86_frame about frame of currently computed function. */
5086
5087 static void
5088 ix86_compute_frame_layout (struct ix86_frame *frame)
5089 {
5090 HOST_WIDE_INT total_size;
5091 unsigned int stack_alignment_needed;
5092 HOST_WIDE_INT offset;
5093 unsigned int preferred_alignment;
5094 HOST_WIDE_INT size = get_frame_size ();
5095
5096 frame->nregs = ix86_nsaved_regs ();
5097 total_size = size;
5098
5099 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5100 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5101
5102 /* During reload iterations the number of registers saved can change.
5103 Recompute the value as needed. Do not recompute when the number of registers
5104 didn't change, as reload makes multiple calls to this function and does not
5105 expect the decision to change within a single iteration. */
5106 if (!optimize_size
5107 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5108 {
5109 int count = frame->nregs;
5110
5111 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5112 /* The fast prologue uses move instead of push to save registers. This
5113 is significantly longer, but also executes faster, as modern hardware
5114 can execute the moves in parallel but can't do that for push/pop.
5115
5116 Be careful about choosing which prologue to emit: when the function takes
5117 many instructions to execute, we may use the slow version, as well as when
5118 the function is known to be outside a hot spot (this is known with
5119 feedback only). Weight the size of the function by the number of registers
5120 to save, as it is cheap to use one or two push instructions but very
5121 slow to use many of them. */
5122 if (count)
5123 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5124 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5125 || (flag_branch_probabilities
5126 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5127 cfun->machine->use_fast_prologue_epilogue = false;
5128 else
5129 cfun->machine->use_fast_prologue_epilogue
5130 = !expensive_function_p (count);
5131 }
5132 if (TARGET_PROLOGUE_USING_MOVE
5133 && cfun->machine->use_fast_prologue_epilogue)
5134 frame->save_regs_using_mov = true;
5135 else
5136 frame->save_regs_using_mov = false;
5137
5138
5139 /* Skip return address and saved base pointer. */
5140 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5141
5142 frame->hard_frame_pointer_offset = offset;
5143
5144 /* Do some sanity checking of stack_alignment_needed and
5145 preferred_alignment, since the i386 port is the only one using these
5146 features, which may break easily. */
5147
5148 gcc_assert (!size || stack_alignment_needed);
5149 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5150 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5151 gcc_assert (stack_alignment_needed
5152 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5153
5154 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5155 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5156
5157 /* Register save area */
5158 offset += frame->nregs * UNITS_PER_WORD;
5159
5160 /* Va-arg area */
5161 if (ix86_save_varrargs_registers)
5162 {
5163 offset += X86_64_VARARGS_SIZE;
5164 frame->va_arg_size = X86_64_VARARGS_SIZE;
5165 }
5166 else
5167 frame->va_arg_size = 0;
5168
5169 /* Align start of frame for local function. */
5170 frame->padding1 = ((offset + stack_alignment_needed - 1)
5171 & -stack_alignment_needed) - offset;
5172
5173 offset += frame->padding1;
5174
5175 /* Frame pointer points here. */
5176 frame->frame_pointer_offset = offset;
5177
5178 offset += size;
5179
5180 /* Add the outgoing arguments area. It can be skipped if we eliminated
5181 all the function calls as dead code.
5182 Skipping is, however, impossible when the function calls alloca: the alloca
5183 expander assumes that the last current_function_outgoing_args_size bytes
5184 of the stack frame are unused. */
5185 if (ACCUMULATE_OUTGOING_ARGS
5186 && (!current_function_is_leaf || current_function_calls_alloca
5187 || ix86_current_function_calls_tls_descriptor))
5188 {
5189 offset += current_function_outgoing_args_size;
5190 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5191 }
5192 else
5193 frame->outgoing_arguments_size = 0;
5194
5195 /* Align stack boundary. Only needed if we're calling another function
5196 or using alloca. */
5197 if (!current_function_is_leaf || current_function_calls_alloca
5198 || ix86_current_function_calls_tls_descriptor)
5199 frame->padding2 = ((offset + preferred_alignment - 1)
5200 & -preferred_alignment) - offset;
5201 else
5202 frame->padding2 = 0;
5203
5204 offset += frame->padding2;
5205
5206 /* We've reached end of stack frame. */
5207 frame->stack_pointer_offset = offset;
5208
5209 /* Size prologue needs to allocate. */
5210 frame->to_allocate =
5211 (size + frame->padding1 + frame->padding2
5212 + frame->outgoing_arguments_size + frame->va_arg_size);
5213
5214 if ((!frame->to_allocate && frame->nregs <= 1)
5215 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5216 frame->save_regs_using_mov = false;
5217
5218 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5219 && current_function_is_leaf
5220 && !ix86_current_function_calls_tls_descriptor)
5221 {
5222 frame->red_zone_size = frame->to_allocate;
5223 if (frame->save_regs_using_mov)
5224 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5225 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5226 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5227 }
5228 else
5229 frame->red_zone_size = 0;
5230 frame->to_allocate -= frame->red_zone_size;
5231 frame->stack_pointer_offset -= frame->red_zone_size;
5232 #if 0
5233 fprintf (stderr, "nregs: %i\n", frame->nregs);
5234 fprintf (stderr, "size: %i\n", size);
5235 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5236 fprintf (stderr, "padding1: %i\n", frame->padding1);
5237 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5238 fprintf (stderr, "padding2: %i\n", frame->padding2);
5239 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5240 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5241 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5242 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5243 frame->hard_frame_pointer_offset);
5244 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5245 #endif
5246 }
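/* Illustrative summary, not part of the original source, of the frame
   layout computed above, from higher to lower addresses:

       return address
       saved frame pointer          (if frame_pointer_needed)
       saved registers              (frame->nregs words)
       va-arg register save area    (frame->va_arg_size)
       padding1
       local variables              (get_frame_size ())
       outgoing arguments           (frame->outgoing_arguments_size)
       padding2                     <- stack pointer after the prologue

   frame->to_allocate is the part below the register saves that the
   prologue must allocate explicitly, minus any red zone used on x86-64.  */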
5247
5248 /* Emit code to save registers in the prologue. */
5249
5250 static void
5251 ix86_emit_save_regs (void)
5252 {
5253 unsigned int regno;
5254 rtx insn;
5255
5256 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5257 if (ix86_save_reg (regno, true))
5258 {
5259 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5260 RTX_FRAME_RELATED_P (insn) = 1;
5261 }
5262 }
5263
5264 /* Emit code to save registers using MOV insns. The first register
5265 is saved at POINTER + OFFSET. */
5266 static void
5267 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5268 {
5269 unsigned int regno;
5270 rtx insn;
5271
5272 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5273 if (ix86_save_reg (regno, true))
5274 {
5275 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5276 Pmode, offset),
5277 gen_rtx_REG (Pmode, regno));
5278 RTX_FRAME_RELATED_P (insn) = 1;
5279 offset += UNITS_PER_WORD;
5280 }
5281 }
5282
5283 /* Expand a prologue or epilogue stack adjustment.
5284 The pattern exists to put a dependency on all ebp-based memory accesses.
5285 STYLE should be negative if the instructions should be marked as frame
5286 related, zero if the %r11 register is live and cannot be freely used, and
5287 positive otherwise. */
5288
5289 static void
5290 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5291 {
5292 rtx insn;
5293
5294 if (! TARGET_64BIT)
5295 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5296 else if (x86_64_immediate_operand (offset, DImode))
5297 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5298 else
5299 {
5300 rtx r11;
5301 /* r11 is used by indirect sibcall return as well, set before the
5302 epilogue and used after the epilogue. ATM indirect sibcall
5303 shouldn't be used together with huge frame sizes in one
5304 function because of the frame_size check in sibcall.c. */
5305 gcc_assert (style);
5306 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5307 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5308 if (style < 0)
5309 RTX_FRAME_RELATED_P (insn) = 1;
5310 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5311 offset));
5312 }
5313 if (style < 0)
5314 RTX_FRAME_RELATED_P (insn) = 1;
5315 }
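/* Illustrative sketch, not part of the original source: on x86-64, when
   the adjustment does not fit a 32-bit signed immediate, the fallback
   above first loads the offset into %r11 and then performs the addition
   through that register, i.e. roughly

       movabsq $OFFSET, %r11
       addq    %r11, %rsp

   instead of a single add with an immediate operand.  */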
5316
5317 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5318
5319 static rtx
5320 ix86_internal_arg_pointer (void)
5321 {
5322 bool has_force_align_arg_pointer =
5323 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5324 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5325 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5326 && DECL_NAME (current_function_decl)
5327 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5328 && DECL_FILE_SCOPE_P (current_function_decl))
5329 || ix86_force_align_arg_pointer
5330 || has_force_align_arg_pointer)
5331 {
5332 /* Nested functions can't realign the stack due to a register
5333 conflict. */
5334 if (DECL_CONTEXT (current_function_decl)
5335 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5336 {
5337 if (ix86_force_align_arg_pointer)
5338 warning (0, "-mstackrealign ignored for nested functions");
5339 if (has_force_align_arg_pointer)
5340 error ("%s not supported for nested functions",
5341 ix86_force_align_arg_pointer_string);
5342 return virtual_incoming_args_rtx;
5343 }
5344 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5345 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5346 }
5347 else
5348 return virtual_incoming_args_rtx;
5349 }
5350
5351 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5352 This is called from dwarf2out.c to emit call frame instructions
5353 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5354 static void
5355 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5356 {
5357 rtx unspec = SET_SRC (pattern);
5358 gcc_assert (GET_CODE (unspec) == UNSPEC);
5359
5360 switch (index)
5361 {
5362 case UNSPEC_REG_SAVE:
5363 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5364 SET_DEST (pattern));
5365 break;
5366 case UNSPEC_DEF_CFA:
5367 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5368 INTVAL (XVECEXP (unspec, 0, 0)));
5369 break;
5370 default:
5371 gcc_unreachable ();
5372 }
5373 }
5374
5375 /* Expand the prologue into a bunch of separate insns. */
5376
5377 void
5378 ix86_expand_prologue (void)
5379 {
5380 rtx insn;
5381 bool pic_reg_used;
5382 struct ix86_frame frame;
5383 HOST_WIDE_INT allocate;
5384
5385 ix86_compute_frame_layout (&frame);
5386
5387 if (cfun->machine->force_align_arg_pointer)
5388 {
5389 rtx x, y;
5390
5391 /* Grab the argument pointer. */
5392 x = plus_constant (stack_pointer_rtx, 4);
5393 y = cfun->machine->force_align_arg_pointer;
5394 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5395 RTX_FRAME_RELATED_P (insn) = 1;
5396
5397 /* The unwind info consists of two parts: install the fafp as the cfa,
5398 and record the fafp as the "save register" of the stack pointer.
5399 The latter is there so that the unwinder can see where it
5400 should restore the stack pointer across the and insn. */
5401 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5402 x = gen_rtx_SET (VOIDmode, y, x);
5403 RTX_FRAME_RELATED_P (x) = 1;
5404 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5405 UNSPEC_REG_SAVE);
5406 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5407 RTX_FRAME_RELATED_P (y) = 1;
5408 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5409 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5410 REG_NOTES (insn) = x;
5411
5412 /* Align the stack. */
5413 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5414 GEN_INT (-16)));
5415
5416 /* And here we cheat like madmen with the unwind info. We force the
5417 cfa register back to sp+4, which is exactly what it was at the
5418 start of the function. Re-pushing the return address results in
5419 the return address being at the same spot relative to the cfa, and thus is
5420 correct wrt the unwind info. */
5421 x = cfun->machine->force_align_arg_pointer;
5422 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5423 insn = emit_insn (gen_push (x));
5424 RTX_FRAME_RELATED_P (insn) = 1;
5425
5426 x = GEN_INT (4);
5427 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5428 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5429 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5430 REG_NOTES (insn) = x;
5431 }
5432
5433 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5434 slower on all targets. Also sdb doesn't like it. */
5435
5436 if (frame_pointer_needed)
5437 {
5438 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5439 RTX_FRAME_RELATED_P (insn) = 1;
5440
5441 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5442 RTX_FRAME_RELATED_P (insn) = 1;
5443 }
5444
5445 allocate = frame.to_allocate;
5446
5447 if (!frame.save_regs_using_mov)
5448 ix86_emit_save_regs ();
5449 else
5450 allocate += frame.nregs * UNITS_PER_WORD;
5451
5452 /* When using the red zone we may start saving registers before allocating
5453 the stack frame, saving one cycle of the prologue. */
5454 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5455 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5456 : stack_pointer_rtx,
5457 -frame.nregs * UNITS_PER_WORD);
5458
5459 if (allocate == 0)
5460 ;
5461 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5462 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5463 GEN_INT (-allocate), -1);
5464 else
5465 {
5466 /* Only valid for Win32. */
5467 rtx eax = gen_rtx_REG (SImode, 0);
5468 bool eax_live = ix86_eax_live_at_start_p ();
5469 rtx t;
5470
5471 gcc_assert (!TARGET_64BIT);
5472
5473 if (eax_live)
5474 {
5475 emit_insn (gen_push (eax));
5476 allocate -= 4;
5477 }
5478
5479 emit_move_insn (eax, GEN_INT (allocate));
5480
5481 insn = emit_insn (gen_allocate_stack_worker (eax));
5482 RTX_FRAME_RELATED_P (insn) = 1;
5483 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5484 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5485 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5486 t, REG_NOTES (insn));
5487
5488 if (eax_live)
5489 {
5490 if (frame_pointer_needed)
5491 t = plus_constant (hard_frame_pointer_rtx,
5492 allocate
5493 - frame.to_allocate
5494 - frame.nregs * UNITS_PER_WORD);
5495 else
5496 t = plus_constant (stack_pointer_rtx, allocate);
5497 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5498 }
5499 }
5500
5501 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5502 {
5503 if (!frame_pointer_needed || !frame.to_allocate)
5504 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5505 else
5506 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5507 -frame.nregs * UNITS_PER_WORD);
5508 }
5509
5510 pic_reg_used = false;
5511 if (pic_offset_table_rtx
5512 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5513 || current_function_profile))
5514 {
5515 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5516
5517 if (alt_pic_reg_used != INVALID_REGNUM)
5518 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5519
5520 pic_reg_used = true;
5521 }
5522
5523 if (pic_reg_used)
5524 {
5525 if (TARGET_64BIT)
5526 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5527 else
5528 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5529
5530 /* Even with accurate pre-reload life analysis, we can wind up
5531 deleting all references to the pic register after reload.
5532 Consider if cross-jumping unifies two sides of a branch
5533 controlled by a comparison vs the only read from a global.
5534 In which case, allow the set_got to be deleted, though we're
5535 too late to do anything about the ebx save in the prologue. */
5536 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5537 }
5538
5539 /* Prevent function calls from being scheduled before the call to mcount.
5540 In the pic_reg_used case, make sure that the got load isn't deleted. */
5541 if (current_function_profile)
5542 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5543 }
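/* Illustrative sketch, not part of the original source: for a typical ia32
   function with a frame pointer, one saved register and no profiling, the
   prologue expanded above amounts to

       pushl %ebp
       movl  %esp, %ebp
       pushl %ebx               # push-based register save
       subl  $SIZE, %esp        # frame.to_allocate

   With the fast (move-based) prologue the push of %ebx is instead folded
   into the stack allocation and the register is stored with a mov.  */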
5544
5545 /* Emit code to restore saved registers using MOV insns. First register
5546 is restored from POINTER + OFFSET. */
5547 static void
5548 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5549 int maybe_eh_return)
5550 {
5551 int regno;
5552 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5553
5554 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5555 if (ix86_save_reg (regno, maybe_eh_return))
5556 {
5557 /* Ensure that adjust_address won't be forced to produce pointer
5558 out of range allowed by x86-64 instruction set. */
5559 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5560 {
5561 rtx r11;
5562
5563 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5564 emit_move_insn (r11, GEN_INT (offset));
5565 emit_insn (gen_adddi3 (r11, r11, pointer));
5566 base_address = gen_rtx_MEM (Pmode, r11);
5567 offset = 0;
5568 }
5569 emit_move_insn (gen_rtx_REG (Pmode, regno),
5570 adjust_address (base_address, Pmode, offset));
5571 offset += UNITS_PER_WORD;
5572 }
5573 }
5574
5575 /* Restore function stack, frame, and registers. */
5576
5577 void
5578 ix86_expand_epilogue (int style)
5579 {
5580 int regno;
5581 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5582 struct ix86_frame frame;
5583 HOST_WIDE_INT offset;
5584
5585 ix86_compute_frame_layout (&frame);
5586
5587 /* Calculate start of saved registers relative to ebp. Special care
5588 must be taken for the normal return case of a function using
5589 eh_return: the eax and edx registers are marked as saved, but not
5590 restored along this path. */
5591 offset = frame.nregs;
5592 if (current_function_calls_eh_return && style != 2)
5593 offset -= 2;
5594 offset *= -UNITS_PER_WORD;
5595
5596 /* If we're only restoring one register and sp is not valid then
5597 use a move instruction to restore the register, since it's
5598 less work than reloading sp and popping the register.
5599
5600 The default code results in a stack adjustment using an add/lea instruction,
5601 while this code results in a LEAVE instruction (or discrete equivalent),
5602 so it is profitable in some other cases as well, especially when there
5603 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5604 is set and there is exactly one register to pop. This heuristic may need
5605 some tuning in the future. */
5606 if ((!sp_valid && frame.nregs <= 1)
5607 || (TARGET_EPILOGUE_USING_MOVE
5608 && cfun->machine->use_fast_prologue_epilogue
5609 && (frame.nregs > 1 || frame.to_allocate))
5610 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5611 || (frame_pointer_needed && TARGET_USE_LEAVE
5612 && cfun->machine->use_fast_prologue_epilogue
5613 && frame.nregs == 1)
5614 || current_function_calls_eh_return)
5615 {
5616 /* Restore registers. We can use ebp or esp to address the memory
5617 locations. If both are available, default to ebp, since offsets
5618 are known to be small. The only exception is esp pointing directly to the
5619 end of the block of saved registers, where we may simplify the addressing
5620 mode. */
5621
5622 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5623 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5624 frame.to_allocate, style == 2);
5625 else
5626 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5627 offset, style == 2);
5628
5629 /* eh_return epilogues need %ecx added to the stack pointer. */
5630 if (style == 2)
5631 {
5632 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5633
5634 if (frame_pointer_needed)
5635 {
5636 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5637 tmp = plus_constant (tmp, UNITS_PER_WORD);
5638 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5639
5640 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5641 emit_move_insn (hard_frame_pointer_rtx, tmp);
5642
5643 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5644 const0_rtx, style);
5645 }
5646 else
5647 {
5648 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5649 tmp = plus_constant (tmp, (frame.to_allocate
5650 + frame.nregs * UNITS_PER_WORD));
5651 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5652 }
5653 }
5654 else if (!frame_pointer_needed)
5655 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5656 GEN_INT (frame.to_allocate
5657 + frame.nregs * UNITS_PER_WORD),
5658 style);
5659 /* If not an i386, mov & pop is faster than "leave". */
5660 else if (TARGET_USE_LEAVE || optimize_size
5661 || !cfun->machine->use_fast_prologue_epilogue)
5662 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5663 else
5664 {
5665 pro_epilogue_adjust_stack (stack_pointer_rtx,
5666 hard_frame_pointer_rtx,
5667 const0_rtx, style);
5668 if (TARGET_64BIT)
5669 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5670 else
5671 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5672 }
5673 }
5674 else
5675 {
5676 /* First step is to deallocate the stack frame so that we can
5677 pop the registers. */
5678 if (!sp_valid)
5679 {
5680 gcc_assert (frame_pointer_needed);
5681 pro_epilogue_adjust_stack (stack_pointer_rtx,
5682 hard_frame_pointer_rtx,
5683 GEN_INT (offset), style);
5684 }
5685 else if (frame.to_allocate)
5686 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5687 GEN_INT (frame.to_allocate), style);
5688
5689 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5690 if (ix86_save_reg (regno, false))
5691 {
5692 if (TARGET_64BIT)
5693 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5694 else
5695 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5696 }
5697 if (frame_pointer_needed)
5698 {
5699 /* Leave results in shorter dependency chains on CPUs that are
5700 able to grok it fast. */
5701 if (TARGET_USE_LEAVE)
5702 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5703 else if (TARGET_64BIT)
5704 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5705 else
5706 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5707 }
5708 }
5709
5710 if (cfun->machine->force_align_arg_pointer)
5711 {
5712 emit_insn (gen_addsi3 (stack_pointer_rtx,
5713 cfun->machine->force_align_arg_pointer,
5714 GEN_INT (-4)));
5715 }
5716
5717 /* Sibcall epilogues don't want a return instruction. */
5718 if (style == 0)
5719 return;
5720
5721 if (current_function_pops_args && current_function_args_size)
5722 {
5723 rtx popc = GEN_INT (current_function_pops_args);
5724
5725 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5726 return address, do an explicit add, and jump indirectly to the
5727 caller. */
5728
5729 if (current_function_pops_args >= 65536)
5730 {
5731 rtx ecx = gen_rtx_REG (SImode, 2);
5732
5733 /* There is no "pascal" calling convention in 64bit ABI. */
5734 gcc_assert (!TARGET_64BIT);
5735
5736 emit_insn (gen_popsi1 (ecx));
5737 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5738 emit_jump_insn (gen_return_indirect_internal (ecx));
5739 }
5740 else
5741 emit_jump_insn (gen_return_pop_internal (popc));
5742 }
5743 else
5744 emit_jump_insn (gen_return_internal ());
5745 }
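/* Illustrative sketch, not part of the original source: matching the
   prologue above, the frame-pointer epilogue is commonly either

       movl  -4(%ebp), %ebx     # move-based restore
       leave
       ret

   or the pop-based variant

       addl  $SIZE, %esp
       popl  %ebx
       popl  %ebp
       ret

   depending on the heuristics at the top of this function.  */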
5746
5747 /* Reset any state that compiling the function may have modified. */
5748
5749 static void
5750 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5751 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5752 {
5753 if (pic_offset_table_rtx)
5754 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5755 #if TARGET_MACHO
5756 /* Mach-O doesn't support labels at the end of objects, so if
5757 it looks like we might want one, insert a NOP. */
5758 {
5759 rtx insn = get_last_insn ();
5760 while (insn
5761 && NOTE_P (insn)
5762 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5763 insn = PREV_INSN (insn);
5764 if (insn
5765 && (LABEL_P (insn)
5766 || (NOTE_P (insn)
5767 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5768 fputs ("\tnop\n", file);
5769 }
5770 #endif
5771
5772 }
5773 \f
5774 /* Extract the parts of an RTL expression that is a valid memory address
5775 for an instruction. Return 0 if the structure of the address is
5776 grossly off. Return -1 if the address contains ASHIFT, so it is not
5777 strictly valid but is still used for computing the length of a lea instruction. */
5778
5779 int
5780 ix86_decompose_address (rtx addr, struct ix86_address *out)
5781 {
5782 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5783 rtx base_reg, index_reg;
5784 HOST_WIDE_INT scale = 1;
5785 rtx scale_rtx = NULL_RTX;
5786 int retval = 1;
5787 enum ix86_address_seg seg = SEG_DEFAULT;
5788
5789 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5790 base = addr;
5791 else if (GET_CODE (addr) == PLUS)
5792 {
5793 rtx addends[4], op;
5794 int n = 0, i;
5795
5796 op = addr;
5797 do
5798 {
5799 if (n >= 4)
5800 return 0;
5801 addends[n++] = XEXP (op, 1);
5802 op = XEXP (op, 0);
5803 }
5804 while (GET_CODE (op) == PLUS);
5805 if (n >= 4)
5806 return 0;
5807 addends[n] = op;
5808
5809 for (i = n; i >= 0; --i)
5810 {
5811 op = addends[i];
5812 switch (GET_CODE (op))
5813 {
5814 case MULT:
5815 if (index)
5816 return 0;
5817 index = XEXP (op, 0);
5818 scale_rtx = XEXP (op, 1);
5819 break;
5820
5821 case UNSPEC:
5822 if (XINT (op, 1) == UNSPEC_TP
5823 && TARGET_TLS_DIRECT_SEG_REFS
5824 && seg == SEG_DEFAULT)
5825 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5826 else
5827 return 0;
5828 break;
5829
5830 case REG:
5831 case SUBREG:
5832 if (!base)
5833 base = op;
5834 else if (!index)
5835 index = op;
5836 else
5837 return 0;
5838 break;
5839
5840 case CONST:
5841 case CONST_INT:
5842 case SYMBOL_REF:
5843 case LABEL_REF:
5844 if (disp)
5845 return 0;
5846 disp = op;
5847 break;
5848
5849 default:
5850 return 0;
5851 }
5852 }
5853 }
5854 else if (GET_CODE (addr) == MULT)
5855 {
5856 index = XEXP (addr, 0); /* index*scale */
5857 scale_rtx = XEXP (addr, 1);
5858 }
5859 else if (GET_CODE (addr) == ASHIFT)
5860 {
5861 rtx tmp;
5862
5863 /* We're called for lea too, which implements ashift on occasion. */
5864 index = XEXP (addr, 0);
5865 tmp = XEXP (addr, 1);
5866 if (GET_CODE (tmp) != CONST_INT)
5867 return 0;
5868 scale = INTVAL (tmp);
5869 if ((unsigned HOST_WIDE_INT) scale > 3)
5870 return 0;
5871 scale = 1 << scale;
5872 retval = -1;
5873 }
5874 else
5875 disp = addr; /* displacement */
5876
5877 /* Extract the integral value of scale. */
5878 if (scale_rtx)
5879 {
5880 if (GET_CODE (scale_rtx) != CONST_INT)
5881 return 0;
5882 scale = INTVAL (scale_rtx);
5883 }
5884
5885 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5886 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5887
5888 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5889 if (base_reg && index_reg && scale == 1
5890 && (index_reg == arg_pointer_rtx
5891 || index_reg == frame_pointer_rtx
5892 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5893 {
5894 rtx tmp;
5895 tmp = base, base = index, index = tmp;
5896 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5897 }
5898
5899 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5900 if ((base_reg == hard_frame_pointer_rtx
5901 || base_reg == frame_pointer_rtx
5902 || base_reg == arg_pointer_rtx) && !disp)
5903 disp = const0_rtx;
5904
5905 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5906 Avoid this by transforming to [%esi+0]. */
5907 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5908 && base_reg && !index_reg && !disp
5909 && REG_P (base_reg)
5910 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5911 disp = const0_rtx;
5912
5913 /* Special case: encode reg+reg instead of reg*2. */
5914 if (!base && index && scale && scale == 2)
5915 base = index, base_reg = index_reg, scale = 1;
5916
5917 /* Special case: scaling cannot be encoded without base or displacement. */
5918 if (!base && !disp && index && scale != 1)
5919 disp = const0_rtx;
5920
5921 out->base = base;
5922 out->index = index;
5923 out->disp = disp;
5924 out->scale = scale;
5925 out->seg = seg;
5926
5927 return retval;
5928 }
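/* Worked example, not part of the original source: the address

       (plus:SI (plus:SI (mult:SI (reg:SI A) (const_int 4))
                         (reg:SI B))
                (const_int 8))

   decomposes into base = B, index = A, scale = 4, disp = 8, i.e. the
   operand 8(%B,%A,4) in AT&T syntax.  */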
5929 \f
5930 /* Return cost of the memory address x.
5931 For i386, it is better to use a complex address than let gcc copy
5932 the address into a reg and make a new pseudo. But not if the address
5933 requires two regs - that would mean more pseudos with longer
5934 lifetimes. */
5935 static int
5936 ix86_address_cost (rtx x)
5937 {
5938 struct ix86_address parts;
5939 int cost = 1;
5940 int ok = ix86_decompose_address (x, &parts);
5941
5942 gcc_assert (ok);
5943
5944 if (parts.base && GET_CODE (parts.base) == SUBREG)
5945 parts.base = SUBREG_REG (parts.base);
5946 if (parts.index && GET_CODE (parts.index) == SUBREG)
5947 parts.index = SUBREG_REG (parts.index);
5948
5949 /* More complex memory references are better. */
5950 if (parts.disp && parts.disp != const0_rtx)
5951 cost--;
5952 if (parts.seg != SEG_DEFAULT)
5953 cost--;
5954
5955 /* Attempt to minimize number of registers in the address. */
5956 if ((parts.base
5957 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5958 || (parts.index
5959 && (!REG_P (parts.index)
5960 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5961 cost++;
5962
5963 if (parts.base
5964 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5965 && parts.index
5966 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5967 && parts.base != parts.index)
5968 cost++;
5969
5970 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5971 since its predecode logic can't detect the length of such instructions
5972 and decoding degenerates to vector decoded. Increase the cost of such
5973 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5974 to split such addresses or even refuse them altogether.
5975
5976 The following addressing modes are affected:
5977 [base+scale*index]
5978 [scale*index+disp]
5979 [base+index]
5980
5981 The first and last case may be avoidable by explicitly coding the zero in
5982 the memory address, but I don't have an AMD-K6 machine handy to check this
5983 theory. */
5984
5985 if (TARGET_K6
5986 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5987 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5988 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5989 cost += 10;
5990
5991 return cost;
5992 }
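/* Small worked example, not part of the original source: with hard
   registers, "4(%eax)" costs 0 (the displacement makes the address more
   complex, so it is preferred), "(%eax)" costs 1, and an address built
   from two distinct pseudo registers is penalized by both increments
   above, discouraging addresses that tie up extra registers.  */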
5993 \f
5994 /* If X is a machine specific address (i.e. a symbol or label being
5995 referenced as a displacement from the GOT implemented using an
5996 UNSPEC), then return the base term. Otherwise return X. */
5997
5998 rtx
5999 ix86_find_base_term (rtx x)
6000 {
6001 rtx term;
6002
6003 if (TARGET_64BIT)
6004 {
6005 if (GET_CODE (x) != CONST)
6006 return x;
6007 term = XEXP (x, 0);
6008 if (GET_CODE (term) == PLUS
6009 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6010 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6011 term = XEXP (term, 0);
6012 if (GET_CODE (term) != UNSPEC
6013 || XINT (term, 1) != UNSPEC_GOTPCREL)
6014 return x;
6015
6016 term = XVECEXP (term, 0, 0);
6017
6018 if (GET_CODE (term) != SYMBOL_REF
6019 && GET_CODE (term) != LABEL_REF)
6020 return x;
6021
6022 return term;
6023 }
6024
6025 term = ix86_delegitimize_address (x);
6026
6027 if (GET_CODE (term) != SYMBOL_REF
6028 && GET_CODE (term) != LABEL_REF)
6029 return x;
6030
6031 return term;
6032 }
6033
6034 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6035 this is used to form addresses to local data when -fPIC is in
6036 use. */
6037
6038 static bool
6039 darwin_local_data_pic (rtx disp)
6040 {
6041 if (GET_CODE (disp) == MINUS)
6042 {
6043 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6044 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6045 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6046 {
6047 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6048 if (! strcmp (sym_name, "<pic base>"))
6049 return true;
6050 }
6051 }
6052
6053 return false;
6054 }
6055 \f
6056 /* Determine if a given RTX is a valid constant. We already know this
6057 satisfies CONSTANT_P. */
6058
6059 bool
6060 legitimate_constant_p (rtx x)
6061 {
6062 switch (GET_CODE (x))
6063 {
6064 case CONST:
6065 x = XEXP (x, 0);
6066
6067 if (GET_CODE (x) == PLUS)
6068 {
6069 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6070 return false;
6071 x = XEXP (x, 0);
6072 }
6073
6074 if (TARGET_MACHO && darwin_local_data_pic (x))
6075 return true;
6076
6077 /* Only some unspecs are valid as "constants". */
6078 if (GET_CODE (x) == UNSPEC)
6079 switch (XINT (x, 1))
6080 {
6081 case UNSPEC_GOTOFF:
6082 return TARGET_64BIT;
6083 case UNSPEC_TPOFF:
6084 case UNSPEC_NTPOFF:
6085 x = XVECEXP (x, 0, 0);
6086 return (GET_CODE (x) == SYMBOL_REF
6087 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6088 case UNSPEC_DTPOFF:
6089 x = XVECEXP (x, 0, 0);
6090 return (GET_CODE (x) == SYMBOL_REF
6091 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6092 default:
6093 return false;
6094 }
6095
6096 /* We must have drilled down to a symbol. */
6097 if (GET_CODE (x) == LABEL_REF)
6098 return true;
6099 if (GET_CODE (x) != SYMBOL_REF)
6100 return false;
6101 /* FALLTHRU */
6102
6103 case SYMBOL_REF:
6104 /* TLS symbols are never valid. */
6105 if (SYMBOL_REF_TLS_MODEL (x))
6106 return false;
6107 break;
6108
6109 case CONST_DOUBLE:
6110 if (GET_MODE (x) == TImode
6111 && x != CONST0_RTX (TImode)
6112 && !TARGET_64BIT)
6113 return false;
6114 break;
6115
6116 case CONST_VECTOR:
6117 if (x == CONST0_RTX (GET_MODE (x)))
6118 return true;
6119 return false;
6120
6121 default:
6122 break;
6123 }
6124
6125 /* Otherwise we handle everything else in the move patterns. */
6126 return true;
6127 }
6128
6129 /* Determine if it's legal to put X into the constant pool. This
6130 is not possible for the address of thread-local symbols, which
6131 is checked above. */
6132
6133 static bool
6134 ix86_cannot_force_const_mem (rtx x)
6135 {
6136 /* We can always put integral constants and vectors in memory. */
6137 switch (GET_CODE (x))
6138 {
6139 case CONST_INT:
6140 case CONST_DOUBLE:
6141 case CONST_VECTOR:
6142 return false;
6143
6144 default:
6145 break;
6146 }
6147 return !legitimate_constant_p (x);
6148 }
6149
6150 /* Determine if a given RTX is a valid constant address. */
6151
6152 bool
6153 constant_address_p (rtx x)
6154 {
6155 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6156 }
6157
6158 /* Nonzero if the constant value X is a legitimate general operand
6159 when generating PIC code. It is given that flag_pic is on and
6160 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6161
6162 bool
6163 legitimate_pic_operand_p (rtx x)
6164 {
6165 rtx inner;
6166
6167 switch (GET_CODE (x))
6168 {
6169 case CONST:
6170 inner = XEXP (x, 0);
6171 if (GET_CODE (inner) == PLUS
6172 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6173 inner = XEXP (inner, 0);
6174
6175 /* Only some unspecs are valid as "constants". */
6176 if (GET_CODE (inner) == UNSPEC)
6177 switch (XINT (inner, 1))
6178 {
6179 case UNSPEC_GOTOFF:
6180 return TARGET_64BIT;
6181 case UNSPEC_TPOFF:
6182 x = XVECEXP (inner, 0, 0);
6183 return (GET_CODE (x) == SYMBOL_REF
6184 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6185 default:
6186 return false;
6187 }
6188 /* FALLTHRU */
6189
6190 case SYMBOL_REF:
6191 case LABEL_REF:
6192 return legitimate_pic_address_disp_p (x);
6193
6194 default:
6195 return true;
6196 }
6197 }
6198
6199 /* Determine if a given CONST RTX is a valid memory displacement
6200 in PIC mode. */
6201
6202 int
6203 legitimate_pic_address_disp_p (rtx disp)
6204 {
6205 bool saw_plus;
6206
6207 /* In 64bit mode we can allow direct addresses of symbols and labels
6208 when they are not dynamic symbols. */
6209 if (TARGET_64BIT)
6210 {
6211 rtx op0 = disp, op1;
6212
6213 switch (GET_CODE (disp))
6214 {
6215 case LABEL_REF:
6216 return true;
6217
6218 case CONST:
6219 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6220 break;
6221 op0 = XEXP (XEXP (disp, 0), 0);
6222 op1 = XEXP (XEXP (disp, 0), 1);
6223 if (GET_CODE (op1) != CONST_INT
6224 || INTVAL (op1) >= 16*1024*1024
6225 || INTVAL (op1) < -16*1024*1024)
6226 break;
6227 if (GET_CODE (op0) == LABEL_REF)
6228 return true;
6229 if (GET_CODE (op0) != SYMBOL_REF)
6230 break;
6231 /* FALLTHRU */
6232
6233 case SYMBOL_REF:
6234 /* TLS references should always be enclosed in UNSPEC. */
6235 if (SYMBOL_REF_TLS_MODEL (op0))
6236 return false;
6237 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6238 return true;
6239 break;
6240
6241 default:
6242 break;
6243 }
6244 }
6245 if (GET_CODE (disp) != CONST)
6246 return 0;
6247 disp = XEXP (disp, 0);
6248
6249 if (TARGET_64BIT)
6250 {
6251 /* It is not safe to allow PLUS expressions; this limits the allowed
6252 distance of GOT table references. We should not need these anyway. */
6253 if (GET_CODE (disp) != UNSPEC
6254 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6255 && XINT (disp, 1) != UNSPEC_GOTOFF))
6256 return 0;
6257
6258 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6259 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6260 return 0;
6261 return 1;
6262 }
6263
6264 saw_plus = false;
6265 if (GET_CODE (disp) == PLUS)
6266 {
6267 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6268 return 0;
6269 disp = XEXP (disp, 0);
6270 saw_plus = true;
6271 }
6272
6273 if (TARGET_MACHO && darwin_local_data_pic (disp))
6274 return 1;
6275
6276 if (GET_CODE (disp) != UNSPEC)
6277 return 0;
6278
6279 switch (XINT (disp, 1))
6280 {
6281 case UNSPEC_GOT:
6282 if (saw_plus)
6283 return false;
6284 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6285 case UNSPEC_GOTOFF:
6286 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6287 While the ABI also specifies a 32bit relocation, we don't produce it in
6288 the small PIC model at all. */
6289 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6290 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6291 && !TARGET_64BIT)
6292 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6293 return false;
6294 case UNSPEC_GOTTPOFF:
6295 case UNSPEC_GOTNTPOFF:
6296 case UNSPEC_INDNTPOFF:
6297 if (saw_plus)
6298 return false;
6299 disp = XVECEXP (disp, 0, 0);
6300 return (GET_CODE (disp) == SYMBOL_REF
6301 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6302 case UNSPEC_NTPOFF:
6303 disp = XVECEXP (disp, 0, 0);
6304 return (GET_CODE (disp) == SYMBOL_REF
6305 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6306 case UNSPEC_DTPOFF:
6307 disp = XVECEXP (disp, 0, 0);
6308 return (GET_CODE (disp) == SYMBOL_REF
6309 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6310 }
6311
6312 return 0;
6313 }
6314
6315 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6316 memory address for an instruction. The MODE argument is the machine mode
6317 for the MEM expression that wants to use this address.
6318
6319 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6320 convert common non-canonical forms to canonical form so that they will
6321 be recognized. */
6322
6323 int
6324 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6325 {
6326 struct ix86_address parts;
6327 rtx base, index, disp;
6328 HOST_WIDE_INT scale;
6329 const char *reason = NULL;
6330 rtx reason_rtx = NULL_RTX;
6331
6332 if (TARGET_DEBUG_ADDR)
6333 {
6334 fprintf (stderr,
6335 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6336 GET_MODE_NAME (mode), strict);
6337 debug_rtx (addr);
6338 }
6339
6340 if (ix86_decompose_address (addr, &parts) <= 0)
6341 {
6342 reason = "decomposition failed";
6343 goto report_error;
6344 }
6345
6346 base = parts.base;
6347 index = parts.index;
6348 disp = parts.disp;
6349 scale = parts.scale;
6350
6351 /* Validate base register.
6352
6353 Don't allow SUBREG's that span more than a word here. It can lead to spill
6354 failures when the base is one word out of a two word structure, which is
6355 represented internally as a DImode int. */
6356
6357 if (base)
6358 {
6359 rtx reg;
6360 reason_rtx = base;
6361
6362 if (REG_P (base))
6363 reg = base;
6364 else if (GET_CODE (base) == SUBREG
6365 && REG_P (SUBREG_REG (base))
6366 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6367 <= UNITS_PER_WORD)
6368 reg = SUBREG_REG (base);
6369 else
6370 {
6371 reason = "base is not a register";
6372 goto report_error;
6373 }
6374
6375 if (GET_MODE (base) != Pmode)
6376 {
6377 reason = "base is not in Pmode";
6378 goto report_error;
6379 }
6380
6381 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6382 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6383 {
6384 reason = "base is not valid";
6385 goto report_error;
6386 }
6387 }
6388
6389 /* Validate index register.
6390
6391 Don't allow SUBREG's that span more than a word here -- same as above. */
6392
6393 if (index)
6394 {
6395 rtx reg;
6396 reason_rtx = index;
6397
6398 if (REG_P (index))
6399 reg = index;
6400 else if (GET_CODE (index) == SUBREG
6401 && REG_P (SUBREG_REG (index))
6402 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6403 <= UNITS_PER_WORD)
6404 reg = SUBREG_REG (index);
6405 else
6406 {
6407 reason = "index is not a register";
6408 goto report_error;
6409 }
6410
6411 if (GET_MODE (index) != Pmode)
6412 {
6413 reason = "index is not in Pmode";
6414 goto report_error;
6415 }
6416
6417 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6418 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6419 {
6420 reason = "index is not valid";
6421 goto report_error;
6422 }
6423 }
6424
6425 /* Validate scale factor. */
6426 if (scale != 1)
6427 {
6428 reason_rtx = GEN_INT (scale);
6429 if (!index)
6430 {
6431 reason = "scale without index";
6432 goto report_error;
6433 }
6434
6435 if (scale != 2 && scale != 4 && scale != 8)
6436 {
6437 reason = "scale is not a valid multiplier";
6438 goto report_error;
6439 }
6440 }
6441
6442 /* Validate displacement. */
6443 if (disp)
6444 {
6445 reason_rtx = disp;
6446
6447 if (GET_CODE (disp) == CONST
6448 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6449 switch (XINT (XEXP (disp, 0), 1))
6450 {
6451 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6452 used. While the ABI also specifies 32bit relocations, we don't produce
6453 them at all and use IP relative addressing instead. */
6454 case UNSPEC_GOT:
6455 case UNSPEC_GOTOFF:
6456 gcc_assert (flag_pic);
6457 if (!TARGET_64BIT)
6458 goto is_legitimate_pic;
6459 reason = "64bit address unspec";
6460 goto report_error;
6461
6462 case UNSPEC_GOTPCREL:
6463 gcc_assert (flag_pic);
6464 goto is_legitimate_pic;
6465
6466 case UNSPEC_GOTTPOFF:
6467 case UNSPEC_GOTNTPOFF:
6468 case UNSPEC_INDNTPOFF:
6469 case UNSPEC_NTPOFF:
6470 case UNSPEC_DTPOFF:
6471 break;
6472
6473 default:
6474 reason = "invalid address unspec";
6475 goto report_error;
6476 }
6477
6478 else if (SYMBOLIC_CONST (disp)
6479 && (flag_pic
6480 || (TARGET_MACHO
6481 #if TARGET_MACHO
6482 && MACHOPIC_INDIRECT
6483 && !machopic_operand_p (disp)
6484 #endif
6485 )))
6486 {
6487
6488 is_legitimate_pic:
6489 if (TARGET_64BIT && (index || base))
6490 {
6491 /* foo@dtpoff(%rX) is ok. */
6492 if (GET_CODE (disp) != CONST
6493 || GET_CODE (XEXP (disp, 0)) != PLUS
6494 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6495 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6496 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6497 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6498 {
6499 reason = "non-constant pic memory reference";
6500 goto report_error;
6501 }
6502 }
6503 else if (! legitimate_pic_address_disp_p (disp))
6504 {
6505 reason = "displacement is an invalid pic construct";
6506 goto report_error;
6507 }
6508
6509 /* This code used to verify that a symbolic pic displacement
6510 includes the pic_offset_table_rtx register.
6511
6512 While this is a good idea, unfortunately these constructs may
6513 be created by the "adds using lea" optimization for incorrect
6514 code like:
6515
6516 int a;
6517 int foo(int i)
6518 {
6519 return *(&a+i);
6520 }
6521
6522 This code is nonsensical, but results in addressing the
6523 GOT table with a pic_offset_table_rtx base. We can't
6524 just refuse it easily, since it gets matched by the
6525 "addsi3" pattern, which later gets split to lea in case
6526 the output register differs from the input. While this
6527 could be handled by a separate addsi pattern for this case
6528 that never results in lea, disabling this test seems to be
6529 the easier and correct fix for the crash. */
6530 }
6531 else if (GET_CODE (disp) != LABEL_REF
6532 && GET_CODE (disp) != CONST_INT
6533 && (GET_CODE (disp) != CONST
6534 || !legitimate_constant_p (disp))
6535 && (GET_CODE (disp) != SYMBOL_REF
6536 || !legitimate_constant_p (disp)))
6537 {
6538 reason = "displacement is not constant";
6539 goto report_error;
6540 }
6541 else if (TARGET_64BIT
6542 && !x86_64_immediate_operand (disp, VOIDmode))
6543 {
6544 reason = "displacement is out of range";
6545 goto report_error;
6546 }
6547 }
6548
6549 /* Everything looks valid. */
6550 if (TARGET_DEBUG_ADDR)
6551 fprintf (stderr, "Success.\n");
6552 return TRUE;
6553
6554 report_error:
6555 if (TARGET_DEBUG_ADDR)
6556 {
6557 fprintf (stderr, "Error: %s\n", reason);
6558 debug_rtx (reason_rtx);
6559 }
6560 return FALSE;
6561 }
6562 \f
6563 /* Return a unique alias set for the GOT. */
6564
6565 static HOST_WIDE_INT
6566 ix86_GOT_alias_set (void)
6567 {
6568 static HOST_WIDE_INT set = -1;
6569 if (set == -1)
6570 set = new_alias_set ();
6571 return set;
6572 }
6573
6574 /* Return a legitimate reference for ORIG (an address) using the
6575 register REG. If REG is 0, a new pseudo is generated.
6576
6577 There are two types of references that must be handled:
6578
6579 1. Global data references must load the address from the GOT, via
6580 the PIC reg. An insn is emitted to do this load, and the reg is
6581 returned.
6582
6583 2. Static data references, constant pool addresses, and code labels
6584 compute the address as an offset from the GOT, whose base is in
6585 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6586 differentiate them from global data objects. The returned
6587 address is the PIC reg + an unspec constant.
6588
6589 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6590 reg also appears in the address. */
6591
6592 static rtx
6593 legitimize_pic_address (rtx orig, rtx reg)
6594 {
6595 rtx addr = orig;
6596 rtx new = orig;
6597 rtx base;
6598
6599 #if TARGET_MACHO
6600 if (TARGET_MACHO && !TARGET_64BIT)
6601 {
6602 if (reg == 0)
6603 reg = gen_reg_rtx (Pmode);
6604 /* Use the generic Mach-O PIC machinery. */
6605 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6606 }
6607 #endif
6608
6609 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6610 new = addr;
6611 else if (TARGET_64BIT
6612 && ix86_cmodel != CM_SMALL_PIC
6613 && local_symbolic_operand (addr, Pmode))
6614 {
6615 rtx tmpreg;
6616 /* This symbol may be referenced via a displacement from the PIC
6617 base address (@GOTOFF). */
6618
6619 if (reload_in_progress)
6620 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6621 if (GET_CODE (addr) == CONST)
6622 addr = XEXP (addr, 0);
6623 if (GET_CODE (addr) == PLUS)
6624 {
6625 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6626 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6627 }
6628 else
6629 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6630 new = gen_rtx_CONST (Pmode, new);
6631 if (!reg)
6632 tmpreg = gen_reg_rtx (Pmode);
6633 else
6634 tmpreg = reg;
6635 emit_move_insn (tmpreg, new);
6636
6637 if (reg != 0)
6638 {
6639 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6640 tmpreg, 1, OPTAB_DIRECT);
6641 new = reg;
6642 }
6643 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6644 }
6645 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6646 {
6647 /* This symbol may be referenced via a displacement from the PIC
6648 base address (@GOTOFF). */
6649
6650 if (reload_in_progress)
6651 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6652 if (GET_CODE (addr) == CONST)
6653 addr = XEXP (addr, 0);
6654 if (GET_CODE (addr) == PLUS)
6655 {
6656 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6657 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6658 }
6659 else
6660 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6661 new = gen_rtx_CONST (Pmode, new);
6662 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6663
6664 if (reg != 0)
6665 {
6666 emit_move_insn (reg, new);
6667 new = reg;
6668 }
6669 }
6670 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6671 {
6672 if (TARGET_64BIT)
6673 {
6674 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6675 new = gen_rtx_CONST (Pmode, new);
6676 new = gen_const_mem (Pmode, new);
6677 set_mem_alias_set (new, ix86_GOT_alias_set ());
6678
6679 if (reg == 0)
6680 reg = gen_reg_rtx (Pmode);
6681 /* Use gen_movsi directly; otherwise the address is loaded
6682 into a register for CSE. We don't want to CSE these addresses;
6683 instead we CSE addresses from the GOT table, so skip this. */
6684 emit_insn (gen_movsi (reg, new));
6685 new = reg;
6686 }
6687 else
6688 {
6689 /* This symbol must be referenced via a load from the
6690 Global Offset Table (@GOT). */
6691
6692 if (reload_in_progress)
6693 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6694 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6695 new = gen_rtx_CONST (Pmode, new);
6696 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6697 new = gen_const_mem (Pmode, new);
6698 set_mem_alias_set (new, ix86_GOT_alias_set ());
6699
6700 if (reg == 0)
6701 reg = gen_reg_rtx (Pmode);
6702 emit_move_insn (reg, new);
6703 new = reg;
6704 }
6705 }
6706 else
6707 {
6708 if (GET_CODE (addr) == CONST_INT
6709 && !x86_64_immediate_operand (addr, VOIDmode))
6710 {
6711 if (reg)
6712 {
6713 emit_move_insn (reg, addr);
6714 new = reg;
6715 }
6716 else
6717 new = force_reg (Pmode, addr);
6718 }
6719 else if (GET_CODE (addr) == CONST)
6720 {
6721 addr = XEXP (addr, 0);
6722
6723 /* We must match stuff we generate before. Assume the only
6724 unspecs that can get here are ours. Not that we could do
6725 anything with them anyway.... */
6726 if (GET_CODE (addr) == UNSPEC
6727 || (GET_CODE (addr) == PLUS
6728 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6729 return orig;
6730 gcc_assert (GET_CODE (addr) == PLUS);
6731 }
6732 if (GET_CODE (addr) == PLUS)
6733 {
6734 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6735
6736 /* Check first to see if this is a constant offset from a @GOTOFF
6737 symbol reference. */
6738 if (local_symbolic_operand (op0, Pmode)
6739 && GET_CODE (op1) == CONST_INT)
6740 {
6741 if (!TARGET_64BIT)
6742 {
6743 if (reload_in_progress)
6744 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6745 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6746 UNSPEC_GOTOFF);
6747 new = gen_rtx_PLUS (Pmode, new, op1);
6748 new = gen_rtx_CONST (Pmode, new);
6749 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6750
6751 if (reg != 0)
6752 {
6753 emit_move_insn (reg, new);
6754 new = reg;
6755 }
6756 }
6757 else
6758 {
6759 if (INTVAL (op1) < -16*1024*1024
6760 || INTVAL (op1) >= 16*1024*1024)
6761 {
6762 if (!x86_64_immediate_operand (op1, Pmode))
6763 op1 = force_reg (Pmode, op1);
6764 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6765 }
6766 }
6767 }
6768 else
6769 {
6770 base = legitimize_pic_address (XEXP (addr, 0), reg);
6771 new = legitimize_pic_address (XEXP (addr, 1),
6772 base == reg ? NULL_RTX : reg);
6773
6774 if (GET_CODE (new) == CONST_INT)
6775 new = plus_constant (base, INTVAL (new));
6776 else
6777 {
6778 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6779 {
6780 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6781 new = XEXP (new, 1);
6782 }
6783 new = gen_rtx_PLUS (Pmode, base, new);
6784 }
6785 }
6786 }
6787 }
6788 return new;
6789 }
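/* Illustrative sketch, not part of the original source: on ia32 with
   -fpic, a local symbol "x" is legitimized above into

       (plus:SI (reg:SI ebx)
                (const:SI (unspec:SI [(symbol_ref:SI "x")] UNSPEC_GOTOFF)))

   i.e. x@GOTOFF(%ebx), while a non-local symbol becomes a load from its
   GOT entry, i.e. a mem of pic_offset_table_rtx plus an UNSPEC_GOT
   constant, printed as x@GOT(%ebx).  */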
6790 \f
6791 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6792
6793 static rtx
6794 get_thread_pointer (int to_reg)
6795 {
6796 rtx tp, reg, insn;
6797
6798 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6799 if (!to_reg)
6800 return tp;
6801
6802 reg = gen_reg_rtx (Pmode);
6803 insn = gen_rtx_SET (VOIDmode, reg, tp);
6804 insn = emit_insn (insn);
6805
6806 return reg;
6807 }
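/* For reference (illustrative, not from the original source): the
   UNSPEC_TP built above is the thread pointer; when it appears as part of
   a memory address and TARGET_TLS_DIRECT_SEG_REFS is enabled, it is
   recognized by ix86_decompose_address and turned into a %gs-relative
   (ia32) or %fs-relative (x86-64) access.  */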
6808
6809 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6810 false if we expect this to be used for a memory address and true if
6811 we expect to load the address into a register. */
6812
6813 static rtx
6814 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6815 {
6816 rtx dest, base, off, pic, tp;
6817 int type;
6818
6819 switch (model)
6820 {
6821 case TLS_MODEL_GLOBAL_DYNAMIC:
6822 dest = gen_reg_rtx (Pmode);
6823 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6824
6825 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6826 {
6827 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6828
6829 start_sequence ();
6830 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6831 insns = get_insns ();
6832 end_sequence ();
6833
6834 emit_libcall_block (insns, dest, rax, x);
6835 }
6836 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6837 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6838 else
6839 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6840
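/* With the GNU2 dialect (TLS descriptors) the sequence above yields an
offset from the thread pointer rather than an absolute address, so add
the thread pointer explicitly.  */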
6841 if (TARGET_GNU2_TLS)
6842 {
6843 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6844
6845 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6846 }
6847 break;
6848
6849 case TLS_MODEL_LOCAL_DYNAMIC:
6850 base = gen_reg_rtx (Pmode);
6851 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6852
6853 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6854 {
6855 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6856
6857 start_sequence ();
6858 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6859 insns = get_insns ();
6860 end_sequence ();
6861
6862 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6863 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6864 emit_libcall_block (insns, base, rax, note);
6865 }
6866 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6867 emit_insn (gen_tls_local_dynamic_base_64 (base));
6868 else
6869 emit_insn (gen_tls_local_dynamic_base_32 (base));
6870
6871 if (TARGET_GNU2_TLS)
6872 {
6873 rtx x = ix86_tls_module_base ();
6874
6875 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6876 gen_rtx_MINUS (Pmode, x, tp));
6877 }
6878
6879 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6880 off = gen_rtx_CONST (Pmode, off);
6881
6882 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6883
6884 if (TARGET_GNU2_TLS)
6885 {
6886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6887
6888 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6889 }
6890
6891 break;
6892
6893 case TLS_MODEL_INITIAL_EXEC:
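/* Decide how the TP-relative offset is fetched: a %rip-relative
@GOTTPOFF load on 64-bit targets; a load through the PIC register for
32-bit PIC code; a freshly set-up GOT pointer for non-PIC code without
GNU TLS support; otherwise an absolute @INDNTPOFF reference.  */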
6894 if (TARGET_64BIT)
6895 {
6896 pic = NULL;
6897 type = UNSPEC_GOTNTPOFF;
6898 }
6899 else if (flag_pic)
6900 {
6901 if (reload_in_progress)
6902 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6903 pic = pic_offset_table_rtx;
6904 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6905 }
6906 else if (!TARGET_ANY_GNU_TLS)
6907 {
6908 pic = gen_reg_rtx (Pmode);
6909 emit_insn (gen_set_got (pic));
6910 type = UNSPEC_GOTTPOFF;
6911 }
6912 else
6913 {
6914 pic = NULL;
6915 type = UNSPEC_INDNTPOFF;
6916 }
6917
6918 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6919 off = gen_rtx_CONST (Pmode, off);
6920 if (pic)
6921 off = gen_rtx_PLUS (Pmode, pic, off);
6922 off = gen_const_mem (Pmode, off);
6923 set_mem_alias_set (off, ix86_GOT_alias_set ());
6924
6925 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6926 {
6927 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6928 off = force_reg (Pmode, off);
6929 return gen_rtx_PLUS (Pmode, base, off);
6930 }
6931 else
6932 {
6933 base = get_thread_pointer (true);
6934 dest = gen_reg_rtx (Pmode);
6935 emit_insn (gen_subsi3 (dest, base, off));
6936 }
6937 break;
6938
6939 case TLS_MODEL_LOCAL_EXEC:
6940 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6941 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6942 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6943 off = gen_rtx_CONST (Pmode, off);
6944
6945 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6946 {
6947 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6948 return gen_rtx_PLUS (Pmode, base, off);
6949 }
6950 else
6951 {
6952 base = get_thread_pointer (true);
6953 dest = gen_reg_rtx (Pmode);
6954 emit_insn (gen_subsi3 (dest, base, off));
6955 }
6956 break;
6957
6958 default:
6959 gcc_unreachable ();
6960 }
6961
6962 return dest;
6963 }
6964
6965 /* Try machine-dependent ways of modifying an illegitimate address
6966 to be legitimate. If we find one, return the new, valid address.
6967 This function is used in only one place: `memory_address' in explow.c.
6968
6969 OLDX is the address as it was before break_out_memory_refs was called.
6970 In some cases it is useful to look at this to decide what needs to be done.
6971
6972 MODE is passed so that this function can use
6973 GO_IF_LEGITIMATE_ADDRESS.
6974
6975 It is always safe for this function to do nothing. It exists to recognize
6976 opportunities to optimize the output.
6977
6978 For the 80386, we handle X+REG by loading X into a register R and
6979 using R+REG. R will go in a general reg and indexing will be used.
6980 However, if REG is a broken-out memory address or multiplication,
6981 nothing needs to be done because REG can certainly go in a general reg.
6982
6983 When -fpic is used, special handling is needed for symbolic references.
6984 See comments by legitimize_pic_address in i386.c for details. */
6985
6986 rtx
6987 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6988 {
6989 int changed = 0;
6990 unsigned log;
6991
6992 if (TARGET_DEBUG_ADDR)
6993 {
6994 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6995 GET_MODE_NAME (mode));
6996 debug_rtx (x);
6997 }
6998
6999 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7000 if (log)
7001 return legitimize_tls_address (x, log, false);
7002 if (GET_CODE (x) == CONST
7003 && GET_CODE (XEXP (x, 0)) == PLUS
7004 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7005 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7006 {
7007 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7008 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7009 }
7010
7011 if (flag_pic && SYMBOLIC_CONST (x))
7012 return legitimize_pic_address (x, 0);
7013
7014 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
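/* The hardware address forms accept a scale of 1, 2, 4 or 8, which is
represented as MULT in RTL addresses, so e.g. (ashift R 3) becomes
(mult R 8).  */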
7015 if (GET_CODE (x) == ASHIFT
7016 && GET_CODE (XEXP (x, 1)) == CONST_INT
7017 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7018 {
7019 changed = 1;
7020 log = INTVAL (XEXP (x, 1));
7021 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7022 GEN_INT (1 << log));
7023 }
7024
7025 if (GET_CODE (x) == PLUS)
7026 {
7027 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7028
7029 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7030 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7031 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7032 {
7033 changed = 1;
7034 log = INTVAL (XEXP (XEXP (x, 0), 1));
7035 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7036 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7037 GEN_INT (1 << log));
7038 }
7039
7040 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7041 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7042 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7043 {
7044 changed = 1;
7045 log = INTVAL (XEXP (XEXP (x, 1), 1));
7046 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7047 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7048 GEN_INT (1 << log));
7049 }
7050
7051 /* Put multiply first if it isn't already. */
7052 if (GET_CODE (XEXP (x, 1)) == MULT)
7053 {
7054 rtx tmp = XEXP (x, 0);
7055 XEXP (x, 0) = XEXP (x, 1);
7056 XEXP (x, 1) = tmp;
7057 changed = 1;
7058 }
7059
7060 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7061 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7062 created by virtual register instantiation, register elimination, and
7063 similar optimizations. */
7064 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7065 {
7066 changed = 1;
7067 x = gen_rtx_PLUS (Pmode,
7068 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7069 XEXP (XEXP (x, 1), 0)),
7070 XEXP (XEXP (x, 1), 1));
7071 }
7072
7073 /* Canonicalize
7074 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7075 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7076 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7077 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7078 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7079 && CONSTANT_P (XEXP (x, 1)))
7080 {
7081 rtx constant;
7082 rtx other = NULL_RTX;
7083
7084 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7085 {
7086 constant = XEXP (x, 1);
7087 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7088 }
7089 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7090 {
7091 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7092 other = XEXP (x, 1);
7093 }
7094 else
7095 constant = 0;
7096
7097 if (constant)
7098 {
7099 changed = 1;
7100 x = gen_rtx_PLUS (Pmode,
7101 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7102 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7103 plus_constant (other, INTVAL (constant)));
7104 }
7105 }
7106
7107 if (changed && legitimate_address_p (mode, x, FALSE))
7108 return x;
7109
7110 if (GET_CODE (XEXP (x, 0)) == MULT)
7111 {
7112 changed = 1;
7113 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7114 }
7115
7116 if (GET_CODE (XEXP (x, 1)) == MULT)
7117 {
7118 changed = 1;
7119 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7120 }
7121
7122 if (changed
7123 && GET_CODE (XEXP (x, 1)) == REG
7124 && GET_CODE (XEXP (x, 0)) == REG)
7125 return x;
7126
7127 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7128 {
7129 changed = 1;
7130 x = legitimize_pic_address (x, 0);
7131 }
7132
7133 if (changed && legitimate_address_p (mode, x, FALSE))
7134 return x;
7135
7136 if (GET_CODE (XEXP (x, 0)) == REG)
7137 {
7138 rtx temp = gen_reg_rtx (Pmode);
7139 rtx val = force_operand (XEXP (x, 1), temp);
7140 if (val != temp)
7141 emit_move_insn (temp, val);
7142
7143 XEXP (x, 1) = temp;
7144 return x;
7145 }
7146
7147 else if (GET_CODE (XEXP (x, 1)) == REG)
7148 {
7149 rtx temp = gen_reg_rtx (Pmode);
7150 rtx val = force_operand (XEXP (x, 0), temp);
7151 if (val != temp)
7152 emit_move_insn (temp, val);
7153
7154 XEXP (x, 0) = temp;
7155 return x;
7156 }
7157 }
7158
7159 return x;
7160 }
7161 \f
7162 /* Print an integer constant expression in assembler syntax. Addition
7163 and subtraction are the only arithmetic that may appear in these
7164 expressions. FILE is the stdio stream to write to, X is the rtx, and
7165 CODE is the operand print code from the output string. */
7166
7167 static void
7168 output_pic_addr_const (FILE *file, rtx x, int code)
7169 {
7170 char buf[256];
7171
7172 switch (GET_CODE (x))
7173 {
7174 case PC:
7175 gcc_assert (flag_pic);
7176 putc ('.', file);
7177 break;
7178
7179 case SYMBOL_REF:
7180 output_addr_const (file, x);
7181 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7182 fputs ("@PLT", file);
7183 break;
7184
7185 case LABEL_REF:
7186 x = XEXP (x, 0);
7187 /* FALLTHRU */
7188 case CODE_LABEL:
7189 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7190 assemble_name (asm_out_file, buf);
7191 break;
7192
7193 case CONST_INT:
7194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7195 break;
7196
7197 case CONST:
7198 /* This used to output parentheses around the expression,
7199 but that does not work on the 386 (either ATT or BSD assembler). */
7200 output_pic_addr_const (file, XEXP (x, 0), code);
7201 break;
7202
7203 case CONST_DOUBLE:
7204 if (GET_MODE (x) == VOIDmode)
7205 {
7206 /* We can use %d if the number is <32 bits and positive. */
7207 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7208 fprintf (file, "0x%lx%08lx",
7209 (unsigned long) CONST_DOUBLE_HIGH (x),
7210 (unsigned long) CONST_DOUBLE_LOW (x));
7211 else
7212 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7213 }
7214 else
7215 /* We can't handle floating point constants;
7216 PRINT_OPERAND must handle them. */
7217 output_operand_lossage ("floating constant misused");
7218 break;
7219
7220 case PLUS:
7221 /* Some assemblers need integer constants to appear first. */
7222 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7223 {
7224 output_pic_addr_const (file, XEXP (x, 0), code);
7225 putc ('+', file);
7226 output_pic_addr_const (file, XEXP (x, 1), code);
7227 }
7228 else
7229 {
7230 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7231 output_pic_addr_const (file, XEXP (x, 1), code);
7232 putc ('+', file);
7233 output_pic_addr_const (file, XEXP (x, 0), code);
7234 }
7235 break;
7236
7237 case MINUS:
7238 if (!TARGET_MACHO)
7239 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7240 output_pic_addr_const (file, XEXP (x, 0), code);
7241 putc ('-', file);
7242 output_pic_addr_const (file, XEXP (x, 1), code);
7243 if (!TARGET_MACHO)
7244 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7245 break;
7246
7247 case UNSPEC:
7248 gcc_assert (XVECLEN (x, 0) == 1);
7249 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7250 switch (XINT (x, 1))
7251 {
7252 case UNSPEC_GOT:
7253 fputs ("@GOT", file);
7254 break;
7255 case UNSPEC_GOTOFF:
7256 fputs ("@GOTOFF", file);
7257 break;
7258 case UNSPEC_GOTPCREL:
7259 fputs ("@GOTPCREL(%rip)", file);
7260 break;
7261 case UNSPEC_GOTTPOFF:
7262 /* FIXME: This might be @TPOFF in Sun ld too. */
7263 fputs ("@GOTTPOFF", file);
7264 break;
7265 case UNSPEC_TPOFF:
7266 fputs ("@TPOFF", file);
7267 break;
7268 case UNSPEC_NTPOFF:
7269 if (TARGET_64BIT)
7270 fputs ("@TPOFF", file);
7271 else
7272 fputs ("@NTPOFF", file);
7273 break;
7274 case UNSPEC_DTPOFF:
7275 fputs ("@DTPOFF", file);
7276 break;
7277 case UNSPEC_GOTNTPOFF:
7278 if (TARGET_64BIT)
7279 fputs ("@GOTTPOFF(%rip)", file);
7280 else
7281 fputs ("@GOTNTPOFF", file);
7282 break;
7283 case UNSPEC_INDNTPOFF:
7284 fputs ("@INDNTPOFF", file);
7285 break;
7286 default:
7287 output_operand_lossage ("invalid UNSPEC as operand");
7288 break;
7289 }
7290 break;
7291
7292 default:
7293 output_operand_lossage ("invalid expression as operand");
7294 }
7295 }
7296
7297 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7298 We need to emit DTP-relative relocations. */
7299
7300 static void
7301 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7302 {
7303 fputs (ASM_LONG, file);
7304 output_addr_const (file, x);
7305 fputs ("@DTPOFF", file);
7306 switch (size)
7307 {
7308 case 4:
7309 break;
7310 case 8:
7311 fputs (", 0", file);
7312 break;
7313 default:
7314 gcc_unreachable ();
7315 }
7316 }
7317
7318 /* In the name of slightly smaller debug output, and to cater to
7319 general assembler lossage, recognize PIC+GOTOFF and turn it back
7320 into a direct symbol reference.
7321
7322 On Darwin, this is necessary to avoid a crash, because Darwin
7323 has a different PIC label for each routine but the DWARF debugging
7324 information is not associated with any particular routine, so it's
7325 necessary to remove references to the PIC label from RTL stored by
7326 the DWARF output code. */
7327
7328 static rtx
7329 ix86_delegitimize_address (rtx orig_x)
7330 {
7331 rtx x = orig_x;
7332 /* reg_addend is NULL or a multiple of some register. */
7333 rtx reg_addend = NULL_RTX;
7334 /* const_addend is NULL or a const_int. */
7335 rtx const_addend = NULL_RTX;
7336 /* This is the result, or NULL. */
7337 rtx result = NULL_RTX;
7338
7339 if (GET_CODE (x) == MEM)
7340 x = XEXP (x, 0);
7341
7342 if (TARGET_64BIT)
7343 {
7344 if (GET_CODE (x) != CONST
7345 || GET_CODE (XEXP (x, 0)) != UNSPEC
7346 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7347 || GET_CODE (orig_x) != MEM)
7348 return orig_x;
7349 return XVECEXP (XEXP (x, 0), 0, 0);
7350 }
7351
7352 if (GET_CODE (x) != PLUS
7353 || GET_CODE (XEXP (x, 1)) != CONST)
7354 return orig_x;
7355
7356 if (GET_CODE (XEXP (x, 0)) == REG
7357 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7358 /* %ebx + GOT/GOTOFF */
7359 ;
7360 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7361 {
7362 /* %ebx + %reg * scale + GOT/GOTOFF */
7363 reg_addend = XEXP (x, 0);
7364 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7365 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7366 reg_addend = XEXP (reg_addend, 1);
7367 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7368 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7369 reg_addend = XEXP (reg_addend, 0);
7370 else
7371 return orig_x;
7372 if (GET_CODE (reg_addend) != REG
7373 && GET_CODE (reg_addend) != MULT
7374 && GET_CODE (reg_addend) != ASHIFT)
7375 return orig_x;
7376 }
7377 else
7378 return orig_x;
7379
7380 x = XEXP (XEXP (x, 1), 0);
7381 if (GET_CODE (x) == PLUS
7382 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7383 {
7384 const_addend = XEXP (x, 1);
7385 x = XEXP (x, 0);
7386 }
7387
7388 if (GET_CODE (x) == UNSPEC
7389 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7390 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7391 result = XVECEXP (x, 0, 0);
7392
7393 if (TARGET_MACHO && darwin_local_data_pic (x)
7394 && GET_CODE (orig_x) != MEM)
7395 result = XEXP (x, 0);
7396
7397 if (! result)
7398 return orig_x;
7399
7400 if (const_addend)
7401 result = gen_rtx_PLUS (Pmode, result, const_addend);
7402 if (reg_addend)
7403 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7404 return result;
7405 }
7406 \f
7407 static void
7408 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7409 int fp, FILE *file)
7410 {
7411 const char *suffix;
7412
7413 if (mode == CCFPmode || mode == CCFPUmode)
7414 {
7415 enum rtx_code second_code, bypass_code;
7416 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7417 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7418 code = ix86_fp_compare_code_to_integer (code);
7419 mode = CCmode;
7420 }
7421 if (reverse)
7422 code = reverse_condition (code);
7423
7424 switch (code)
7425 {
7426 case EQ:
7427 suffix = "e";
7428 break;
7429 case NE:
7430 suffix = "ne";
7431 break;
7432 case GT:
7433 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7434 suffix = "g";
7435 break;
7436 case GTU:
7437 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7438 Those same assemblers have the same but opposite lossage on cmov. */
7439 gcc_assert (mode == CCmode);
7440 suffix = fp ? "nbe" : "a";
7441 break;
7442 case LT:
7443 switch (mode)
7444 {
7445 case CCNOmode:
7446 case CCGOCmode:
7447 suffix = "s";
7448 break;
7449
7450 case CCmode:
7451 case CCGCmode:
7452 suffix = "l";
7453 break;
7454
7455 default:
7456 gcc_unreachable ();
7457 }
7458 break;
7459 case LTU:
7460 gcc_assert (mode == CCmode);
7461 suffix = "b";
7462 break;
7463 case GE:
7464 switch (mode)
7465 {
7466 case CCNOmode:
7467 case CCGOCmode:
7468 suffix = "ns";
7469 break;
7470
7471 case CCmode:
7472 case CCGCmode:
7473 suffix = "ge";
7474 break;
7475
7476 default:
7477 gcc_unreachable ();
7478 }
7479 break;
7480 case GEU:
7481 /* ??? As above. */
7482 gcc_assert (mode == CCmode);
7483 suffix = fp ? "nb" : "ae";
7484 break;
7485 case LE:
7486 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7487 suffix = "le";
7488 break;
7489 case LEU:
7490 gcc_assert (mode == CCmode);
7491 suffix = "be";
7492 break;
7493 case UNORDERED:
7494 suffix = fp ? "u" : "p";
7495 break;
7496 case ORDERED:
7497 suffix = fp ? "nu" : "np";
7498 break;
7499 default:
7500 gcc_unreachable ();
7501 }
7502 fputs (suffix, file);
7503 }
7504
7505 /* Print the name of register X to FILE based on its machine mode and number.
7506 If CODE is 'w', pretend the mode is HImode.
7507 If CODE is 'b', pretend the mode is QImode.
7508 If CODE is 'k', pretend the mode is SImode.
7509 If CODE is 'q', pretend the mode is DImode.
7510 If CODE is 'h', pretend the reg is the 'high' byte register.
7511 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7512
7513 void
7514 print_reg (rtx x, int code, FILE *file)
7515 {
7516 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7517 && REGNO (x) != FRAME_POINTER_REGNUM
7518 && REGNO (x) != FLAGS_REG
7519 && REGNO (x) != FPSR_REG
7520 && REGNO (x) != FPCR_REG);
7521
7522 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7523 putc ('%', file);
7524
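/* From this point on CODE holds the operand size in bytes, with 0
meaning a high byte register and 3 meaning an x87 stack register.  */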
7525 if (code == 'w' || MMX_REG_P (x))
7526 code = 2;
7527 else if (code == 'b')
7528 code = 1;
7529 else if (code == 'k')
7530 code = 4;
7531 else if (code == 'q')
7532 code = 8;
7533 else if (code == 'y')
7534 code = 3;
7535 else if (code == 'h')
7536 code = 0;
7537 else
7538 code = GET_MODE_SIZE (GET_MODE (x));
7539
7540 /* Irritatingly, AMD extended registers use a different naming convention
7541 from the normal registers. */
7542 if (REX_INT_REG_P (x))
7543 {
7544 gcc_assert (TARGET_64BIT);
7545 switch (code)
7546 {
7547 case 0:
7548 error ("extended registers have no high halves");
7549 break;
7550 case 1:
7551 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7552 break;
7553 case 2:
7554 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7555 break;
7556 case 4:
7557 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7558 break;
7559 case 8:
7560 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7561 break;
7562 default:
7563 error ("unsupported operand size for extended register");
7564 break;
7565 }
7566 return;
7567 }
7568 switch (code)
7569 {
7570 case 3:
7571 if (STACK_TOP_P (x))
7572 {
7573 fputs ("st(0)", file);
7574 break;
7575 }
7576 /* FALLTHRU */
7577 case 8:
7578 case 4:
7579 case 12:
7580 if (! ANY_FP_REG_P (x))
7581 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7582 /* FALLTHRU */
7583 case 16:
7584 case 2:
7585 normal:
7586 fputs (hi_reg_name[REGNO (x)], file);
7587 break;
7588 case 1:
7589 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7590 goto normal;
7591 fputs (qi_reg_name[REGNO (x)], file);
7592 break;
7593 case 0:
7594 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7595 goto normal;
7596 fputs (qi_high_reg_name[REGNO (x)], file);
7597 break;
7598 default:
7599 gcc_unreachable ();
7600 }
7601 }
7602
7603 /* Locate some local-dynamic symbol still in use by this function
7604 so that we can print its name in some tls_local_dynamic_base
7605 pattern. */
7606
7607 static const char *
7608 get_some_local_dynamic_name (void)
7609 {
7610 rtx insn;
7611
7612 if (cfun->machine->some_ld_name)
7613 return cfun->machine->some_ld_name;
7614
7615 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7616 if (INSN_P (insn)
7617 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7618 return cfun->machine->some_ld_name;
7619
7620 gcc_unreachable ();
7621 }
7622
7623 static int
7624 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7625 {
7626 rtx x = *px;
7627
7628 if (GET_CODE (x) == SYMBOL_REF
7629 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7630 {
7631 cfun->machine->some_ld_name = XSTR (x, 0);
7632 return 1;
7633 }
7634
7635 return 0;
7636 }
7637
7638 /* Meaning of CODE:
7639 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7640 C -- print opcode suffix for set/cmov insn.
7641 c -- like C, but print reversed condition
7642 F,f -- likewise, but for floating-point.
7643 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7644 otherwise nothing
7645 R -- print the prefix for register names.
7646 z -- print the opcode suffix for the size of the current operand.
7647 * -- print a star (in certain assembler syntax)
7648 A -- print an absolute memory reference.
7649 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7650 s -- print a shift double count, followed by the assembler's argument
7651 delimiter.
7652 b -- print the QImode name of the register for the indicated operand.
7653 %b0 would print %al if operands[0] is reg 0.
7654 w -- likewise, print the HImode name of the register.
7655 k -- likewise, print the SImode name of the register.
7656 q -- likewise, print the DImode name of the register.
7657 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7658 y -- print "st(0)" instead of "st" as a register.
7659 D -- print condition for SSE cmp instruction.
7660 P -- if PIC, print an @PLT suffix.
7661 X -- don't print any sort of PIC '@' suffix for a symbol.
7662 & -- print some in-use local-dynamic symbol name.
7663 H -- print a memory address offset by 8; used for sse high-parts
7664 */
7665
7666 void
7667 print_operand (FILE *file, rtx x, int code)
7668 {
7669 if (code)
7670 {
7671 switch (code)
7672 {
7673 case '*':
7674 if (ASSEMBLER_DIALECT == ASM_ATT)
7675 putc ('*', file);
7676 return;
7677
7678 case '&':
7679 assemble_name (file, get_some_local_dynamic_name ());
7680 return;
7681
7682 case 'A':
7683 switch (ASSEMBLER_DIALECT)
7684 {
7685 case ASM_ATT:
7686 putc ('*', file);
7687 break;
7688
7689 case ASM_INTEL:
7690 /* Intel syntax. For absolute addresses, registers should not
7691 be surrounded by brackets. */
7692 if (GET_CODE (x) != REG)
7693 {
7694 putc ('[', file);
7695 PRINT_OPERAND (file, x, 0);
7696 putc (']', file);
7697 return;
7698 }
7699 break;
7700
7701 default:
7702 gcc_unreachable ();
7703 }
7704
7705 PRINT_OPERAND (file, x, 0);
7706 return;
7707
7708
7709 case 'L':
7710 if (ASSEMBLER_DIALECT == ASM_ATT)
7711 putc ('l', file);
7712 return;
7713
7714 case 'W':
7715 if (ASSEMBLER_DIALECT == ASM_ATT)
7716 putc ('w', file);
7717 return;
7718
7719 case 'B':
7720 if (ASSEMBLER_DIALECT == ASM_ATT)
7721 putc ('b', file);
7722 return;
7723
7724 case 'Q':
7725 if (ASSEMBLER_DIALECT == ASM_ATT)
7726 putc ('l', file);
7727 return;
7728
7729 case 'S':
7730 if (ASSEMBLER_DIALECT == ASM_ATT)
7731 putc ('s', file);
7732 return;
7733
7734 case 'T':
7735 if (ASSEMBLER_DIALECT == ASM_ATT)
7736 putc ('t', file);
7737 return;
7738
7739 case 'z':
7740 /* 387 opcodes don't get size suffixes if the operands are
7741 registers. */
7742 if (STACK_REG_P (x))
7743 return;
7744
7745 /* Likewise if using Intel opcodes. */
7746 if (ASSEMBLER_DIALECT == ASM_INTEL)
7747 return;
7748
7749 /* Derive the opcode size suffix from the size of the operand. */
7750 switch (GET_MODE_SIZE (GET_MODE (x)))
7751 {
7752 case 2:
7753 #ifdef HAVE_GAS_FILDS_FISTS
7754 putc ('s', file);
7755 #endif
7756 return;
7757
7758 case 4:
7759 if (GET_MODE (x) == SFmode)
7760 {
7761 putc ('s', file);
7762 return;
7763 }
7764 else
7765 putc ('l', file);
7766 return;
7767
7768 case 12:
7769 case 16:
7770 putc ('t', file);
7771 return;
7772
7773 case 8:
7774 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7775 {
7776 #ifdef GAS_MNEMONICS
7777 putc ('q', file);
7778 #else
7779 putc ('l', file);
7780 putc ('l', file);
7781 #endif
7782 }
7783 else
7784 putc ('l', file);
7785 return;
7786
7787 default:
7788 gcc_unreachable ();
7789 }
7790
7791 case 'b':
7792 case 'w':
7793 case 'k':
7794 case 'q':
7795 case 'h':
7796 case 'y':
7797 case 'X':
7798 case 'P':
7799 break;
7800
7801 case 's':
7802 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7803 {
7804 PRINT_OPERAND (file, x, 0);
7805 putc (',', file);
7806 }
7807 return;
7808
7809 case 'D':
7810 /* A little bit of brain damage here. The SSE compare instructions
7811 use completely different names for the comparisons than the
7812 fp conditional moves do. */
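/* For example, both LT and UNLT print as "lt", which the SSE compare
patterns turn into mnemonics such as cmpltss and cmpltsd.  */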
7813 switch (GET_CODE (x))
7814 {
7815 case EQ:
7816 case UNEQ:
7817 fputs ("eq", file);
7818 break;
7819 case LT:
7820 case UNLT:
7821 fputs ("lt", file);
7822 break;
7823 case LE:
7824 case UNLE:
7825 fputs ("le", file);
7826 break;
7827 case UNORDERED:
7828 fputs ("unord", file);
7829 break;
7830 case NE:
7831 case LTGT:
7832 fputs ("neq", file);
7833 break;
7834 case UNGE:
7835 case GE:
7836 fputs ("nlt", file);
7837 break;
7838 case UNGT:
7839 case GT:
7840 fputs ("nle", file);
7841 break;
7842 case ORDERED:
7843 fputs ("ord", file);
7844 break;
7845 default:
7846 gcc_unreachable ();
7847 }
7848 return;
7849 case 'O':
7850 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7851 if (ASSEMBLER_DIALECT == ASM_ATT)
7852 {
7853 switch (GET_MODE (x))
7854 {
7855 case HImode: putc ('w', file); break;
7856 case SImode:
7857 case SFmode: putc ('l', file); break;
7858 case DImode:
7859 case DFmode: putc ('q', file); break;
7860 default: gcc_unreachable ();
7861 }
7862 putc ('.', file);
7863 }
7864 #endif
7865 return;
7866 case 'C':
7867 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7868 return;
7869 case 'F':
7870 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7871 if (ASSEMBLER_DIALECT == ASM_ATT)
7872 putc ('.', file);
7873 #endif
7874 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7875 return;
7876
7877 /* Like above, but reverse condition */
7878 case 'c':
7879 /* Check to see if argument to %c is really a constant
7880 and not a condition code which needs to be reversed. */
7881 if (!COMPARISON_P (x))
7882 {
7883 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7884 return;
7885 }
7886 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7887 return;
7888 case 'f':
7889 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7890 if (ASSEMBLER_DIALECT == ASM_ATT)
7891 putc ('.', file);
7892 #endif
7893 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7894 return;
7895
7896 case 'H':
7897 /* It doesn't actually matter what mode we use here, as we're
7898 only going to use this for printing. */
7899 x = adjust_address_nv (x, DImode, 8);
7900 break;
7901
7902 case '+':
7903 {
7904 rtx x;
7905
7906 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7907 return;
7908
7909 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7910 if (x)
7911 {
7912 int pred_val = INTVAL (XEXP (x, 0));
7913
7914 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7915 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7916 {
7917 int taken = pred_val > REG_BR_PROB_BASE / 2;
7918 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7919
7920 /* Emit hints only in the case where the default branch prediction
7921 heuristics would fail. */
7922 if (taken != cputaken)
7923 {
7924 /* We use 3e (DS) prefix for taken branches and
7925 2e (CS) prefix for not taken branches. */
7926 if (taken)
7927 fputs ("ds ; ", file);
7928 else
7929 fputs ("cs ; ", file);
7930 }
7931 }
7932 }
7933 return;
7934 }
7935 default:
7936 output_operand_lossage ("invalid operand code '%c'", code);
7937 }
7938 }
7939
7940 if (GET_CODE (x) == REG)
7941 print_reg (x, code, file);
7942
7943 else if (GET_CODE (x) == MEM)
7944 {
7945 /* No `byte ptr' prefix for call instructions. */
7946 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7947 {
7948 const char * size;
7949 switch (GET_MODE_SIZE (GET_MODE (x)))
7950 {
7951 case 1: size = "BYTE"; break;
7952 case 2: size = "WORD"; break;
7953 case 4: size = "DWORD"; break;
7954 case 8: size = "QWORD"; break;
7955 case 12: size = "XWORD"; break;
7956 case 16: size = "XMMWORD"; break;
7957 default:
7958 gcc_unreachable ();
7959 }
7960
7961 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7962 if (code == 'b')
7963 size = "BYTE";
7964 else if (code == 'w')
7965 size = "WORD";
7966 else if (code == 'k')
7967 size = "DWORD";
7968
7969 fputs (size, file);
7970 fputs (" PTR ", file);
7971 }
7972
7973 x = XEXP (x, 0);
7974 /* Avoid (%rip) for call operands. */
7975 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7976 && GET_CODE (x) != CONST_INT)
7977 output_addr_const (file, x);
7978 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7979 output_operand_lossage ("invalid constraints for operand");
7980 else
7981 output_address (x);
7982 }
7983
7984 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7985 {
7986 REAL_VALUE_TYPE r;
7987 long l;
7988
7989 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7990 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7991
7992 if (ASSEMBLER_DIALECT == ASM_ATT)
7993 putc ('$', file);
7994 fprintf (file, "0x%08lx", l);
7995 }
7996
7997 /* These float cases don't actually occur as immediate operands. */
7998 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7999 {
8000 char dstr[30];
8001
8002 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8003 fprintf (file, "%s", dstr);
8004 }
8005
8006 else if (GET_CODE (x) == CONST_DOUBLE
8007 && GET_MODE (x) == XFmode)
8008 {
8009 char dstr[30];
8010
8011 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8012 fprintf (file, "%s", dstr);
8013 }
8014
8015 else
8016 {
8017 /* We have patterns that allow zero sets of memory, for instance.
8018 In 64-bit mode, we should probably support all 8-byte vectors,
8019 since we can in fact encode that into an immediate. */
8020 if (GET_CODE (x) == CONST_VECTOR)
8021 {
8022 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8023 x = const0_rtx;
8024 }
8025
8026 if (code != 'P')
8027 {
8028 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8029 {
8030 if (ASSEMBLER_DIALECT == ASM_ATT)
8031 putc ('$', file);
8032 }
8033 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8034 || GET_CODE (x) == LABEL_REF)
8035 {
8036 if (ASSEMBLER_DIALECT == ASM_ATT)
8037 putc ('$', file);
8038 else
8039 fputs ("OFFSET FLAT:", file);
8040 }
8041 }
8042 if (GET_CODE (x) == CONST_INT)
8043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8044 else if (flag_pic)
8045 output_pic_addr_const (file, x, code);
8046 else
8047 output_addr_const (file, x);
8048 }
8049 }
8050 \f
8051 /* Print a memory operand whose address is ADDR. */
8052
8053 void
8054 print_operand_address (FILE *file, rtx addr)
8055 {
8056 struct ix86_address parts;
8057 rtx base, index, disp;
8058 int scale;
8059 int ok = ix86_decompose_address (addr, &parts);
8060
8061 gcc_assert (ok);
8062
8063 base = parts.base;
8064 index = parts.index;
8065 disp = parts.disp;
8066 scale = parts.scale;
8067
8068 switch (parts.seg)
8069 {
8070 case SEG_DEFAULT:
8071 break;
8072 case SEG_FS:
8073 case SEG_GS:
8074 if (USER_LABEL_PREFIX[0] == 0)
8075 putc ('%', file);
8076 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8077 break;
8078 default:
8079 gcc_unreachable ();
8080 }
8081
8082 if (!base && !index)
8083 {
8084 /* A displacement-only address requires special attention. */
8085
8086 if (GET_CODE (disp) == CONST_INT)
8087 {
8088 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8089 {
8090 if (USER_LABEL_PREFIX[0] == 0)
8091 putc ('%', file);
8092 fputs ("ds:", file);
8093 }
8094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8095 }
8096 else if (flag_pic)
8097 output_pic_addr_const (file, disp, 0);
8098 else
8099 output_addr_const (file, disp);
8100
8101 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
8102 if (TARGET_64BIT)
8103 {
8104 if (GET_CODE (disp) == CONST
8105 && GET_CODE (XEXP (disp, 0)) == PLUS
8106 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8107 disp = XEXP (XEXP (disp, 0), 0);
8108 if (GET_CODE (disp) == LABEL_REF
8109 || (GET_CODE (disp) == SYMBOL_REF
8110 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8111 fputs ("(%rip)", file);
8112 }
8113 }
8114 else
8115 {
8116 if (ASSEMBLER_DIALECT == ASM_ATT)
8117 {
8118 if (disp)
8119 {
8120 if (flag_pic)
8121 output_pic_addr_const (file, disp, 0);
8122 else if (GET_CODE (disp) == LABEL_REF)
8123 output_asm_label (disp);
8124 else
8125 output_addr_const (file, disp);
8126 }
8127
8128 putc ('(', file);
8129 if (base)
8130 print_reg (base, 0, file);
8131 if (index)
8132 {
8133 putc (',', file);
8134 print_reg (index, 0, file);
8135 if (scale != 1)
8136 fprintf (file, ",%d", scale);
8137 }
8138 putc (')', file);
8139 }
8140 else
8141 {
8142 rtx offset = NULL_RTX;
8143
8144 if (disp)
8145 {
8146 /* Pull out the offset of a symbol; print any symbol itself. */
8147 if (GET_CODE (disp) == CONST
8148 && GET_CODE (XEXP (disp, 0)) == PLUS
8149 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8150 {
8151 offset = XEXP (XEXP (disp, 0), 1);
8152 disp = gen_rtx_CONST (VOIDmode,
8153 XEXP (XEXP (disp, 0), 0));
8154 }
8155
8156 if (flag_pic)
8157 output_pic_addr_const (file, disp, 0);
8158 else if (GET_CODE (disp) == LABEL_REF)
8159 output_asm_label (disp);
8160 else if (GET_CODE (disp) == CONST_INT)
8161 offset = disp;
8162 else
8163 output_addr_const (file, disp);
8164 }
8165
8166 putc ('[', file);
8167 if (base)
8168 {
8169 print_reg (base, 0, file);
8170 if (offset)
8171 {
8172 if (INTVAL (offset) >= 0)
8173 putc ('+', file);
8174 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8175 }
8176 }
8177 else if (offset)
8178 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8179 else
8180 putc ('0', file);
8181
8182 if (index)
8183 {
8184 putc ('+', file);
8185 print_reg (index, 0, file);
8186 if (scale != 1)
8187 fprintf (file, "*%d", scale);
8188 }
8189 putc (']', file);
8190 }
8191 }
8192 }
8193
8194 bool
8195 output_addr_const_extra (FILE *file, rtx x)
8196 {
8197 rtx op;
8198
8199 if (GET_CODE (x) != UNSPEC)
8200 return false;
8201
8202 op = XVECEXP (x, 0, 0);
8203 switch (XINT (x, 1))
8204 {
8205 case UNSPEC_GOTTPOFF:
8206 output_addr_const (file, op);
8207 /* FIXME: This might be @TPOFF in Sun ld. */
8208 fputs ("@GOTTPOFF", file);
8209 break;
8210 case UNSPEC_TPOFF:
8211 output_addr_const (file, op);
8212 fputs ("@TPOFF", file);
8213 break;
8214 case UNSPEC_NTPOFF:
8215 output_addr_const (file, op);
8216 if (TARGET_64BIT)
8217 fputs ("@TPOFF", file);
8218 else
8219 fputs ("@NTPOFF", file);
8220 break;
8221 case UNSPEC_DTPOFF:
8222 output_addr_const (file, op);
8223 fputs ("@DTPOFF", file);
8224 break;
8225 case UNSPEC_GOTNTPOFF:
8226 output_addr_const (file, op);
8227 if (TARGET_64BIT)
8228 fputs ("@GOTTPOFF(%rip)", file);
8229 else
8230 fputs ("@GOTNTPOFF", file);
8231 break;
8232 case UNSPEC_INDNTPOFF:
8233 output_addr_const (file, op);
8234 fputs ("@INDNTPOFF", file);
8235 break;
8236
8237 default:
8238 return false;
8239 }
8240
8241 return true;
8242 }
8243 \f
8244 /* Split one or more DImode RTL references into pairs of SImode
8245 references. The RTL can be REG, offsettable MEM, integer constant, or
8246 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8247 split and "num" is its length. lo_half and hi_half are output arrays
8248 that parallel "operands". */
8249
8250 void
8251 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8252 {
8253 while (num--)
8254 {
8255 rtx op = operands[num];
8256
8257 /* simplify_subreg refuses to split volatile memory addresses,
8258 but we still have to handle them. */
8259 if (GET_CODE (op) == MEM)
8260 {
8261 lo_half[num] = adjust_address (op, SImode, 0);
8262 hi_half[num] = adjust_address (op, SImode, 4);
8263 }
8264 else
8265 {
8266 lo_half[num] = simplify_gen_subreg (SImode, op,
8267 GET_MODE (op) == VOIDmode
8268 ? DImode : GET_MODE (op), 0);
8269 hi_half[num] = simplify_gen_subreg (SImode, op,
8270 GET_MODE (op) == VOIDmode
8271 ? DImode : GET_MODE (op), 4);
8272 }
8273 }
8274 }
8275 /* Split one or more TImode RTL references into pairs of DImode
8276 references. The RTL can be REG, offsettable MEM, integer constant, or
8277 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8278 split and "num" is its length. lo_half and hi_half are output arrays
8279 that parallel "operands". */
8280
8281 void
8282 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8283 {
8284 while (num--)
8285 {
8286 rtx op = operands[num];
8287
8288 /* simplify_subreg refuses to split volatile memory addresses, but we
8289 still have to handle them. */
8290 if (GET_CODE (op) == MEM)
8291 {
8292 lo_half[num] = adjust_address (op, DImode, 0);
8293 hi_half[num] = adjust_address (op, DImode, 8);
8294 }
8295 else
8296 {
8297 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8298 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8299 }
8300 }
8301 }
8302 \f
8303 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8304 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8305 is the expression of the binary operation. The output may either be
8306 emitted here, or returned to the caller, like all output_* functions.
8307
8308 There is no guarantee that the operands are the same mode, as they
8309 might be within FLOAT or FLOAT_EXTEND expressions. */
8310
8311 #ifndef SYSV386_COMPAT
8312 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8313 wants to fix the assemblers because that causes incompatibility
8314 with gcc. No-one wants to fix gcc because that causes
8315 incompatibility with assemblers... You can use the option of
8316 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8317 #define SYSV386_COMPAT 1
8318 #endif
8319
8320 const char *
8321 output_387_binary_op (rtx insn, rtx *operands)
8322 {
8323 static char buf[30];
8324 const char *p;
8325 const char *ssep;
8326 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8327
8328 #ifdef ENABLE_CHECKING
8329 /* Even if we do not want to check the inputs, this documents the input
8330 constraints, which helps in understanding the following code. */
8331 if (STACK_REG_P (operands[0])
8332 && ((REG_P (operands[1])
8333 && REGNO (operands[0]) == REGNO (operands[1])
8334 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8335 || (REG_P (operands[2])
8336 && REGNO (operands[0]) == REGNO (operands[2])
8337 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8338 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8339 ; /* ok */
8340 else
8341 gcc_assert (is_sse);
8342 #endif
8343
8344 switch (GET_CODE (operands[3]))
8345 {
8346 case PLUS:
8347 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8348 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8349 p = "fiadd";
8350 else
8351 p = "fadd";
8352 ssep = "add";
8353 break;
8354
8355 case MINUS:
8356 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8357 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8358 p = "fisub";
8359 else
8360 p = "fsub";
8361 ssep = "sub";
8362 break;
8363
8364 case MULT:
8365 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8366 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8367 p = "fimul";
8368 else
8369 p = "fmul";
8370 ssep = "mul";
8371 break;
8372
8373 case DIV:
8374 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8375 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8376 p = "fidiv";
8377 else
8378 p = "fdiv";
8379 ssep = "div";
8380 break;
8381
8382 default:
8383 gcc_unreachable ();
8384 }
8385
8386 if (is_sse)
8387 {
8388 strcpy (buf, ssep);
8389 if (GET_MODE (operands[0]) == SFmode)
8390 strcat (buf, "ss\t{%2, %0|%0, %2}");
8391 else
8392 strcat (buf, "sd\t{%2, %0|%0, %2}");
8393 return buf;
8394 }
8395 strcpy (buf, p);
8396
8397 switch (GET_CODE (operands[3]))
8398 {
8399 case MULT:
8400 case PLUS:
8401 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8402 {
8403 rtx temp = operands[2];
8404 operands[2] = operands[1];
8405 operands[1] = temp;
8406 }
8407
8408 /* We know operands[0] == operands[1]. */
8409
8410 if (GET_CODE (operands[2]) == MEM)
8411 {
8412 p = "%z2\t%2";
8413 break;
8414 }
8415
8416 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8417 {
8418 if (STACK_TOP_P (operands[0]))
8419 /* How is it that we are storing to a dead operand[2]?
8420 Well, presumably operands[1] is dead too. We can't
8421 store the result to st(0) as st(0) gets popped on this
8422 instruction. Instead store to operands[2] (which I
8423 think has to be st(1)). st(1) will be popped later.
8424 gcc <= 2.8.1 didn't have this check and generated
8425 assembly code that the Unixware assembler rejected. */
8426 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8427 else
8428 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8429 break;
8430 }
8431
8432 if (STACK_TOP_P (operands[0]))
8433 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8434 else
8435 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8436 break;
8437
8438 case MINUS:
8439 case DIV:
8440 if (GET_CODE (operands[1]) == MEM)
8441 {
8442 p = "r%z1\t%1";
8443 break;
8444 }
8445
8446 if (GET_CODE (operands[2]) == MEM)
8447 {
8448 p = "%z2\t%2";
8449 break;
8450 }
8451
8452 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8453 {
8454 #if SYSV386_COMPAT
8455 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8456 derived assemblers, confusingly reverse the direction of
8457 the operation for fsub{r} and fdiv{r} when the
8458 destination register is not st(0). The Intel assembler
8459 doesn't have this brain damage. Read !SYSV386_COMPAT to
8460 figure out what the hardware really does. */
8461 if (STACK_TOP_P (operands[0]))
8462 p = "{p\t%0, %2|rp\t%2, %0}";
8463 else
8464 p = "{rp\t%2, %0|p\t%0, %2}";
8465 #else
8466 if (STACK_TOP_P (operands[0]))
8467 /* As above for fmul/fadd, we can't store to st(0). */
8468 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8469 else
8470 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8471 #endif
8472 break;
8473 }
8474
8475 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8476 {
8477 #if SYSV386_COMPAT
8478 if (STACK_TOP_P (operands[0]))
8479 p = "{rp\t%0, %1|p\t%1, %0}";
8480 else
8481 p = "{p\t%1, %0|rp\t%0, %1}";
8482 #else
8483 if (STACK_TOP_P (operands[0]))
8484 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8485 else
8486 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8487 #endif
8488 break;
8489 }
8490
8491 if (STACK_TOP_P (operands[0]))
8492 {
8493 if (STACK_TOP_P (operands[1]))
8494 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8495 else
8496 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8497 break;
8498 }
8499 else if (STACK_TOP_P (operands[1]))
8500 {
8501 #if SYSV386_COMPAT
8502 p = "{\t%1, %0|r\t%0, %1}";
8503 #else
8504 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8505 #endif
8506 }
8507 else
8508 {
8509 #if SYSV386_COMPAT
8510 p = "{r\t%2, %0|\t%0, %2}";
8511 #else
8512 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8513 #endif
8514 }
8515 break;
8516
8517 default:
8518 gcc_unreachable ();
8519 }
8520
8521 strcat (buf, p);
8522 return buf;
8523 }
8524
8525 /* Return the mode needed by INSN for entity ENTITY in the optimize_mode_switching pass. */
8526
8527 int
8528 ix86_mode_needed (int entity, rtx insn)
8529 {
8530 enum attr_i387_cw mode;
8531
8532 /* The mode UNINITIALIZED is used to store the control word after a
8533 function call or ASM pattern. The mode ANY specifies that the function
8534 has no requirements on the control word and makes no changes in the
8535 bits we are interested in. */
8536
8537 if (CALL_P (insn)
8538 || (NONJUMP_INSN_P (insn)
8539 && (asm_noperands (PATTERN (insn)) >= 0
8540 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8541 return I387_CW_UNINITIALIZED;
8542
8543 if (recog_memoized (insn) < 0)
8544 return I387_CW_ANY;
8545
8546 mode = get_attr_i387_cw (insn);
8547
8548 switch (entity)
8549 {
8550 case I387_TRUNC:
8551 if (mode == I387_CW_TRUNC)
8552 return mode;
8553 break;
8554
8555 case I387_FLOOR:
8556 if (mode == I387_CW_FLOOR)
8557 return mode;
8558 break;
8559
8560 case I387_CEIL:
8561 if (mode == I387_CW_CEIL)
8562 return mode;
8563 break;
8564
8565 case I387_MASK_PM:
8566 if (mode == I387_CW_MASK_PM)
8567 return mode;
8568 break;
8569
8570 default:
8571 gcc_unreachable ();
8572 }
8573
8574 return I387_CW_ANY;
8575 }
8576
8577 /* Output code to initialize the control word copies used by trunc?f?i and
8578 rounding patterns. The current control word is saved in SLOT_CW_STORED,
8579 and a copy modified according to MODE is stored in the stack slot for MODE. */
8580
8581 void
8582 emit_i387_cw_initialization (int mode)
8583 {
8584 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8585 rtx new_mode;
8586
8587 int slot;
8588
8589 rtx reg = gen_reg_rtx (HImode);
8590
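/* The current x87 control word is saved with fnstcw and a copy is then
adjusted: the rounding-control field is bits 10-11 (0x0c00: 00 =
nearest, 01 = down, 10 = up, 11 = truncate) and the precision exception
mask is bit 5 (0x0020).  On 64-bit targets, targets with partial
register stalls, or when optimizing for size the copy is modified with
full 16-bit and/or operations; otherwise the rounding bits are inserted
directly into the high byte.  */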
8591 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8592 emit_move_insn (reg, copy_rtx (stored_mode));
8593
8594 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8595 {
8596 switch (mode)
8597 {
8598 case I387_CW_TRUNC:
8599 /* round toward zero (truncate) */
8600 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8601 slot = SLOT_CW_TRUNC;
8602 break;
8603
8604 case I387_CW_FLOOR:
8605 /* round down toward -oo */
8606 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8607 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8608 slot = SLOT_CW_FLOOR;
8609 break;
8610
8611 case I387_CW_CEIL:
8612 /* round up toward +oo */
8613 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8614 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8615 slot = SLOT_CW_CEIL;
8616 break;
8617
8618 case I387_CW_MASK_PM:
8619 /* mask precision exception for nearbyint() */
8620 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8621 slot = SLOT_CW_MASK_PM;
8622 break;
8623
8624 default:
8625 gcc_unreachable ();
8626 }
8627 }
8628 else
8629 {
8630 switch (mode)
8631 {
8632 case I387_CW_TRUNC:
8633 /* round toward zero (truncate) */
8634 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8635 slot = SLOT_CW_TRUNC;
8636 break;
8637
8638 case I387_CW_FLOOR:
8639 /* round down toward -oo */
8640 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8641 slot = SLOT_CW_FLOOR;
8642 break;
8643
8644 case I387_CW_CEIL:
8645 /* round up toward +oo */
8646 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8647 slot = SLOT_CW_CEIL;
8648 break;
8649
8650 case I387_CW_MASK_PM:
8651 /* mask precision exception for nearbyint() */
8652 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8653 slot = SLOT_CW_MASK_PM;
8654 break;
8655
8656 default:
8657 gcc_unreachable ();
8658 }
8659 }
8660
8661 gcc_assert (slot < MAX_386_STACK_LOCALS);
8662
8663 new_mode = assign_386_stack_local (HImode, slot);
8664 emit_move_insn (new_mode, reg);
8665 }
8666
8667 /* Output code for INSN to convert a float to a signed int. OPERANDS
8668 are the insn operands. The output may be [HSD]Imode and the input
8669 operand may be [SDX]Fmode. */
8670
8671 const char *
8672 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8673 {
8674 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8675 int dimode_p = GET_MODE (operands[0]) == DImode;
8676 int round_mode = get_attr_i387_cw (insn);
8677
8678 /* Jump through a hoop or two for DImode, since the hardware has no
8679 non-popping instruction. We used to do this a different way, but
8680 that was somewhat fragile and broke with post-reload splitters. */
8681 if ((dimode_p || fisttp) && !stack_top_dies)
8682 output_asm_insn ("fld\t%y1", operands);
8683
8684 gcc_assert (STACK_TOP_P (operands[1]));
8685 gcc_assert (GET_CODE (operands[0]) == MEM);
8686
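/* fisttp (SSE3) always truncates regardless of the current rounding
mode, so no control word switch is needed; otherwise the requested
control word is installed around the store and the original word is
restored afterwards.  */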
8687 if (fisttp)
8688 output_asm_insn ("fisttp%z0\t%0", operands);
8689 else
8690 {
8691 if (round_mode != I387_CW_ANY)
8692 output_asm_insn ("fldcw\t%3", operands);
8693 if (stack_top_dies || dimode_p)
8694 output_asm_insn ("fistp%z0\t%0", operands);
8695 else
8696 output_asm_insn ("fist%z0\t%0", operands);
8697 if (round_mode != I387_CW_ANY)
8698 output_asm_insn ("fldcw\t%2", operands);
8699 }
8700
8701 return "";
8702 }
8703
8704 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8705 have the values zero or one, indicates the ffreep insn's operand
8706 from the OPERANDS array. */
8707
8708 static const char *
8709 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8710 {
8711 if (TARGET_USE_FFREEP)
8712 #if HAVE_AS_IX86_FFREEP
8713 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8714 #else
8715 {
8716 static char retval[] = ".word\t0xc_df";
8717 int regno = REGNO (operands[opno]);
8718
8719 gcc_assert (FP_REGNO_P (regno));
8720
8721 retval[9] = '0' + (regno - FIRST_STACK_REG);
8722 return retval;
8723 }
8724 #endif
8725
8726 return opno ? "fstp\t%y1" : "fstp\t%y0";
8727 }
8728
8729
8730 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8731 should be used. UNORDERED_P is true when fucom should be used. */
8732
8733 const char *
8734 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8735 {
8736 int stack_top_dies;
8737 rtx cmp_op0, cmp_op1;
8738 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8739
8740 if (eflags_p)
8741 {
8742 cmp_op0 = operands[0];
8743 cmp_op1 = operands[1];
8744 }
8745 else
8746 {
8747 cmp_op0 = operands[1];
8748 cmp_op1 = operands[2];
8749 }
8750
8751 if (is_sse)
8752 {
8753 if (GET_MODE (operands[0]) == SFmode)
8754 if (unordered_p)
8755 return "ucomiss\t{%1, %0|%0, %1}";
8756 else
8757 return "comiss\t{%1, %0|%0, %1}";
8758 else
8759 if (unordered_p)
8760 return "ucomisd\t{%1, %0|%0, %1}";
8761 else
8762 return "comisd\t{%1, %0|%0, %1}";
8763 }
8764
8765 gcc_assert (STACK_TOP_P (cmp_op0));
8766
8767 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8768
8769 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8770 {
8771 if (stack_top_dies)
8772 {
8773 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8774 return output_387_ffreep (operands, 1);
8775 }
8776 else
8777 return "ftst\n\tfnstsw\t%0";
8778 }
8779
8780 if (STACK_REG_P (cmp_op1)
8781 && stack_top_dies
8782 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8783 && REGNO (cmp_op1) != FIRST_STACK_REG)
8784 {
8785 /* If both the top of the 387 stack and the other operand (also a
8786 stack register) die, then this must be a `fcompp' float
8787 compare. */
8788
8789 if (eflags_p)
8790 {
8791 /* There is no double popping fcomi variant. Fortunately,
8792 eflags is immune from the fstp's cc clobbering. */
8793 if (unordered_p)
8794 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8795 else
8796 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8797 return output_387_ffreep (operands, 0);
8798 }
8799 else
8800 {
8801 if (unordered_p)
8802 return "fucompp\n\tfnstsw\t%0";
8803 else
8804 return "fcompp\n\tfnstsw\t%0";
8805 }
8806 }
8807 else
8808 {
8809 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8810
8811 static const char * const alt[16] =
8812 {
8813 "fcom%z2\t%y2\n\tfnstsw\t%0",
8814 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8815 "fucom%z2\t%y2\n\tfnstsw\t%0",
8816 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8817
8818 "ficom%z2\t%y2\n\tfnstsw\t%0",
8819 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8820 NULL,
8821 NULL,
8822
8823 "fcomi\t{%y1, %0|%0, %y1}",
8824 "fcomip\t{%y1, %0|%0, %y1}",
8825 "fucomi\t{%y1, %0|%0, %y1}",
8826 "fucomip\t{%y1, %0|%0, %y1}",
8827
8828 NULL,
8829 NULL,
8830 NULL,
8831 NULL
8832 };
8833
8834 int mask;
8835 const char *ret;
8836
8837 mask = eflags_p << 3;
8838 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8839 mask |= unordered_p << 1;
8840 mask |= stack_top_dies;
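/* For example, eflags_p = 1, unordered_p = 1 and a dying stack top
give mask 11, selecting "fucomip".  */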
8841
8842 gcc_assert (mask < 16);
8843 ret = alt[mask];
8844 gcc_assert (ret);
8845
8846 return ret;
8847 }
8848 }
8849
8850 void
8851 ix86_output_addr_vec_elt (FILE *file, int value)
8852 {
8853 const char *directive = ASM_LONG;
8854
8855 #ifdef ASM_QUAD
8856 if (TARGET_64BIT)
8857 directive = ASM_QUAD;
8858 #else
8859 gcc_assert (!TARGET_64BIT);
8860 #endif
8861
8862 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8863 }
8864
8865 void
8866 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8867 {
8868 if (TARGET_64BIT)
8869 fprintf (file, "%s%s%d-%s%d\n",
8870 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8871 else if (HAVE_AS_GOTOFF_IN_DATA)
8872 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8873 #if TARGET_MACHO
8874 else if (TARGET_MACHO)
8875 {
8876 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8877 machopic_output_function_base_name (file);
8878 fprintf (file, "\n");
8879 }
8880 #endif
8881 else
8882 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8883 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8884 }
8885 \f
8886 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8887 for the target. */
8888
8889 void
8890 ix86_expand_clear (rtx dest)
8891 {
8892 rtx tmp;
8893
8894 /* We play register width games, which are only valid after reload. */
8895 gcc_assert (reload_completed);
8896
8897 /* Avoid HImode and its attendant prefix byte. */
8898 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8899 dest = gen_rtx_REG (SImode, REGNO (dest));
8900
8901 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8902
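/* The xor form also clobbers the flags register (hard register 17),
since xor sets EFLAGS while mov $0 does not, hence the CLOBBER below.  */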
8903 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8904 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8905 {
8906 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8907 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8908 }
8909
8910 emit_insn (tmp);
8911 }
8912
8913 /* X is an unchanging MEM. If it is a constant pool reference, return
8914 the constant pool rtx, else NULL. */
8915
8916 rtx
8917 maybe_get_pool_constant (rtx x)
8918 {
8919 x = ix86_delegitimize_address (XEXP (x, 0));
8920
8921 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8922 return get_pool_constant (x);
8923
8924 return NULL_RTX;
8925 }
8926
8927 void
8928 ix86_expand_move (enum machine_mode mode, rtx operands[])
8929 {
8930 int strict = (reload_in_progress || reload_completed);
8931 rtx op0, op1;
8932 enum tls_model model;
8933
8934 op0 = operands[0];
8935 op1 = operands[1];
8936
8937 if (GET_CODE (op1) == SYMBOL_REF)
8938 {
8939 model = SYMBOL_REF_TLS_MODEL (op1);
8940 if (model)
8941 {
8942 op1 = legitimize_tls_address (op1, model, true);
8943 op1 = force_operand (op1, op0);
8944 if (op1 == op0)
8945 return;
8946 }
8947 }
8948 else if (GET_CODE (op1) == CONST
8949 && GET_CODE (XEXP (op1, 0)) == PLUS
8950 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8951 {
8952 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8953 if (model)
8954 {
8955 rtx addend = XEXP (XEXP (op1, 0), 1);
8956 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8957 op1 = force_operand (op1, NULL);
8958 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8959 op0, 1, OPTAB_DIRECT);
8960 if (op1 == op0)
8961 return;
8962 }
8963 }
8964
8965 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8966 {
8967 if (TARGET_MACHO && !TARGET_64BIT)
8968 {
8969 #if TARGET_MACHO
8970 if (MACHOPIC_PURE)
8971 {
8972 rtx temp = ((reload_in_progress
8973 || ((op0 && GET_CODE (op0) == REG)
8974 && mode == Pmode))
8975 ? op0 : gen_reg_rtx (Pmode));
8976 op1 = machopic_indirect_data_reference (op1, temp);
8977 op1 = machopic_legitimize_pic_address (op1, mode,
8978 temp == op1 ? 0 : temp);
8979 }
8980 else if (MACHOPIC_INDIRECT)
8981 op1 = machopic_indirect_data_reference (op1, 0);
8982 if (op0 == op1)
8983 return;
8984 #endif
8985 }
8986 else
8987 {
8988 if (GET_CODE (op0) == MEM)
8989 op1 = force_reg (Pmode, op1);
8990 else
8991 op1 = legitimize_address (op1, op1, Pmode);
8992 }
8993 }
8994 else
8995 {
8996 if (GET_CODE (op0) == MEM
8997 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8998 || !push_operand (op0, mode))
8999 && GET_CODE (op1) == MEM)
9000 op1 = force_reg (mode, op1);
9001
9002 if (push_operand (op0, mode)
9003 && ! general_no_elim_operand (op1, mode))
9004 op1 = copy_to_mode_reg (mode, op1);
9005
9006 /* Force large constants in 64bit compilation into register
9007 to get them CSEed. */
9008 if (TARGET_64BIT && mode == DImode
9009 && immediate_operand (op1, mode)
9010 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9011 && !register_operand (op0, mode)
9012 && optimize && !reload_completed && !reload_in_progress)
9013 op1 = copy_to_mode_reg (mode, op1);
9014
9015 if (FLOAT_MODE_P (mode))
9016 {
9017 /* If we are loading a floating point constant to a register,
9018 force the value to memory now, since we'll get better code
9019 out of the back end. */
9020
9021 if (strict)
9022 ;
9023 else if (GET_CODE (op1) == CONST_DOUBLE)
9024 {
9025 op1 = validize_mem (force_const_mem (mode, op1));
9026 if (!register_operand (op0, mode))
9027 {
9028 rtx temp = gen_reg_rtx (mode);
9029 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9030 emit_move_insn (op0, temp);
9031 return;
9032 }
9033 }
9034 }
9035 }
9036
9037 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9038 }
9039
9040 void
9041 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9042 {
9043 rtx op0 = operands[0], op1 = operands[1];
9044
9045 /* Force constants other than zero into memory. We do not know how
9046 the instructions used to build constants modify the upper 64 bits
9047 of the register; once we have that information we may be able
9048 to handle some of them more efficiently. */
9049 if ((reload_in_progress | reload_completed) == 0
9050 && register_operand (op0, mode)
9051 && CONSTANT_P (op1)
9052 && standard_sse_constant_p (op1) <= 0)
9053 op1 = validize_mem (force_const_mem (mode, op1));
9054
9055 /* Make operand1 a register if it isn't already. */
9056 if (!no_new_pseudos
9057 && !register_operand (op0, mode)
9058 && !register_operand (op1, mode))
9059 {
9060 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9061 return;
9062 }
9063
9064 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9065 }
9066
9067 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9068 straight to ix86_expand_vector_move. */
9069
9070 void
9071 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9072 {
9073 rtx op0, op1, m;
9074
9075 op0 = operands[0];
9076 op1 = operands[1];
9077
9078 if (MEM_P (op1))
9079 {
9080 /* If we're optimizing for size, movups is the smallest. */
9081 if (optimize_size)
9082 {
9083 op0 = gen_lowpart (V4SFmode, op0);
9084 op1 = gen_lowpart (V4SFmode, op1);
9085 emit_insn (gen_sse_movups (op0, op1));
9086 return;
9087 }
9088
9089 /* ??? If we have typed data, then it would appear that using
9090 movdqu is the only way to get unaligned data loaded with
9091 integer type. */
9092 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9093 {
9094 op0 = gen_lowpart (V16QImode, op0);
9095 op1 = gen_lowpart (V16QImode, op1);
9096 emit_insn (gen_sse2_movdqu (op0, op1));
9097 return;
9098 }
9099
9100 if (TARGET_SSE2 && mode == V2DFmode)
9101 {
9102 rtx zero;
9103
9104 /* When SSE registers are split into halves, we can avoid
9105 writing to the top half twice. */
9106 if (TARGET_SSE_SPLIT_REGS)
9107 {
9108 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9109 zero = op0;
9110 }
9111 else
9112 {
9113 /* ??? Not sure about the best option for the Intel chips.
9114 The following would seem to satisfy; the register is
9115 entirely cleared, breaking the dependency chain. We
9116 then store to the upper half, with a dependency depth
9117 of one. A rumor has it that Intel recommends two movsd
9118 followed by an unpacklpd, but this is unconfirmed. And
9119 given that the dependency depth of the unpacklpd would
9120 still be one, I'm not sure why this would be better. */
9121 zero = CONST0_RTX (V2DFmode);
9122 }
9123
9124 m = adjust_address (op1, DFmode, 0);
9125 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9126 m = adjust_address (op1, DFmode, 8);
9127 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9128 }
9129 else
9130 {
9131 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9132 emit_move_insn (op0, CONST0_RTX (mode));
9133 else
9134 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9135
9136 if (mode != V4SFmode)
9137 op0 = gen_lowpart (V4SFmode, op0);
9138 m = adjust_address (op1, V2SFmode, 0);
9139 emit_insn (gen_sse_loadlps (op0, op0, m));
9140 m = adjust_address (op1, V2SFmode, 8);
9141 emit_insn (gen_sse_loadhps (op0, op0, m));
9142 }
9143 }
9144 else if (MEM_P (op0))
9145 {
9146 /* If we're optimizing for size, movups is the smallest. */
9147 if (optimize_size)
9148 {
9149 op0 = gen_lowpart (V4SFmode, op0);
9150 op1 = gen_lowpart (V4SFmode, op1);
9151 emit_insn (gen_sse_movups (op0, op1));
9152 return;
9153 }
9154
9155 /* ??? Similar to above, only less clear because of quote
9156 typeless stores unquote. */
9157 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9158 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9159 {
9160 op0 = gen_lowpart (V16QImode, op0);
9161 op1 = gen_lowpart (V16QImode, op1);
9162 emit_insn (gen_sse2_movdqu (op0, op1));
9163 return;
9164 }
9165
9166 if (TARGET_SSE2 && mode == V2DFmode)
9167 {
9168 m = adjust_address (op0, DFmode, 0);
9169 emit_insn (gen_sse2_storelpd (m, op1));
9170 m = adjust_address (op0, DFmode, 8);
9171 emit_insn (gen_sse2_storehpd (m, op1));
9172 }
9173 else
9174 {
9175 if (mode != V4SFmode)
9176 op1 = gen_lowpart (V4SFmode, op1);
9177 m = adjust_address (op0, V2SFmode, 0);
9178 emit_insn (gen_sse_storelps (m, op1));
9179 m = adjust_address (op0, V2SFmode, 8);
9180 emit_insn (gen_sse_storehps (m, op1));
9181 }
9182 }
9183 else
9184 gcc_unreachable ();
9185 }
9186
9187 /* Expand a push in MODE. This is some mode for which we do not support
9188 proper push instructions, at least from the registers that we expect
9189 the value to live in. */
9190
9191 void
9192 ix86_expand_push (enum machine_mode mode, rtx x)
9193 {
9194 rtx tmp;
9195
9196 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9197 GEN_INT (-GET_MODE_SIZE (mode)),
9198 stack_pointer_rtx, 1, OPTAB_DIRECT);
9199 if (tmp != stack_pointer_rtx)
9200 emit_move_insn (stack_pointer_rtx, tmp);
9201
9202 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9203 emit_move_insn (tmp, x);
9204 }
9205
9206 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9207 destination to use for the operation. If different from the true
9208 destination in operands[0], a copy operation will be required. */
9209
9210 rtx
9211 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9212 rtx operands[])
9213 {
9214 int matching_memory;
9215 rtx src1, src2, dst;
9216
9217 dst = operands[0];
9218 src1 = operands[1];
9219 src2 = operands[2];
9220
9221 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9222 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9223 && (rtx_equal_p (dst, src2)
9224 || immediate_operand (src1, mode)))
9225 {
9226 rtx temp = src1;
9227 src1 = src2;
9228 src2 = temp;
9229 }
9230
9231 /* If the destination is memory, and we do not have matching source
9232 operands, do things in registers. */
9233 matching_memory = 0;
9234 if (GET_CODE (dst) == MEM)
9235 {
9236 if (rtx_equal_p (dst, src1))
9237 matching_memory = 1;
9238 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9239 && rtx_equal_p (dst, src2))
9240 matching_memory = 2;
9241 else
9242 dst = gen_reg_rtx (mode);
9243 }
9244
9245 /* Both source operands cannot be in memory. */
9246 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9247 {
9248 if (matching_memory != 2)
9249 src2 = force_reg (mode, src2);
9250 else
9251 src1 = force_reg (mode, src1);
9252 }
9253
9254 /* If the operation is not commutable, source 1 cannot be a constant
9255 or non-matching memory. */
9256 if ((CONSTANT_P (src1)
9257 || (!matching_memory && GET_CODE (src1) == MEM))
9258 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9259 src1 = force_reg (mode, src1);
9260
9261 src1 = operands[1] = src1;
9262 src2 = operands[2] = src2;
9263 return dst;
9264 }
9265
9266 /* Similarly, but assume that the destination has already been
9267 set up properly. */
9268
9269 void
9270 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9271 enum machine_mode mode, rtx operands[])
9272 {
9273 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9274 gcc_assert (dst == operands[0]);
9275 }
9276
9277 /* Attempt to expand a binary operator. Make the expansion closer to the
9278 actual machine than just general_operand, which would allow 3 separate
9279 memory references (one output, two input) in a single insn. */
9280
9281 void
9282 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9283 rtx operands[])
9284 {
9285 rtx src1, src2, dst, op, clob;
9286
9287 dst = ix86_fixup_binary_operands (code, mode, operands);
9288 src1 = operands[1];
9289 src2 = operands[2];
9290
9291 /* Emit the instruction. */
9292
9293 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9294 if (reload_in_progress)
9295 {
9296 /* Reload doesn't know about the flags register, and doesn't know that
9297 it doesn't want to clobber it. We can only do this with PLUS. */
9298 gcc_assert (code == PLUS);
9299 emit_insn (op);
9300 }
9301 else
9302 {
9303 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9304 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9305 }
9306
9307 /* Fix up the destination if needed. */
9308 if (dst != operands[0])
9309 emit_move_insn (operands[0], dst);
9310 }
9311
9312 /* Return TRUE or FALSE depending on whether the binary operator meets the
9313 appropriate constraints. */
9314
9315 int
9316 ix86_binary_operator_ok (enum rtx_code code,
9317 enum machine_mode mode ATTRIBUTE_UNUSED,
9318 rtx operands[3])
9319 {
9320 /* Both source operands cannot be in memory. */
9321 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9322 return 0;
9323 /* If the operation is not commutable, source 1 cannot be a constant. */
9324 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9325 return 0;
9326 /* If the destination is memory, we must have a matching source operand. */
9327 if (GET_CODE (operands[0]) == MEM
9328 && ! (rtx_equal_p (operands[0], operands[1])
9329 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9330 && rtx_equal_p (operands[0], operands[2]))))
9331 return 0;
9332 /* If the operation is not commutable and the source 1 is memory, we must
9333 have a matching destination. */
9334 if (GET_CODE (operands[1]) == MEM
9335 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9336 && ! rtx_equal_p (operands[0], operands[1]))
9337 return 0;
9338 return 1;
9339 }
9340
9341 /* Attempt to expand a unary operator. Make the expansion closer to the
9342 actual machine than just general_operand, which would allow 2 separate
9343 memory references (one output, one input) in a single insn. */
9344
9345 void
9346 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9347 rtx operands[])
9348 {
9349 int matching_memory;
9350 rtx src, dst, op, clob;
9351
9352 dst = operands[0];
9353 src = operands[1];
9354
9355 /* If the destination is memory, and we do not have matching source
9356 operands, do things in registers. */
9357 matching_memory = 0;
9358 if (MEM_P (dst))
9359 {
9360 if (rtx_equal_p (dst, src))
9361 matching_memory = 1;
9362 else
9363 dst = gen_reg_rtx (mode);
9364 }
9365
9366 /* When source operand is memory, destination must match. */
9367 if (MEM_P (src) && !matching_memory)
9368 src = force_reg (mode, src);
9369
9370 /* Emit the instruction. */
9371
9372 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9373 if (reload_in_progress || code == NOT)
9374 {
9375 /* Reload doesn't know about the flags register, and doesn't know that
9376 it doesn't want to clobber it. */
9377 gcc_assert (code == NOT);
9378 emit_insn (op);
9379 }
9380 else
9381 {
9382 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9383 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9384 }
9385
9386 /* Fix up the destination if needed. */
9387 if (dst != operands[0])
9388 emit_move_insn (operands[0], dst);
9389 }
9390
9391 /* Return TRUE or FALSE depending on whether the unary operator meets the
9392 appropriate constraints. */
9393
9394 int
9395 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9396 enum machine_mode mode ATTRIBUTE_UNUSED,
9397 rtx operands[2] ATTRIBUTE_UNUSED)
9398 {
9399 /* If one of operands is memory, source and destination must match. */
9400 if ((GET_CODE (operands[0]) == MEM
9401 || GET_CODE (operands[1]) == MEM)
9402 && ! rtx_equal_p (operands[0], operands[1]))
9403 return FALSE;
9404 return TRUE;
9405 }
9406
9407 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9408 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9409 true, then replicate the mask for all elements of the vector register.
9410 If INVERT is true, then create a mask excluding the sign bit. */
9411
9412 rtx
9413 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9414 {
9415 enum machine_mode vec_mode;
9416 HOST_WIDE_INT hi, lo;
9417 int shift = 63;
9418 rtvec v;
9419 rtx mask;
9420
9421 /* Find the sign bit, sign extended to 2*HWI. */
9422 if (mode == SFmode)
9423 lo = 0x80000000, hi = lo < 0;
9424 else if (HOST_BITS_PER_WIDE_INT >= 64)
9425 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9426 else
9427 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9428
9429 if (invert)
9430 lo = ~lo, hi = ~hi;
9431
9432 /* Force this value into the low part of a fp vector constant. */
9433 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9434 mask = gen_lowpart (mode, mask);
9435
9436 if (mode == SFmode)
9437 {
9438 if (vect)
9439 v = gen_rtvec (4, mask, mask, mask, mask);
9440 else
9441 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9442 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9443 vec_mode = V4SFmode;
9444 }
9445 else
9446 {
9447 if (vect)
9448 v = gen_rtvec (2, mask, mask);
9449 else
9450 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9451 vec_mode = V2DFmode;
9452 }
9453
9454 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9455 }
9456
9457 /* Generate code for floating point ABS or NEG. */
9458
9459 void
9460 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9461 rtx operands[])
9462 {
9463 rtx mask, set, use, clob, dst, src;
9464 bool matching_memory;
9465 bool use_sse = false;
9466 bool vector_mode = VECTOR_MODE_P (mode);
9467 enum machine_mode elt_mode = mode;
9468
9469 if (vector_mode)
9470 {
9471 elt_mode = GET_MODE_INNER (mode);
9472 use_sse = true;
9473 }
9474 else if (TARGET_SSE_MATH)
9475 use_sse = SSE_FLOAT_MODE_P (mode);
9476
9477 /* NEG and ABS performed with SSE use bitwise mask operations.
9478 Create the appropriate mask now. */
9479 if (use_sse)
9480 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9481 else
9482 mask = NULL_RTX;
9483
9484 dst = operands[0];
9485 src = operands[1];
9486
9487 /* If the destination is memory, and we don't have matching source
9488 operands or we're using the x87, do things in registers. */
9489 matching_memory = false;
9490 if (MEM_P (dst))
9491 {
9492 if (use_sse && rtx_equal_p (dst, src))
9493 matching_memory = true;
9494 else
9495 dst = gen_reg_rtx (mode);
9496 }
9497 if (MEM_P (src) && !matching_memory)
9498 src = force_reg (mode, src);
9499
9500 if (vector_mode)
9501 {
9502 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9503 set = gen_rtx_SET (VOIDmode, dst, set);
9504 emit_insn (set);
9505 }
9506 else
9507 {
9508 set = gen_rtx_fmt_e (code, mode, src);
9509 set = gen_rtx_SET (VOIDmode, dst, set);
9510 if (mask)
9511 {
9512 use = gen_rtx_USE (VOIDmode, mask);
9513 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9514 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9515 gen_rtvec (3, set, use, clob)));
9516 }
9517 else
9518 emit_insn (set);
9519 }
9520
9521 if (dst != operands[0])
9522 emit_move_insn (operands[0], dst);
9523 }
9524
9525 /* Expand a copysign operation. Special case operand 0 being a constant. */
9526
9527 void
9528 ix86_expand_copysign (rtx operands[])
9529 {
9530 enum machine_mode mode, vmode;
9531 rtx dest, op0, op1, mask, nmask;
9532
9533 dest = operands[0];
9534 op0 = operands[1];
9535 op1 = operands[2];
9536
9537 mode = GET_MODE (dest);
9538 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9539
9540 if (GET_CODE (op0) == CONST_DOUBLE)
9541 {
9542 rtvec v;
9543
9544 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9545 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9546
9547 if (op0 == CONST0_RTX (mode))
9548 op0 = CONST0_RTX (vmode);
9549 else
9550 {
9551 if (mode == SFmode)
9552 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9553 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9554 else
9555 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9556 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9557 }
9558
9559 mask = ix86_build_signbit_mask (mode, 0, 0);
9560
9561 if (mode == SFmode)
9562 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9563 else
9564 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9565 }
9566 else
9567 {
9568 nmask = ix86_build_signbit_mask (mode, 0, 1);
9569 mask = ix86_build_signbit_mask (mode, 0, 0);
9570
9571 if (mode == SFmode)
9572 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9573 else
9574 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9575 }
9576 }
9577
9578 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9579 be a constant, and so has already been expanded into a vector constant. */
9580
9581 void
9582 ix86_split_copysign_const (rtx operands[])
9583 {
9584 enum machine_mode mode, vmode;
9585 rtx dest, op0, op1, mask, x;
9586
9587 dest = operands[0];
9588 op0 = operands[1];
9589 op1 = operands[2];
9590 mask = operands[3];
9591
9592 mode = GET_MODE (dest);
9593 vmode = GET_MODE (mask);
9594
9595 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9596 x = gen_rtx_AND (vmode, dest, mask);
9597 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9598
9599 if (op0 != CONST0_RTX (vmode))
9600 {
9601 x = gen_rtx_IOR (vmode, dest, op0);
9602 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9603 }
9604 }
9605
9606 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9607 so we have to do two masks. */
9608
9609 void
9610 ix86_split_copysign_var (rtx operands[])
9611 {
9612 enum machine_mode mode, vmode;
9613 rtx dest, scratch, op0, op1, mask, nmask, x;
9614
9615 dest = operands[0];
9616 scratch = operands[1];
9617 op0 = operands[2];
9618 op1 = operands[3];
9619 nmask = operands[4];
9620 mask = operands[5];
9621
9622 mode = GET_MODE (dest);
9623 vmode = GET_MODE (mask);
9624
9625 if (rtx_equal_p (op0, op1))
9626 {
9627 /* Shouldn't happen often (it's useless, obviously), but when it does
9628 we'd generate incorrect code if we continue below. */
9629 emit_move_insn (dest, op0);
9630 return;
9631 }
9632
9633 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9634 {
9635 gcc_assert (REGNO (op1) == REGNO (scratch));
9636
9637 x = gen_rtx_AND (vmode, scratch, mask);
9638 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9639
9640 dest = mask;
9641 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9642 x = gen_rtx_NOT (vmode, dest);
9643 x = gen_rtx_AND (vmode, x, op0);
9644 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9645 }
9646 else
9647 {
9648 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9649 {
9650 x = gen_rtx_AND (vmode, scratch, mask);
9651 }
9652 else /* alternative 2,4 */
9653 {
9654 gcc_assert (REGNO (mask) == REGNO (scratch));
9655 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9656 x = gen_rtx_AND (vmode, scratch, op1);
9657 }
9658 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9659
9660 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9661 {
9662 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9663 x = gen_rtx_AND (vmode, dest, nmask);
9664 }
9665 else /* alternative 3,4 */
9666 {
9667 gcc_assert (REGNO (nmask) == REGNO (dest));
9668 dest = nmask;
9669 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9670 x = gen_rtx_AND (vmode, dest, op0);
9671 }
9672 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9673 }
9674
9675 x = gen_rtx_IOR (vmode, dest, scratch);
9676 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9677 }
9678
9679 /* Return TRUE or FALSE depending on whether the first SET in INSN
9680 has source and destination with matching CC modes, and that the
9681 CC mode is at least as constrained as REQ_MODE. */
9682
9683 int
9684 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9685 {
9686 rtx set;
9687 enum machine_mode set_mode;
9688
9689 set = PATTERN (insn);
9690 if (GET_CODE (set) == PARALLEL)
9691 set = XVECEXP (set, 0, 0);
9692 gcc_assert (GET_CODE (set) == SET);
9693 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9694
9695 set_mode = GET_MODE (SET_DEST (set));
9696 switch (set_mode)
9697 {
9698 case CCNOmode:
9699 if (req_mode != CCNOmode
9700 && (req_mode != CCmode
9701 || XEXP (SET_SRC (set), 1) != const0_rtx))
9702 return 0;
9703 break;
9704 case CCmode:
9705 if (req_mode == CCGCmode)
9706 return 0;
9707 /* FALLTHRU */
9708 case CCGCmode:
9709 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9710 return 0;
9711 /* FALLTHRU */
9712 case CCGOCmode:
9713 if (req_mode == CCZmode)
9714 return 0;
9715 /* FALLTHRU */
9716 case CCZmode:
9717 break;
9718
9719 default:
9720 gcc_unreachable ();
9721 }
9722
9723 return (GET_MODE (SET_SRC (set)) == set_mode);
9724 }
9725
9726 /* Generate insn patterns to do an integer compare of OPERANDS. */
9727
9728 static rtx
9729 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9730 {
9731 enum machine_mode cmpmode;
9732 rtx tmp, flags;
9733
9734 cmpmode = SELECT_CC_MODE (code, op0, op1);
9735 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9736
9737 /* This is very simple, but making the interface the same as in the
9738 FP case makes the rest of the code easier. */
9739 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9740 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9741
9742 /* Return the test that should be put into the flags user, i.e.
9743 the bcc, scc, or cmov instruction. */
9744 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9745 }
9746
9747 /* Figure out whether to use ordered or unordered fp comparisons.
9748 Return the appropriate mode to use. */
9749
9750 enum machine_mode
9751 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9752 {
9753 /* ??? In order to make all comparisons reversible, we do all comparisons
9754 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9755 between trapping and nontrapping forms of comparisons, we can make inequality
9756 comparisons trapping again, since it results in better code when using
9757 FCOM based compares. */
9758 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9759 }
9760
9761 enum machine_mode
9762 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9763 {
9764 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9765 return ix86_fp_compare_mode (code);
9766 switch (code)
9767 {
9768 /* Only zero flag is needed. */
9769 case EQ: /* ZF=0 */
9770 case NE: /* ZF!=0 */
9771 return CCZmode;
9772 /* Codes needing carry flag. */
9773 case GEU: /* CF=0 */
9774 case GTU: /* CF=0 & ZF=0 */
9775 case LTU: /* CF=1 */
9776 case LEU: /* CF=1 | ZF=1 */
9777 return CCmode;
9778 /* Codes possibly doable only with sign flag when
9779 comparing against zero. */
9780 case GE: /* SF=OF or SF=0 */
9781 case LT: /* SF<>OF or SF=1 */
9782 if (op1 == const0_rtx)
9783 return CCGOCmode;
9784 else
9785 /* For other cases Carry flag is not required. */
9786 return CCGCmode;
9787 /* Codes doable only with the sign flag when comparing
9788 against zero; there is no jump instruction for them,
9789 so we use relational tests against the overflow flag,
9790 which must therefore be zero. */
9791 case GT: /* ZF=0 & SF=OF */
9792 case LE: /* ZF=1 | SF<>OF */
9793 if (op1 == const0_rtx)
9794 return CCNOmode;
9795 else
9796 return CCGCmode;
9797 /* The strcmp pattern does (use flags) and combine may ask us for the
9798 proper mode. */
9799 case USE:
9800 return CCmode;
9801 default:
9802 gcc_unreachable ();
9803 }
9804 }
9805
9806 /* Return the fixed registers used for condition codes. */
9807
9808 static bool
9809 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9810 {
9811 *p1 = FLAGS_REG;
9812 *p2 = FPSR_REG;
9813 return true;
9814 }
9815
9816 /* If two condition code modes are compatible, return a condition code
9817 mode which is compatible with both. Otherwise, return
9818 VOIDmode. */
9819
9820 static enum machine_mode
9821 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9822 {
9823 if (m1 == m2)
9824 return m1;
9825
9826 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9827 return VOIDmode;
9828
9829 if ((m1 == CCGCmode && m2 == CCGOCmode)
9830 || (m1 == CCGOCmode && m2 == CCGCmode))
9831 return CCGCmode;
9832
9833 switch (m1)
9834 {
9835 default:
9836 gcc_unreachable ();
9837
9838 case CCmode:
9839 case CCGCmode:
9840 case CCGOCmode:
9841 case CCNOmode:
9842 case CCZmode:
9843 switch (m2)
9844 {
9845 default:
9846 return VOIDmode;
9847
9848 case CCmode:
9849 case CCGCmode:
9850 case CCGOCmode:
9851 case CCNOmode:
9852 case CCZmode:
9853 return CCmode;
9854 }
9855
9856 case CCFPmode:
9857 case CCFPUmode:
9858 /* These are only compatible with themselves, which we already
9859 checked above. */
9860 return VOIDmode;
9861 }
9862 }
9863
9864 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9865
9866 int
9867 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9868 {
9869 enum rtx_code swapped_code = swap_condition (code);
9870 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9871 || (ix86_fp_comparison_cost (swapped_code)
9872 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9873 }
9874
9875 /* Swap, force into registers, or otherwise massage the two operands
9876 to a fp comparison. The operands are updated in place; the new
9877 comparison code is returned. */
9878
9879 static enum rtx_code
9880 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9881 {
9882 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9883 rtx op0 = *pop0, op1 = *pop1;
9884 enum machine_mode op_mode = GET_MODE (op0);
9885 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9886
9887 /* All of the unordered compare instructions only work on registers.
9888 The same is true of the fcomi compare instructions. The XFmode
9889 compare instructions require registers except when comparing
9890 against zero or when converting operand 1 from fixed point to
9891 floating point. */
9892
9893 if (!is_sse
9894 && (fpcmp_mode == CCFPUmode
9895 || (op_mode == XFmode
9896 && ! (standard_80387_constant_p (op0) == 1
9897 || standard_80387_constant_p (op1) == 1)
9898 && GET_CODE (op1) != FLOAT)
9899 || ix86_use_fcomi_compare (code)))
9900 {
9901 op0 = force_reg (op_mode, op0);
9902 op1 = force_reg (op_mode, op1);
9903 }
9904 else
9905 {
9906 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9907 things around if they appear profitable, otherwise force op0
9908 into a register. */
9909
9910 if (standard_80387_constant_p (op0) == 0
9911 || (GET_CODE (op0) == MEM
9912 && ! (standard_80387_constant_p (op1) == 0
9913 || GET_CODE (op1) == MEM)))
9914 {
9915 rtx tmp;
9916 tmp = op0, op0 = op1, op1 = tmp;
9917 code = swap_condition (code);
9918 }
9919
9920 if (GET_CODE (op0) != REG)
9921 op0 = force_reg (op_mode, op0);
9922
9923 if (CONSTANT_P (op1))
9924 {
9925 int tmp = standard_80387_constant_p (op1);
9926 if (tmp == 0)
9927 op1 = validize_mem (force_const_mem (op_mode, op1));
9928 else if (tmp == 1)
9929 {
9930 if (TARGET_CMOVE)
9931 op1 = force_reg (op_mode, op1);
9932 }
9933 else
9934 op1 = force_reg (op_mode, op1);
9935 }
9936 }
9937
9938 /* Try to rearrange the comparison to make it cheaper. */
9939 if (ix86_fp_comparison_cost (code)
9940 > ix86_fp_comparison_cost (swap_condition (code))
9941 && (GET_CODE (op1) == REG || !no_new_pseudos))
9942 {
9943 rtx tmp;
9944 tmp = op0, op0 = op1, op1 = tmp;
9945 code = swap_condition (code);
9946 if (GET_CODE (op0) != REG)
9947 op0 = force_reg (op_mode, op0);
9948 }
9949
9950 *pop0 = op0;
9951 *pop1 = op1;
9952 return code;
9953 }
9954
9955 /* Convert comparison codes we use to represent FP comparison to integer
9956 code that will result in proper branch. Return UNKNOWN if no such code
9957 is available. */
9958
9959 enum rtx_code
9960 ix86_fp_compare_code_to_integer (enum rtx_code code)
9961 {
9962 switch (code)
9963 {
9964 case GT:
9965 return GTU;
9966 case GE:
9967 return GEU;
9968 case ORDERED:
9969 case UNORDERED:
9970 return code;
9971 break;
9972 case UNEQ:
9973 return EQ;
9974 break;
9975 case UNLT:
9976 return LTU;
9977 break;
9978 case UNLE:
9979 return LEU;
9980 break;
9981 case LTGT:
9982 return NE;
9983 break;
9984 default:
9985 return UNKNOWN;
9986 }
9987 }
9988
9989 /* Split comparison code CODE into comparisons we can do using branch
9990 instructions. BYPASS_CODE is the comparison code for a branch that will
9991 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9992 is not required, its value is set to UNKNOWN.
9993 We never require more than two branches. */
9994
9995 void
9996 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9997 enum rtx_code *first_code,
9998 enum rtx_code *second_code)
9999 {
10000 *first_code = code;
10001 *bypass_code = UNKNOWN;
10002 *second_code = UNKNOWN;
10003
10004 /* The fcomi comparison sets flags as follows:
10005
10006 cmp ZF PF CF
10007 > 0 0 0
10008 < 0 0 1
10009 = 1 0 0
10010 un 1 1 1 */
10011
10012 switch (code)
10013 {
10014 case GT: /* GTU - CF=0 & ZF=0 */
10015 case GE: /* GEU - CF=0 */
10016 case ORDERED: /* PF=0 */
10017 case UNORDERED: /* PF=1 */
10018 case UNEQ: /* EQ - ZF=1 */
10019 case UNLT: /* LTU - CF=1 */
10020 case UNLE: /* LEU - CF=1 | ZF=1 */
10021 case LTGT: /* EQ - ZF=0 */
10022 break;
10023 case LT: /* LTU - CF=1 - fails on unordered */
10024 *first_code = UNLT;
10025 *bypass_code = UNORDERED;
10026 break;
10027 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10028 *first_code = UNLE;
10029 *bypass_code = UNORDERED;
10030 break;
10031 case EQ: /* EQ - ZF=1 - fails on unordered */
10032 *first_code = UNEQ;
10033 *bypass_code = UNORDERED;
10034 break;
10035 case NE: /* NE - ZF=0 - fails on unordered */
10036 *first_code = LTGT;
10037 *second_code = UNORDERED;
10038 break;
10039 case UNGE: /* GEU - CF=0 - fails on unordered */
10040 *first_code = GE;
10041 *second_code = UNORDERED;
10042 break;
10043 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10044 *first_code = GT;
10045 *second_code = UNORDERED;
10046 break;
10047 default:
10048 gcc_unreachable ();
10049 }
10050 if (!TARGET_IEEE_FP)
10051 {
10052 *second_code = UNKNOWN;
10053 *bypass_code = UNKNOWN;
10054 }
10055 }
10056
10057 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10058 All of the following functions use the number of instructions as the cost metric.
10059 In the future this should be tweaked to compute bytes for optimize_size and to
10060 take into account the performance of various instructions on various CPUs. */
10061 static int
10062 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10063 {
10064 if (!TARGET_IEEE_FP)
10065 return 4;
10066 /* The cost of code output by ix86_expand_fp_compare. */
10067 switch (code)
10068 {
10069 case UNLE:
10070 case UNLT:
10071 case LTGT:
10072 case GT:
10073 case GE:
10074 case UNORDERED:
10075 case ORDERED:
10076 case UNEQ:
10077 return 4;
10078 break;
10079 case LT:
10080 case NE:
10081 case EQ:
10082 case UNGE:
10083 return 5;
10084 break;
10085 case LE:
10086 case UNGT:
10087 return 6;
10088 break;
10089 default:
10090 gcc_unreachable ();
10091 }
10092 }
10093
10094 /* Return cost of comparison done using fcomi operation.
10095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10096 static int
10097 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10098 {
10099 enum rtx_code bypass_code, first_code, second_code;
10100 /* Return an arbitrarily high cost when the instruction is not supported - this
10101 prevents gcc from using it. */
10102 if (!TARGET_CMOVE)
10103 return 1024;
10104 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10105 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10106 }
10107
10108 /* Return cost of comparison done using sahf operation.
10109 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10110 static int
10111 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10112 {
10113 enum rtx_code bypass_code, first_code, second_code;
10114 /* Return an arbitrarily high cost when the instruction is not preferred - this
10115 keeps gcc from using it. */
10116 if (!TARGET_USE_SAHF && !optimize_size)
10117 return 1024;
10118 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10119 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10120 }
10121
10122 /* Compute cost of the comparison done using any method.
10123 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10124 static int
10125 ix86_fp_comparison_cost (enum rtx_code code)
10126 {
10127 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10128 int min;
10129
10130 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10131 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10132
10133 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10134 if (min > sahf_cost)
10135 min = sahf_cost;
10136 if (min > fcomi_cost)
10137 min = fcomi_cost;
10138 return min;
10139 }
10140
10141 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10142
10143 static rtx
10144 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10145 rtx *second_test, rtx *bypass_test)
10146 {
10147 enum machine_mode fpcmp_mode, intcmp_mode;
10148 rtx tmp, tmp2;
10149 int cost = ix86_fp_comparison_cost (code);
10150 enum rtx_code bypass_code, first_code, second_code;
10151
10152 fpcmp_mode = ix86_fp_compare_mode (code);
10153 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10154
10155 if (second_test)
10156 *second_test = NULL_RTX;
10157 if (bypass_test)
10158 *bypass_test = NULL_RTX;
10159
10160 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10161
10162 /* Do fcomi/sahf based test when profitable. */
10163 if ((bypass_code == UNKNOWN || bypass_test)
10164 && (second_code == UNKNOWN || second_test)
10165 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10166 {
10167 if (TARGET_CMOVE)
10168 {
10169 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10170 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10171 tmp);
10172 emit_insn (tmp);
10173 }
10174 else
10175 {
10176 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10177 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10178 if (!scratch)
10179 scratch = gen_reg_rtx (HImode);
10180 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10181 emit_insn (gen_x86_sahf_1 (scratch));
10182 }
10183
10184 /* The FP codes work out to act like unsigned. */
10185 intcmp_mode = fpcmp_mode;
10186 code = first_code;
10187 if (bypass_code != UNKNOWN)
10188 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10189 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10190 const0_rtx);
10191 if (second_code != UNKNOWN)
10192 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10193 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10194 const0_rtx);
10195 }
10196 else
10197 {
10198 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10199 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10200 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10201 if (!scratch)
10202 scratch = gen_reg_rtx (HImode);
10203 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10204
10205 /* In the unordered case, we have to check C2 for NaN's, which
10206 doesn't happen to work out to anything nice combination-wise.
10207 So do some bit twiddling on the value we've got in AH to come
10208 up with an appropriate set of condition codes. */
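	 /* Assuming the usual x87 status-word layout, after the fnstsw above AH
	    holds C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6 (0x40);
	    the masks below (0x45, 0x40, 0x05, 0x04, 0x01) test combinations of
	    those bits.  */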
10209
10210 intcmp_mode = CCNOmode;
10211 switch (code)
10212 {
10213 case GT:
10214 case UNGT:
10215 if (code == GT || !TARGET_IEEE_FP)
10216 {
10217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10218 code = EQ;
10219 }
10220 else
10221 {
10222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10223 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10224 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10225 intcmp_mode = CCmode;
10226 code = GEU;
10227 }
10228 break;
10229 case LT:
10230 case UNLT:
10231 if (code == LT && TARGET_IEEE_FP)
10232 {
10233 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10234 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10235 intcmp_mode = CCmode;
10236 code = EQ;
10237 }
10238 else
10239 {
10240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10241 code = NE;
10242 }
10243 break;
10244 case GE:
10245 case UNGE:
10246 if (code == GE || !TARGET_IEEE_FP)
10247 {
10248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10249 code = EQ;
10250 }
10251 else
10252 {
10253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10254 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10255 GEN_INT (0x01)));
10256 code = NE;
10257 }
10258 break;
10259 case LE:
10260 case UNLE:
10261 if (code == LE && TARGET_IEEE_FP)
10262 {
10263 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10264 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10265 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10266 intcmp_mode = CCmode;
10267 code = LTU;
10268 }
10269 else
10270 {
10271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10272 code = NE;
10273 }
10274 break;
10275 case EQ:
10276 case UNEQ:
10277 if (code == EQ && TARGET_IEEE_FP)
10278 {
10279 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10280 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10281 intcmp_mode = CCmode;
10282 code = EQ;
10283 }
10284 else
10285 {
10286 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10287 code = NE;
10288 break;
10289 }
10290 break;
10291 case NE:
10292 case LTGT:
10293 if (code == NE && TARGET_IEEE_FP)
10294 {
10295 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10296 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10297 GEN_INT (0x40)));
10298 code = NE;
10299 }
10300 else
10301 {
10302 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10303 code = EQ;
10304 }
10305 break;
10306
10307 case UNORDERED:
10308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10309 code = NE;
10310 break;
10311 case ORDERED:
10312 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10313 code = EQ;
10314 break;
10315
10316 default:
10317 gcc_unreachable ();
10318 }
10319 }
10320
10321 /* Return the test that should be put into the flags user, i.e.
10322 the bcc, scc, or cmov instruction. */
10323 return gen_rtx_fmt_ee (code, VOIDmode,
10324 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10325 const0_rtx);
10326 }
10327
10328 rtx
10329 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10330 {
10331 rtx op0, op1, ret;
10332 op0 = ix86_compare_op0;
10333 op1 = ix86_compare_op1;
10334
10335 if (second_test)
10336 *second_test = NULL_RTX;
10337 if (bypass_test)
10338 *bypass_test = NULL_RTX;
10339
10340 if (ix86_compare_emitted)
10341 {
10342 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10343 ix86_compare_emitted = NULL_RTX;
10344 }
10345 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10346 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10347 second_test, bypass_test);
10348 else
10349 ret = ix86_expand_int_compare (code, op0, op1);
10350
10351 return ret;
10352 }
10353
10354 /* Return true if the CODE will result in nontrivial jump sequence. */
10355 bool
10356 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10357 {
10358 enum rtx_code bypass_code, first_code, second_code;
10359 if (!TARGET_CMOVE)
10360 return true;
10361 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10362 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10363 }
10364
10365 void
10366 ix86_expand_branch (enum rtx_code code, rtx label)
10367 {
10368 rtx tmp;
10369
10370 /* If we have emitted a compare insn, go straight to simple.
10371 ix86_expand_compare won't emit anything if ix86_compare_emitted
10372 is non-NULL. */
10373 if (ix86_compare_emitted)
10374 goto simple;
10375
10376 switch (GET_MODE (ix86_compare_op0))
10377 {
10378 case QImode:
10379 case HImode:
10380 case SImode:
10381 simple:
10382 tmp = ix86_expand_compare (code, NULL, NULL);
10383 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10384 gen_rtx_LABEL_REF (VOIDmode, label),
10385 pc_rtx);
10386 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10387 return;
10388
10389 case SFmode:
10390 case DFmode:
10391 case XFmode:
10392 {
10393 rtvec vec;
10394 int use_fcomi;
10395 enum rtx_code bypass_code, first_code, second_code;
10396
10397 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10398 &ix86_compare_op1);
10399
10400 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10401
10402 /* Check whether we will use the natural sequence with one jump. If
10403 so, we can expand the jump early. Otherwise delay expansion by
10404 creating a compound insn so as not to confuse the optimizers. */
10405 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10406 && TARGET_CMOVE)
10407 {
10408 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10409 gen_rtx_LABEL_REF (VOIDmode, label),
10410 pc_rtx, NULL_RTX, NULL_RTX);
10411 }
10412 else
10413 {
10414 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10415 ix86_compare_op0, ix86_compare_op1);
10416 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10417 gen_rtx_LABEL_REF (VOIDmode, label),
10418 pc_rtx);
10419 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10420
10421 use_fcomi = ix86_use_fcomi_compare (code);
10422 vec = rtvec_alloc (3 + !use_fcomi);
10423 RTVEC_ELT (vec, 0) = tmp;
10424 RTVEC_ELT (vec, 1)
10425 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10426 RTVEC_ELT (vec, 2)
10427 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10428 if (! use_fcomi)
10429 RTVEC_ELT (vec, 3)
10430 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10431
10432 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10433 }
10434 return;
10435 }
10436
10437 case DImode:
10438 if (TARGET_64BIT)
10439 goto simple;
10440 case TImode:
10441 /* Expand DImode branch into multiple compare+branch. */
10442 {
10443 rtx lo[2], hi[2], label2;
10444 enum rtx_code code1, code2, code3;
10445 enum machine_mode submode;
10446
10447 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10448 {
10449 tmp = ix86_compare_op0;
10450 ix86_compare_op0 = ix86_compare_op1;
10451 ix86_compare_op1 = tmp;
10452 code = swap_condition (code);
10453 }
10454 if (GET_MODE (ix86_compare_op0) == DImode)
10455 {
10456 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10457 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10458 submode = SImode;
10459 }
10460 else
10461 {
10462 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10463 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10464 submode = DImode;
10465 }
10466
10467 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10468 avoid two branches. This costs one extra insn, so disable when
10469 optimizing for size. */
10470
10471 if ((code == EQ || code == NE)
10472 && (!optimize_size
10473 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10474 {
10475 rtx xor0, xor1;
10476
10477 xor1 = hi[0];
10478 if (hi[1] != const0_rtx)
10479 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10480 NULL_RTX, 0, OPTAB_WIDEN);
10481
10482 xor0 = lo[0];
10483 if (lo[1] != const0_rtx)
10484 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10485 NULL_RTX, 0, OPTAB_WIDEN);
10486
10487 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10488 NULL_RTX, 0, OPTAB_WIDEN);
10489
10490 ix86_compare_op0 = tmp;
10491 ix86_compare_op1 = const0_rtx;
10492 ix86_expand_branch (code, label);
10493 return;
10494 }
10495
10496 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10497 op1 is a constant and the low word is zero, then we can just
10498 examine the high word. */
10499
10500 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10501 switch (code)
10502 {
10503 case LT: case LTU: case GE: case GEU:
10504 ix86_compare_op0 = hi[0];
10505 ix86_compare_op1 = hi[1];
10506 ix86_expand_branch (code, label);
10507 return;
10508 default:
10509 break;
10510 }
10511
10512 /* Otherwise, we need two or three jumps. */
10513
10514 label2 = gen_label_rtx ();
10515
10516 code1 = code;
10517 code2 = swap_condition (code);
10518 code3 = unsigned_condition (code);
10519
10520 switch (code)
10521 {
10522 case LT: case GT: case LTU: case GTU:
10523 break;
10524
10525 case LE: code1 = LT; code2 = GT; break;
10526 case GE: code1 = GT; code2 = LT; break;
10527 case LEU: code1 = LTU; code2 = GTU; break;
10528 case GEU: code1 = GTU; code2 = LTU; break;
10529
10530 case EQ: code1 = UNKNOWN; code2 = NE; break;
10531 case NE: code2 = UNKNOWN; break;
10532
10533 default:
10534 gcc_unreachable ();
10535 }
10536
10537 /*
10538 * a < b =>
10539 * if (hi(a) < hi(b)) goto true;
10540 * if (hi(a) > hi(b)) goto false;
10541 * if (lo(a) < lo(b)) goto true;
10542 * false:
10543 */
10544
10545 ix86_compare_op0 = hi[0];
10546 ix86_compare_op1 = hi[1];
10547
10548 if (code1 != UNKNOWN)
10549 ix86_expand_branch (code1, label);
10550 if (code2 != UNKNOWN)
10551 ix86_expand_branch (code2, label2);
10552
10553 ix86_compare_op0 = lo[0];
10554 ix86_compare_op1 = lo[1];
10555 ix86_expand_branch (code3, label);
10556
10557 if (code2 != UNKNOWN)
10558 emit_label (label2);
10559 return;
10560 }
10561
10562 default:
10563 gcc_unreachable ();
10564 }
10565 }
10566
10567 /* Split branch based on floating point condition. */
10568 void
10569 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10570 rtx target1, rtx target2, rtx tmp, rtx pushed)
10571 {
10572 rtx second, bypass;
10573 rtx label = NULL_RTX;
10574 rtx condition;
10575 int bypass_probability = -1, second_probability = -1, probability = -1;
10576 rtx i;
10577
10578 if (target2 != pc_rtx)
10579 {
10580 rtx tmp = target2;
10581 code = reverse_condition_maybe_unordered (code);
10582 target2 = target1;
10583 target1 = tmp;
10584 }
10585
10586 condition = ix86_expand_fp_compare (code, op1, op2,
10587 tmp, &second, &bypass);
10588
10589 /* Remove pushed operand from stack. */
10590 if (pushed)
10591 ix86_free_from_memory (GET_MODE (pushed));
10592
10593 if (split_branch_probability >= 0)
10594 {
10595 /* Distribute the probabilities across the jumps.
10596 Assume that the BYPASS and SECOND tests are always
10597 for UNORDERED. */
10598 probability = split_branch_probability;
10599
10600 /* A value of 1 is low enough that there is no need for the probability
10601 to be updated. Later we may run some experiments and see
10602 whether unordered values are more frequent in practice. */
10603 if (bypass)
10604 bypass_probability = 1;
10605 if (second)
10606 second_probability = 1;
10607 }
10608 if (bypass != NULL_RTX)
10609 {
10610 label = gen_label_rtx ();
10611 i = emit_jump_insn (gen_rtx_SET
10612 (VOIDmode, pc_rtx,
10613 gen_rtx_IF_THEN_ELSE (VOIDmode,
10614 bypass,
10615 gen_rtx_LABEL_REF (VOIDmode,
10616 label),
10617 pc_rtx)));
10618 if (bypass_probability >= 0)
10619 REG_NOTES (i)
10620 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10621 GEN_INT (bypass_probability),
10622 REG_NOTES (i));
10623 }
10624 i = emit_jump_insn (gen_rtx_SET
10625 (VOIDmode, pc_rtx,
10626 gen_rtx_IF_THEN_ELSE (VOIDmode,
10627 condition, target1, target2)));
10628 if (probability >= 0)
10629 REG_NOTES (i)
10630 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10631 GEN_INT (probability),
10632 REG_NOTES (i));
10633 if (second != NULL_RTX)
10634 {
10635 i = emit_jump_insn (gen_rtx_SET
10636 (VOIDmode, pc_rtx,
10637 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10638 target2)));
10639 if (second_probability >= 0)
10640 REG_NOTES (i)
10641 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10642 GEN_INT (second_probability),
10643 REG_NOTES (i));
10644 }
10645 if (label != NULL_RTX)
10646 emit_label (label);
10647 }
10648
10649 int
10650 ix86_expand_setcc (enum rtx_code code, rtx dest)
10651 {
10652 rtx ret, tmp, tmpreg, equiv;
10653 rtx second_test, bypass_test;
10654
10655 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10656 return 0; /* FAIL */
10657
10658 gcc_assert (GET_MODE (dest) == QImode);
10659
10660 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10661 PUT_MODE (ret, QImode);
10662
10663 tmp = dest;
10664 tmpreg = dest;
10665
10666 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10667 if (bypass_test || second_test)
10668 {
10669 rtx test = second_test;
10670 int bypass = 0;
10671 rtx tmp2 = gen_reg_rtx (QImode);
10672 if (bypass_test)
10673 {
10674 gcc_assert (!second_test);
10675 test = bypass_test;
10676 bypass = 1;
10677 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10678 }
10679 PUT_MODE (test, QImode);
10680 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10681
10682 if (bypass)
10683 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10684 else
10685 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10686 }
10687
10688 /* Attach a REG_EQUAL note describing the comparison result. */
10689 if (ix86_compare_op0 && ix86_compare_op1)
10690 {
10691 equiv = simplify_gen_relational (code, QImode,
10692 GET_MODE (ix86_compare_op0),
10693 ix86_compare_op0, ix86_compare_op1);
10694 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10695 }
10696
10697 return 1; /* DONE */
10698 }
10699
10700 /* Expand a comparison setting or clearing the carry flag. Return true when
10701 successful, and set *POP to the comparison operation. */
10702 static bool
10703 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10704 {
10705 enum machine_mode mode =
10706 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10707
10708 /* Do not handle DImode compares that go through a special path. Also we can't
10709 deal with FP compares yet; it would be possible to add that. */
10710 if (mode == (TARGET_64BIT ? TImode : DImode))
10711 return false;
10712 if (FLOAT_MODE_P (mode))
10713 {
10714 rtx second_test = NULL, bypass_test = NULL;
10715 rtx compare_op, compare_seq;
10716
10717 /* Shortcut: the following common codes never translate into carry flag compares. */
10718 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10719 || code == ORDERED || code == UNORDERED)
10720 return false;
10721
10722 /* These comparisons require zero flag; swap operands so they won't. */
10723 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10724 && !TARGET_IEEE_FP)
10725 {
10726 rtx tmp = op0;
10727 op0 = op1;
10728 op1 = tmp;
10729 code = swap_condition (code);
10730 }
10731
10732 /* Try to expand the comparison and verify that we end up with carry flag
10733 based comparison. This fails to be true only when we decide to expand the
10734 comparison using arithmetic, which is not a common scenario. */
10735 start_sequence ();
10736 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10737 &second_test, &bypass_test);
10738 compare_seq = get_insns ();
10739 end_sequence ();
10740
10741 if (second_test || bypass_test)
10742 return false;
10743 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10744 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10745 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10746 else
10747 code = GET_CODE (compare_op);
10748 if (code != LTU && code != GEU)
10749 return false;
10750 emit_insn (compare_seq);
10751 *pop = compare_op;
10752 return true;
10753 }
10754 if (!INTEGRAL_MODE_P (mode))
10755 return false;
10756 switch (code)
10757 {
10758 case LTU:
10759 case GEU:
10760 break;
10761
10762 /* Convert a==0 into (unsigned)a<1. */
10763 case EQ:
10764 case NE:
10765 if (op1 != const0_rtx)
10766 return false;
10767 op1 = const1_rtx;
10768 code = (code == EQ ? LTU : GEU);
10769 break;
10770
10771 /* Convert a>b into b<a or a>=b+1. */
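    /* For example, with a constant operand a > 5 becomes (unsigned) a >= 6,
       which is a plain carry-flag (GEU) test.  */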
10772 case GTU:
10773 case LEU:
10774 if (GET_CODE (op1) == CONST_INT)
10775 {
10776 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10777 /* Bail out on overflow. We could still swap the operands, but that
10778 would force loading the constant into a register. */
10779 if (op1 == const0_rtx
10780 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10781 return false;
10782 code = (code == GTU ? GEU : LTU);
10783 }
10784 else
10785 {
10786 rtx tmp = op1;
10787 op1 = op0;
10788 op0 = tmp;
10789 code = (code == GTU ? LTU : GEU);
10790 }
10791 break;
10792
10793 /* Convert a>=0 into (unsigned)a<0x80000000. */
10794 case LT:
10795 case GE:
10796 if (mode == DImode || op1 != const0_rtx)
10797 return false;
10798 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10799 code = (code == LT ? GEU : LTU);
10800 break;
10801 case LE:
10802 case GT:
10803 if (mode == DImode || op1 != constm1_rtx)
10804 return false;
10805 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10806 code = (code == LE ? GEU : LTU);
10807 break;
10808
10809 default:
10810 return false;
10811 }
10812 /* Swapping operands may cause constant to appear as first operand. */
10813 if (!nonimmediate_operand (op0, VOIDmode))
10814 {
10815 if (no_new_pseudos)
10816 return false;
10817 op0 = force_reg (mode, op0);
10818 }
10819 ix86_compare_op0 = op0;
10820 ix86_compare_op1 = op1;
10821 *pop = ix86_expand_compare (code, NULL, NULL);
10822 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10823 return true;
10824 }
10825
10826 int
10827 ix86_expand_int_movcc (rtx operands[])
10828 {
10829 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10830 rtx compare_seq, compare_op;
10831 rtx second_test, bypass_test;
10832 enum machine_mode mode = GET_MODE (operands[0]);
10833 bool sign_bit_compare_p = false;
10834
10835 start_sequence ();
10836 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10837 compare_seq = get_insns ();
10838 end_sequence ();
10839
10840 compare_code = GET_CODE (compare_op);
10841
10842 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10843 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10844 sign_bit_compare_p = true;
10845
10846 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10847 HImode insns, we'd be swallowed in word prefix ops. */
10848
10849 if ((mode != HImode || TARGET_FAST_PREFIX)
10850 && (mode != (TARGET_64BIT ? TImode : DImode))
10851 && GET_CODE (operands[2]) == CONST_INT
10852 && GET_CODE (operands[3]) == CONST_INT)
10853 {
10854 rtx out = operands[0];
10855 HOST_WIDE_INT ct = INTVAL (operands[2]);
10856 HOST_WIDE_INT cf = INTVAL (operands[3]);
10857 HOST_WIDE_INT diff;
10858
10859 diff = ct - cf;
10860 /* Sign bit compares are better done using shifts than by using
10861 sbb. */
10862 if (sign_bit_compare_p
10863 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10864 ix86_compare_op1, &compare_op))
10865 {
10866 /* Detect overlap between destination and compare sources. */
10867 rtx tmp = out;
10868
10869 if (!sign_bit_compare_p)
10870 {
10871 bool fpcmp = false;
10872
10873 compare_code = GET_CODE (compare_op);
10874
10875 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10876 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10877 {
10878 fpcmp = true;
10879 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10880 }
10881
10882 /* To simplify rest of code, restrict to the GEU case. */
10883 if (compare_code == LTU)
10884 {
10885 HOST_WIDE_INT tmp = ct;
10886 ct = cf;
10887 cf = tmp;
10888 compare_code = reverse_condition (compare_code);
10889 code = reverse_condition (code);
10890 }
10891 else
10892 {
10893 if (fpcmp)
10894 PUT_CODE (compare_op,
10895 reverse_condition_maybe_unordered
10896 (GET_CODE (compare_op)));
10897 else
10898 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10899 }
10900 diff = ct - cf;
10901
10902 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10903 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10904 tmp = gen_reg_rtx (mode);
10905
10906 if (mode == DImode)
10907 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10908 else
10909 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10910 }
10911 else
10912 {
10913 if (code == GT || code == GE)
10914 code = reverse_condition (code);
10915 else
10916 {
10917 HOST_WIDE_INT tmp = ct;
10918 ct = cf;
10919 cf = tmp;
10920 diff = ct - cf;
10921 }
10922 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10923 ix86_compare_op1, VOIDmode, 0, -1);
10924 }
10925
10926 if (diff == 1)
10927 {
10928 /*
10929 * cmpl op0,op1
10930 * sbbl dest,dest
10931 * [addl dest, ct]
10932 *
10933 * Size 5 - 8.
10934 */
10935 if (ct)
10936 tmp = expand_simple_binop (mode, PLUS,
10937 tmp, GEN_INT (ct),
10938 copy_rtx (tmp), 1, OPTAB_DIRECT);
10939 }
10940 else if (cf == -1)
10941 {
10942 /*
10943 * cmpl op0,op1
10944 * sbbl dest,dest
10945 * orl $ct, dest
10946 *
10947 * Size 8.
10948 */
10949 tmp = expand_simple_binop (mode, IOR,
10950 tmp, GEN_INT (ct),
10951 copy_rtx (tmp), 1, OPTAB_DIRECT);
10952 }
10953 else if (diff == -1 && ct)
10954 {
10955 /*
10956 * cmpl op0,op1
10957 * sbbl dest,dest
10958 * notl dest
10959 * [addl dest, cf]
10960 *
10961 * Size 8 - 11.
10962 */
10963 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10964 if (cf)
10965 tmp = expand_simple_binop (mode, PLUS,
10966 copy_rtx (tmp), GEN_INT (cf),
10967 copy_rtx (tmp), 1, OPTAB_DIRECT);
10968 }
10969 else
10970 {
10971 /*
10972 * cmpl op0,op1
10973 * sbbl dest,dest
10974 * [notl dest]
10975 * andl cf - ct, dest
10976 * [addl dest, ct]
10977 *
10978 * Size 8 - 11.
10979 */
10980
10981 if (cf == 0)
10982 {
10983 cf = ct;
10984 ct = 0;
10985 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10986 }
10987
10988 tmp = expand_simple_binop (mode, AND,
10989 copy_rtx (tmp),
10990 gen_int_mode (cf - ct, mode),
10991 copy_rtx (tmp), 1, OPTAB_DIRECT);
10992 if (ct)
10993 tmp = expand_simple_binop (mode, PLUS,
10994 copy_rtx (tmp), GEN_INT (ct),
10995 copy_rtx (tmp), 1, OPTAB_DIRECT);
10996 }
10997
10998 if (!rtx_equal_p (tmp, out))
10999 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11000
11001 return 1; /* DONE */
11002 }
11003
11004 if (diff < 0)
11005 {
11006 HOST_WIDE_INT tmp;
11007 tmp = ct, ct = cf, cf = tmp;
11008 diff = -diff;
11009 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11010 {
11011 	      /* We may be reversing an unordered compare to a normal compare, which
11012 		 is not valid in general (we may convert a non-trapping condition
11013 		 into a trapping one); however, on i386 we currently emit all
11014 		 comparisons unordered.  */
11015 compare_code = reverse_condition_maybe_unordered (compare_code);
11016 code = reverse_condition_maybe_unordered (code);
11017 }
11018 else
11019 {
11020 compare_code = reverse_condition (compare_code);
11021 code = reverse_condition (code);
11022 }
11023 }
11024
11025 compare_code = UNKNOWN;
11026 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11027 && GET_CODE (ix86_compare_op1) == CONST_INT)
11028 {
11029 if (ix86_compare_op1 == const0_rtx
11030 && (code == LT || code == GE))
11031 compare_code = code;
11032 else if (ix86_compare_op1 == constm1_rtx)
11033 {
11034 if (code == LE)
11035 compare_code = LT;
11036 else if (code == GT)
11037 compare_code = GE;
11038 }
11039 }
11040
11041 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11042 if (compare_code != UNKNOWN
11043 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11044 && (cf == -1 || ct == -1))
11045 {
11046 /* If lea code below could be used, only optimize
11047 if it results in a 2 insn sequence. */
11048
11049 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11050 || diff == 3 || diff == 5 || diff == 9)
11051 || (compare_code == LT && ct == -1)
11052 || (compare_code == GE && cf == -1))
11053 {
11054 /*
11055 * notl op1 (if necessary)
11056 * sarl $31, op1
11057 * orl cf, op1
11058 */
11059 if (ct != -1)
11060 {
11061 cf = ct;
11062 ct = -1;
11063 code = reverse_condition (code);
11064 }
11065
11066 out = emit_store_flag (out, code, ix86_compare_op0,
11067 ix86_compare_op1, VOIDmode, 0, -1);
11068
11069 out = expand_simple_binop (mode, IOR,
11070 out, GEN_INT (cf),
11071 out, 1, OPTAB_DIRECT);
11072 if (out != operands[0])
11073 emit_move_insn (operands[0], out);
11074
11075 return 1; /* DONE */
11076 }
11077 }
11078
11079
11080 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11081 || diff == 3 || diff == 5 || diff == 9)
11082 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11083 && (mode != DImode
11084 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11085 {
11086 /*
11087 * xorl dest,dest
11088 * cmpl op1,op2
11089 * setcc dest
11090 * lea cf(dest*(ct-cf)),dest
11091 *
11092 * Size 14.
11093 *
11094 * This also catches the degenerate setcc-only case.
11095 */
11096
11097 rtx tmp;
11098 int nops;
11099
11100 out = emit_store_flag (out, code, ix86_compare_op0,
11101 ix86_compare_op1, VOIDmode, 0, 1);
11102
11103 nops = 0;
11104 	      /* On x86_64 the lea instruction operates on Pmode, so we need
11105 		 the arithmetic done in the proper mode to match.  */
11106 if (diff == 1)
11107 tmp = copy_rtx (out);
11108 else
11109 {
11110 rtx out1;
11111 out1 = copy_rtx (out);
11112 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11113 nops++;
11114 if (diff & 1)
11115 {
11116 tmp = gen_rtx_PLUS (mode, tmp, out1);
11117 nops++;
11118 }
11119 }
11120 if (cf != 0)
11121 {
11122 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11123 nops++;
11124 }
11125 if (!rtx_equal_p (tmp, out))
11126 {
11127 if (nops == 1)
11128 out = force_operand (tmp, copy_rtx (out));
11129 else
11130 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11131 }
11132 if (!rtx_equal_p (out, operands[0]))
11133 emit_move_insn (operands[0], copy_rtx (out));
11134
11135 return 1; /* DONE */
11136 }
11137
11138 /*
11139 * General case: Jumpful:
11140 * xorl dest,dest cmpl op1, op2
11141 * cmpl op1, op2 movl ct, dest
11142 * setcc dest jcc 1f
11143 * decl dest movl cf, dest
11144 * andl (cf-ct),dest 1:
11145 * addl ct,dest
11146 *
11147 * Size 20. Size 14.
11148 *
11149 * This is reasonably steep, but branch mispredict costs are
11150 * high on modern cpus, so consider failing only if optimizing
11151 * for space.
11152 */
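	  /* For example, with ct = 10 and cf = 3: setcc yields 1 or 0, decl
	     gives 0 or -1, andl with (cf - ct) = -7 gives 0 or -7, and the
	     final addl of 10 gives 10 or 3, i.e. ct when the condition holds
	     and cf otherwise.  */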
11153
11154 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11155 && BRANCH_COST >= 2)
11156 {
11157 if (cf == 0)
11158 {
11159 cf = ct;
11160 ct = 0;
11161 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11162 		/* We may be reversing an unordered compare to a normal compare,
11163 		   which is not valid in general (we may convert a non-trapping
11164 		   condition into a trapping one); however, on i386 we currently
11165 		   emit all comparisons unordered.  */
11166 code = reverse_condition_maybe_unordered (code);
11167 else
11168 {
11169 code = reverse_condition (code);
11170 if (compare_code != UNKNOWN)
11171 compare_code = reverse_condition (compare_code);
11172 }
11173 }
11174
11175 if (compare_code != UNKNOWN)
11176 {
11177 /* notl op1 (if needed)
11178 sarl $31, op1
11179 andl (cf-ct), op1
11180 addl ct, op1
11181
11182 For x < 0 (resp. x <= -1) there will be no notl,
11183 so if possible swap the constants to get rid of the
11184 complement.
11185 True/false will be -1/0 while code below (store flag
11186 followed by decrement) is 0/-1, so the constants need
11187 to be exchanged once more. */
11188
11189 if (compare_code == GE || !cf)
11190 {
11191 code = reverse_condition (code);
11192 compare_code = LT;
11193 }
11194 else
11195 {
11196 HOST_WIDE_INT tmp = cf;
11197 cf = ct;
11198 ct = tmp;
11199 }
11200
11201 out = emit_store_flag (out, code, ix86_compare_op0,
11202 ix86_compare_op1, VOIDmode, 0, -1);
11203 }
11204 else
11205 {
11206 out = emit_store_flag (out, code, ix86_compare_op0,
11207 ix86_compare_op1, VOIDmode, 0, 1);
11208
11209 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11210 copy_rtx (out), 1, OPTAB_DIRECT);
11211 }
11212
11213 out = expand_simple_binop (mode, AND, copy_rtx (out),
11214 gen_int_mode (cf - ct, mode),
11215 copy_rtx (out), 1, OPTAB_DIRECT);
11216 if (ct)
11217 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11218 copy_rtx (out), 1, OPTAB_DIRECT);
11219 if (!rtx_equal_p (out, operands[0]))
11220 emit_move_insn (operands[0], copy_rtx (out));
11221
11222 return 1; /* DONE */
11223 }
11224 }
11225
11226 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11227 {
11228 	  /* Try a few more things with specific constants and a variable.  */
11229
11230 optab op;
11231 rtx var, orig_out, out, tmp;
11232
11233 if (BRANCH_COST <= 2)
11234 return 0; /* FAIL */
11235
11236 	  /* If one of the two operands is an interesting constant, load -1 or 0
11237 	     conditionally via the code above and mask the variable in with a logical operation.  */
11238
11239 if (GET_CODE (operands[2]) == CONST_INT)
11240 {
11241 var = operands[3];
11242 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11243 operands[3] = constm1_rtx, op = and_optab;
11244 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11245 operands[3] = const0_rtx, op = ior_optab;
11246 else
11247 return 0; /* FAIL */
11248 }
11249 else if (GET_CODE (operands[3]) == CONST_INT)
11250 {
11251 var = operands[2];
11252 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11253 operands[2] = constm1_rtx, op = and_optab;
11254 	      else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11255 operands[2] = const0_rtx, op = ior_optab;
11256 else
11257 return 0; /* FAIL */
11258 }
11259 else
11260 return 0; /* FAIL */
11261
11262 orig_out = operands[0];
11263 tmp = gen_reg_rtx (mode);
11264 operands[0] = tmp;
11265
11266 /* Recurse to get the constant loaded. */
11267 if (ix86_expand_int_movcc (operands) == 0)
11268 return 0; /* FAIL */
11269
11270 /* Mask in the interesting variable. */
11271 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11272 OPTAB_WIDEN);
11273 if (!rtx_equal_p (out, orig_out))
11274 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11275
11276 return 1; /* DONE */
11277 }
11278
11279 /*
11280 * For comparison with above,
11281 *
11282 * movl cf,dest
11283 * movl ct,tmp
11284 * cmpl op1,op2
11285 * cmovcc tmp,dest
11286 *
11287 * Size 15.
11288 */
11289
11290 if (! nonimmediate_operand (operands[2], mode))
11291 operands[2] = force_reg (mode, operands[2]);
11292 if (! nonimmediate_operand (operands[3], mode))
11293 operands[3] = force_reg (mode, operands[3]);
11294
11295 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11296 {
11297 rtx tmp = gen_reg_rtx (mode);
11298 emit_move_insn (tmp, operands[3]);
11299 operands[3] = tmp;
11300 }
11301 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11302 {
11303 rtx tmp = gen_reg_rtx (mode);
11304 emit_move_insn (tmp, operands[2]);
11305 operands[2] = tmp;
11306 }
11307
11308 if (! register_operand (operands[2], VOIDmode)
11309 && (mode == QImode
11310 || ! register_operand (operands[3], VOIDmode)))
11311 operands[2] = force_reg (mode, operands[2]);
11312
11313 if (mode == QImode
11314 && ! register_operand (operands[3], VOIDmode))
11315 operands[3] = force_reg (mode, operands[3]);
11316
11317 emit_insn (compare_seq);
11318 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11319 gen_rtx_IF_THEN_ELSE (mode,
11320 compare_op, operands[2],
11321 operands[3])));
11322 if (bypass_test)
11323 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11324 gen_rtx_IF_THEN_ELSE (mode,
11325 bypass_test,
11326 copy_rtx (operands[3]),
11327 copy_rtx (operands[0]))));
11328 if (second_test)
11329 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11330 gen_rtx_IF_THEN_ELSE (mode,
11331 second_test,
11332 copy_rtx (operands[2]),
11333 copy_rtx (operands[0]))));
11334
11335 return 1; /* DONE */
11336 }
11337
11338 /* Swap, force into registers, or otherwise massage the two operands
11339 to an sse comparison with a mask result. Thus we differ a bit from
11340 ix86_prepare_fp_compare_args which expects to produce a flags result.
11341
11342 The DEST operand exists to help determine whether to commute commutative
11343 operators. The POP0/POP1 operands are updated in place. The new
11344 comparison code is returned, or UNKNOWN if not implementable. */
11345
11346 static enum rtx_code
11347 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11348 rtx *pop0, rtx *pop1)
11349 {
11350 rtx tmp;
11351
11352 switch (code)
11353 {
11354 case LTGT:
11355 case UNEQ:
11356 /* We have no LTGT as an operator. We could implement it with
11357 NE & ORDERED, but this requires an extra temporary. It's
11358 not clear that it's worth it. */
11359 return UNKNOWN;
11360
11361 case LT:
11362 case LE:
11363 case UNGT:
11364 case UNGE:
11365 /* These are supported directly. */
11366 break;
11367
11368 case EQ:
11369 case NE:
11370 case UNORDERED:
11371 case ORDERED:
11372 /* For commutative operators, try to canonicalize the destination
11373 operand to be first in the comparison - this helps reload to
11374 avoid extra moves. */
11375 if (!dest || !rtx_equal_p (dest, *pop1))
11376 break;
11377 /* FALLTHRU */
11378
11379 case GE:
11380 case GT:
11381 case UNLE:
11382 case UNLT:
11383 /* These are not supported directly. Swap the comparison operands
11384 to transform into something that is supported. */
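      /* For example, GT is rewritten as LT with swapped operands:
	 x > y becomes y < x, the form the hardware supports directly.  */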
11385 tmp = *pop0;
11386 *pop0 = *pop1;
11387 *pop1 = tmp;
11388 code = swap_condition (code);
11389 break;
11390
11391 default:
11392 gcc_unreachable ();
11393 }
11394
11395 return code;
11396 }
11397
11398 /* Detect conditional moves that exactly match min/max operational
11399 semantics. Note that this is IEEE safe, as long as we don't
11400 interchange the operands.
11401
11402 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11403 and TRUE if the operation is successful and instructions are emitted. */
11404
11405 static bool
11406 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11407 rtx cmp_op1, rtx if_true, rtx if_false)
11408 {
11409 enum machine_mode mode;
11410 bool is_min;
11411 rtx tmp;
11412
11413 if (code == LT)
11414 ;
11415 else if (code == UNGE)
11416 {
11417 tmp = if_true;
11418 if_true = if_false;
11419 if_false = tmp;
11420 }
11421 else
11422 return false;
11423
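  /* With the comparison canonicalized to LT, x < y ? x : y is a minimum
     and x < y ? y : x is a maximum.  */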
11424 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11425 is_min = true;
11426 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11427 is_min = false;
11428 else
11429 return false;
11430
11431 mode = GET_MODE (dest);
11432
11433 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11434 but MODE may be a vector mode and thus not appropriate. */
11435 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11436 {
11437 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11438 rtvec v;
11439
11440 if_true = force_reg (mode, if_true);
11441 v = gen_rtvec (2, if_true, if_false);
11442 tmp = gen_rtx_UNSPEC (mode, v, u);
11443 }
11444 else
11445 {
11446 code = is_min ? SMIN : SMAX;
11447 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11448 }
11449
11450 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11451 return true;
11452 }
11453
11454 /* Expand an sse vector comparison. Return the register with the result. */
11455
11456 static rtx
11457 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11458 rtx op_true, rtx op_false)
11459 {
11460 enum machine_mode mode = GET_MODE (dest);
11461 rtx x;
11462
11463 cmp_op0 = force_reg (mode, cmp_op0);
11464 if (!nonimmediate_operand (cmp_op1, mode))
11465 cmp_op1 = force_reg (mode, cmp_op1);
11466
11467 if (optimize
11468 || reg_overlap_mentioned_p (dest, op_true)
11469 || reg_overlap_mentioned_p (dest, op_false))
11470 dest = gen_reg_rtx (mode);
11471
11472 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11473 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11474
11475 return dest;
11476 }
11477
11478 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11479 operations. This is used for both scalar and vector conditional moves. */
11480
11481 static void
11482 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11483 {
11484 enum machine_mode mode = GET_MODE (dest);
11485 rtx t2, t3, x;
11486
11487 if (op_false == CONST0_RTX (mode))
11488 {
11489 op_true = force_reg (mode, op_true);
11490 x = gen_rtx_AND (mode, cmp, op_true);
11491 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11492 }
11493 else if (op_true == CONST0_RTX (mode))
11494 {
11495 op_false = force_reg (mode, op_false);
11496 x = gen_rtx_NOT (mode, cmp);
11497 x = gen_rtx_AND (mode, x, op_false);
11498 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11499 }
11500 else
11501 {
11502 op_true = force_reg (mode, op_true);
11503 op_false = force_reg (mode, op_false);
11504
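      /* In the general case compute
	 dest = (op_true & cmp) | (op_false & ~cmp).  */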
11505 t2 = gen_reg_rtx (mode);
11506 if (optimize)
11507 t3 = gen_reg_rtx (mode);
11508 else
11509 t3 = dest;
11510
11511 x = gen_rtx_AND (mode, op_true, cmp);
11512 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11513
11514 x = gen_rtx_NOT (mode, cmp);
11515 x = gen_rtx_AND (mode, x, op_false);
11516 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11517
11518 x = gen_rtx_IOR (mode, t3, t2);
11519 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11520 }
11521 }
11522
11523 /* Expand a floating-point conditional move. Return true if successful. */
11524
11525 int
11526 ix86_expand_fp_movcc (rtx operands[])
11527 {
11528 enum machine_mode mode = GET_MODE (operands[0]);
11529 enum rtx_code code = GET_CODE (operands[1]);
11530 rtx tmp, compare_op, second_test, bypass_test;
11531
11532 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11533 {
11534 enum machine_mode cmode;
11535
11536 /* Since we've no cmove for sse registers, don't force bad register
11537 allocation just to gain access to it. Deny movcc when the
11538 comparison mode doesn't match the move mode. */
11539 cmode = GET_MODE (ix86_compare_op0);
11540 if (cmode == VOIDmode)
11541 cmode = GET_MODE (ix86_compare_op1);
11542 if (cmode != mode)
11543 return 0;
11544
11545 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11546 &ix86_compare_op0,
11547 &ix86_compare_op1);
11548 if (code == UNKNOWN)
11549 return 0;
11550
11551 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11552 ix86_compare_op1, operands[2],
11553 operands[3]))
11554 return 1;
11555
11556 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11557 ix86_compare_op1, operands[2], operands[3]);
11558 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11559 return 1;
11560 }
11561
11562 /* The floating point conditional move instructions don't directly
11563 support conditions resulting from a signed integer comparison. */
11564
11565 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11566
11570 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11571 {
11572 gcc_assert (!second_test && !bypass_test);
11573 tmp = gen_reg_rtx (QImode);
11574 ix86_expand_setcc (code, tmp);
11575 code = NE;
11576 ix86_compare_op0 = tmp;
11577 ix86_compare_op1 = const0_rtx;
11578 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11579 }
11580 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11581 {
11582 tmp = gen_reg_rtx (mode);
11583 emit_move_insn (tmp, operands[3]);
11584 operands[3] = tmp;
11585 }
11586 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11587 {
11588 tmp = gen_reg_rtx (mode);
11589 emit_move_insn (tmp, operands[2]);
11590 operands[2] = tmp;
11591 }
11592
11593 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11594 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11595 operands[2], operands[3])));
11596 if (bypass_test)
11597 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11598 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11599 operands[3], operands[0])));
11600 if (second_test)
11601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11602 gen_rtx_IF_THEN_ELSE (mode, second_test,
11603 operands[2], operands[0])));
11604
11605 return 1;
11606 }
11607
11608 /* Expand a floating-point vector conditional move; a vcond operation
11609 rather than a movcc operation. */
11610
11611 bool
11612 ix86_expand_fp_vcond (rtx operands[])
11613 {
11614 enum rtx_code code = GET_CODE (operands[3]);
11615 rtx cmp;
11616
11617 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11618 &operands[4], &operands[5]);
11619 if (code == UNKNOWN)
11620 return false;
11621
11622 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11623 operands[5], operands[1], operands[2]))
11624 return true;
11625
11626 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11627 operands[1], operands[2]);
11628 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11629 return true;
11630 }
11631
11632 /* Expand a signed integral vector conditional move. */
11633
11634 bool
11635 ix86_expand_int_vcond (rtx operands[])
11636 {
11637 enum machine_mode mode = GET_MODE (operands[0]);
11638 enum rtx_code code = GET_CODE (operands[3]);
11639 bool negate = false;
11640 rtx x, cop0, cop1;
11641
11642 cop0 = operands[4];
11643 cop1 = operands[5];
11644
11645 /* Canonicalize the comparison to EQ, GT, GTU. */
11646 switch (code)
11647 {
11648 case EQ:
11649 case GT:
11650 case GTU:
11651 break;
11652
11653 case NE:
11654 case LE:
11655 case LEU:
11656 code = reverse_condition (code);
11657 negate = true;
11658 break;
11659
11660 case GE:
11661 case GEU:
11662 code = reverse_condition (code);
11663 negate = true;
11664 /* FALLTHRU */
11665
11666 case LT:
11667 case LTU:
11668 code = swap_condition (code);
11669 x = cop0, cop0 = cop1, cop1 = x;
11670 break;
11671
11672 default:
11673 gcc_unreachable ();
11674 }
11675
11676 /* Unsigned parallel compare is not supported by the hardware. Play some
11677 tricks to turn this into a signed comparison against 0. */
11678 if (code == GTU)
11679 {
11680 cop0 = force_reg (mode, cop0);
11681
11682 switch (mode)
11683 {
11684 case V4SImode:
11685 {
11686 rtx t1, t2, mask;
11687
11688 /* Perform a parallel modulo subtraction. */
11689 t1 = gen_reg_rtx (mode);
11690 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11691
11692 /* Extract the original sign bit of op0. */
11693 mask = GEN_INT (-0x80000000);
11694 mask = gen_rtx_CONST_VECTOR (mode,
11695 gen_rtvec (4, mask, mask, mask, mask));
11696 mask = force_reg (mode, mask);
11697 t2 = gen_reg_rtx (mode);
11698 emit_insn (gen_andv4si3 (t2, cop0, mask));
11699
11700 /* XOR it back into the result of the subtraction. This results
11701 in the sign bit set iff we saw unsigned underflow. */
11702 x = gen_reg_rtx (mode);
11703 emit_insn (gen_xorv4si3 (x, t1, t2));
11704
11705 code = GT;
11706 }
11707 break;
11708
11709 case V16QImode:
11710 case V8HImode:
11711 /* Perform a parallel unsigned saturating subtraction. */
11712 x = gen_reg_rtx (mode);
11713 emit_insn (gen_rtx_SET (VOIDmode, x,
11714 gen_rtx_US_MINUS (mode, cop0, cop1)));
11715
11716 code = EQ;
11717 negate = !negate;
11718 break;
11719
11720 default:
11721 gcc_unreachable ();
11722 }
11723
11724 cop0 = x;
11725 cop1 = CONST0_RTX (mode);
11726 }
11727
11728 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11729 operands[1+negate], operands[2-negate]);
11730
11731 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11732 operands[2-negate]);
11733 return true;
11734 }
11735
11736 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11737 true if we should do zero extension, else sign extension. HIGH_P is
11738 true if we want the N/2 high elements, else the low elements. */
11739
11740 void
11741 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
11742 {
11743 enum machine_mode imode = GET_MODE (operands[1]);
11744 rtx (*unpack)(rtx, rtx, rtx);
11745 rtx se, dest;
11746
11747 switch (imode)
11748 {
11749 case V16QImode:
11750 if (high_p)
11751 unpack = gen_vec_interleave_highv16qi;
11752 else
11753 unpack = gen_vec_interleave_lowv16qi;
11754 break;
11755 case V8HImode:
11756 if (high_p)
11757 unpack = gen_vec_interleave_highv8hi;
11758 else
11759 unpack = gen_vec_interleave_lowv8hi;
11760 break;
11761 case V4SImode:
11762 if (high_p)
11763 unpack = gen_vec_interleave_highv4si;
11764 else
11765 unpack = gen_vec_interleave_lowv4si;
11766 break;
11767 default:
11768 gcc_unreachable ();
11769 }
11770
11771 dest = gen_lowpart (imode, operands[0]);
11772
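  /* SE supplies the bits interleaved into the other half of each widened
     element: zeros for zero extension, or an all-ones mask for negative
     elements (computed by the signed compare 0 > operands[1]) for sign
     extension.  */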
11773 if (unsigned_p)
11774 se = force_reg (imode, CONST0_RTX (imode));
11775 else
11776 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
11777 operands[1], pc_rtx, pc_rtx);
11778
11779 emit_insn (unpack (dest, operands[1], se));
11780 }
11781
11782 /* Expand conditional increment or decrement using adc/sbb instructions.
11783 The default case using setcc followed by the conditional move can be
11784 done by generic code. */
11785 int
11786 ix86_expand_int_addcc (rtx operands[])
11787 {
11788 enum rtx_code code = GET_CODE (operands[1]);
11789 rtx compare_op;
11790 rtx val = const0_rtx;
11791 bool fpcmp = false;
11792 enum machine_mode mode = GET_MODE (operands[0]);
11793
11794 if (operands[3] != const1_rtx
11795 && operands[3] != constm1_rtx)
11796 return 0;
11797 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11798 ix86_compare_op1, &compare_op))
11799 return 0;
11800 code = GET_CODE (compare_op);
11801
11802 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11803 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11804 {
11805 fpcmp = true;
11806 code = ix86_fp_compare_code_to_integer (code);
11807 }
11808
11809 if (code != LTU)
11810 {
11811 val = constm1_rtx;
11812 if (fpcmp)
11813 PUT_CODE (compare_op,
11814 reverse_condition_maybe_unordered
11815 (GET_CODE (compare_op)));
11816 else
11817 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11818 }
11819 PUT_MODE (compare_op, mode);
11820
11821 /* Construct either adc or sbb insn. */
11822 if ((code == LTU) == (operands[3] == constm1_rtx))
11823 {
11824 switch (GET_MODE (operands[0]))
11825 {
11826 case QImode:
11827 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11828 break;
11829 case HImode:
11830 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11831 break;
11832 case SImode:
11833 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11834 break;
11835 case DImode:
11836 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11837 break;
11838 default:
11839 gcc_unreachable ();
11840 }
11841 }
11842 else
11843 {
11844 switch (GET_MODE (operands[0]))
11845 {
11846 case QImode:
11847 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11848 break;
11849 case HImode:
11850 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11851 break;
11852 case SImode:
11853 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11854 break;
11855 case DImode:
11856 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11857 break;
11858 default:
11859 gcc_unreachable ();
11860 }
11861 }
11862 return 1; /* DONE */
11863 }
11864
11865
11866 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
11867    works for floating point parameters and non-offsettable memories.
11868    For pushes, it returns just stack offsets; the values will be saved
11869    in the right order.  At most three parts are generated.  */
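/* For example, on a 32-bit target a DFmode operand is split into two SImode
   parts and an XFmode operand into three; on a 64-bit target an XFmode or
   TFmode operand is split into a DImode low part and an SImode resp. DImode
   upper part.  */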
11870
11871 static int
11872 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11873 {
11874 int size;
11875
11876 if (!TARGET_64BIT)
11877 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11878 else
11879 size = (GET_MODE_SIZE (mode) + 4) / 8;
11880
11881 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11882 gcc_assert (size >= 2 && size <= 3);
11883
11884 /* Optimize constant pool reference to immediates. This is used by fp
11885 moves, that force all constants to memory to allow combining. */
11886 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11887 {
11888 rtx tmp = maybe_get_pool_constant (operand);
11889 if (tmp)
11890 operand = tmp;
11891 }
11892
11893 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11894 {
11895       /* The only non-offsettable memories we handle are pushes.  */
11896 int ok = push_operand (operand, VOIDmode);
11897
11898 gcc_assert (ok);
11899
11900 operand = copy_rtx (operand);
11901 PUT_MODE (operand, Pmode);
11902 parts[0] = parts[1] = parts[2] = operand;
11903 return size;
11904 }
11905
11906 if (GET_CODE (operand) == CONST_VECTOR)
11907 {
11908 enum machine_mode imode = int_mode_for_mode (mode);
11909 /* Caution: if we looked through a constant pool memory above,
11910 the operand may actually have a different mode now. That's
11911 ok, since we want to pun this all the way back to an integer. */
11912 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11913 gcc_assert (operand != NULL);
11914 mode = imode;
11915 }
11916
11917 if (!TARGET_64BIT)
11918 {
11919 if (mode == DImode)
11920 split_di (&operand, 1, &parts[0], &parts[1]);
11921 else
11922 {
11923 if (REG_P (operand))
11924 {
11925 gcc_assert (reload_completed);
11926 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11927 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11928 if (size == 3)
11929 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11930 }
11931 else if (offsettable_memref_p (operand))
11932 {
11933 operand = adjust_address (operand, SImode, 0);
11934 parts[0] = operand;
11935 parts[1] = adjust_address (operand, SImode, 4);
11936 if (size == 3)
11937 parts[2] = adjust_address (operand, SImode, 8);
11938 }
11939 else if (GET_CODE (operand) == CONST_DOUBLE)
11940 {
11941 REAL_VALUE_TYPE r;
11942 long l[4];
11943
11944 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11945 switch (mode)
11946 {
11947 case XFmode:
11948 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11949 parts[2] = gen_int_mode (l[2], SImode);
11950 break;
11951 case DFmode:
11952 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11953 break;
11954 default:
11955 gcc_unreachable ();
11956 }
11957 parts[1] = gen_int_mode (l[1], SImode);
11958 parts[0] = gen_int_mode (l[0], SImode);
11959 }
11960 else
11961 gcc_unreachable ();
11962 }
11963 }
11964 else
11965 {
11966 if (mode == TImode)
11967 split_ti (&operand, 1, &parts[0], &parts[1]);
11968 if (mode == XFmode || mode == TFmode)
11969 {
11970 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11971 if (REG_P (operand))
11972 {
11973 gcc_assert (reload_completed);
11974 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11975 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11976 }
11977 else if (offsettable_memref_p (operand))
11978 {
11979 operand = adjust_address (operand, DImode, 0);
11980 parts[0] = operand;
11981 parts[1] = adjust_address (operand, upper_mode, 8);
11982 }
11983 else if (GET_CODE (operand) == CONST_DOUBLE)
11984 {
11985 REAL_VALUE_TYPE r;
11986 long l[4];
11987
11988 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11989 real_to_target (l, &r, mode);
11990
11991 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11992 if (HOST_BITS_PER_WIDE_INT >= 64)
11993 parts[0]
11994 = gen_int_mode
11995 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11996 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11997 DImode);
11998 else
11999 parts[0] = immed_double_const (l[0], l[1], DImode);
12000
12001 if (upper_mode == SImode)
12002 parts[1] = gen_int_mode (l[2], SImode);
12003 else if (HOST_BITS_PER_WIDE_INT >= 64)
12004 parts[1]
12005 = gen_int_mode
12006 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12007 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12008 DImode);
12009 else
12010 parts[1] = immed_double_const (l[2], l[3], DImode);
12011 }
12012 else
12013 gcc_unreachable ();
12014 }
12015 }
12016
12017 return size;
12018 }
12019
12020 /* Emit insns to perform a move or push of DI, DF, and XF values.
12021    All required insns are emitted directly.  Operands 2-4 receive the
12022    destination parts in the correct order; operands 5-7 hold the
12023    corresponding source values.  */
12024
12025 void
12026 ix86_split_long_move (rtx operands[])
12027 {
12028 rtx part[2][3];
12029 int nparts;
12030 int push = 0;
12031 int collisions = 0;
12032 enum machine_mode mode = GET_MODE (operands[0]);
12033
12034   /* The DFmode expanders may ask us to move a double.
12035      For a 64-bit target this is a single move.  By hiding that fact
12036      here we simplify the i386.md splitters.  */
12037 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12038 {
12039 /* Optimize constant pool reference to immediates. This is used by
12040 fp moves, that force all constants to memory to allow combining. */
12041
12042 if (GET_CODE (operands[1]) == MEM
12043 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12044 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12045 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12046 if (push_operand (operands[0], VOIDmode))
12047 {
12048 operands[0] = copy_rtx (operands[0]);
12049 PUT_MODE (operands[0], Pmode);
12050 }
12051 else
12052 operands[0] = gen_lowpart (DImode, operands[0]);
12053 operands[1] = gen_lowpart (DImode, operands[1]);
12054 emit_move_insn (operands[0], operands[1]);
12055 return;
12056 }
12057
12058 /* The only non-offsettable memory we handle is push. */
12059 if (push_operand (operands[0], VOIDmode))
12060 push = 1;
12061 else
12062 gcc_assert (GET_CODE (operands[0]) != MEM
12063 || offsettable_memref_p (operands[0]));
12064
12065 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12066 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12067
12068   /* When emitting a push, take care with source operands on the stack.  */
12069 if (push && GET_CODE (operands[1]) == MEM
12070 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12071 {
12072 if (nparts == 3)
12073 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12074 XEXP (part[1][2], 0));
12075 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12076 XEXP (part[1][1], 0));
12077 }
12078
12079   /* We need to do the copy in the right order in case an address register
12080      of the source overlaps the destination.  */
12081 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12082 {
12083 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12084 collisions++;
12085 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12086 collisions++;
12087 if (nparts == 3
12088 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12089 collisions++;
12090
12091 /* Collision in the middle part can be handled by reordering. */
12092 if (collisions == 1 && nparts == 3
12093 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12094 {
12095 rtx tmp;
12096 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12097 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12098 }
12099
12100 /* If there are more collisions, we can't handle it by reordering.
12101 Do an lea to the last part and use only one colliding move. */
12102 else if (collisions > 1)
12103 {
12104 rtx base;
12105
12106 collisions = 1;
12107
12108 base = part[0][nparts - 1];
12109
12110 /* Handle the case when the last part isn't valid for lea.
12111 Happens in 64-bit mode storing the 12-byte XFmode. */
12112 if (GET_MODE (base) != Pmode)
12113 base = gen_rtx_REG (Pmode, REGNO (base));
12114
12115 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12116 part[1][0] = replace_equiv_address (part[1][0], base);
12117 part[1][1] = replace_equiv_address (part[1][1],
12118 plus_constant (base, UNITS_PER_WORD));
12119 if (nparts == 3)
12120 part[1][2] = replace_equiv_address (part[1][2],
12121 plus_constant (base, 8));
12122 }
12123 }
12124
12125 if (push)
12126 {
12127 if (!TARGET_64BIT)
12128 {
12129 if (nparts == 3)
12130 {
12131 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12132 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12133 emit_move_insn (part[0][2], part[1][2]);
12134 }
12135 }
12136 else
12137 {
12138 	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
12139 	     a register, that is OK - we will just use the larger counterpart.
12140 	     We also retype memory - this comes from an attempt to avoid a REX
12141 	     prefix when moving the second half of a TFmode value.  */
12142 if (GET_MODE (part[1][1]) == SImode)
12143 {
12144 switch (GET_CODE (part[1][1]))
12145 {
12146 case MEM:
12147 part[1][1] = adjust_address (part[1][1], DImode, 0);
12148 break;
12149
12150 case REG:
12151 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12152 break;
12153
12154 default:
12155 gcc_unreachable ();
12156 }
12157
12158 if (GET_MODE (part[1][0]) == SImode)
12159 part[1][0] = part[1][1];
12160 }
12161 }
12162 emit_move_insn (part[0][1], part[1][1]);
12163 emit_move_insn (part[0][0], part[1][0]);
12164 return;
12165 }
12166
12167 /* Choose correct order to not overwrite the source before it is copied. */
12168 if ((REG_P (part[0][0])
12169 && REG_P (part[1][1])
12170 && (REGNO (part[0][0]) == REGNO (part[1][1])
12171 || (nparts == 3
12172 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12173 || (collisions > 0
12174 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12175 {
12176 if (nparts == 3)
12177 {
12178 operands[2] = part[0][2];
12179 operands[3] = part[0][1];
12180 operands[4] = part[0][0];
12181 operands[5] = part[1][2];
12182 operands[6] = part[1][1];
12183 operands[7] = part[1][0];
12184 }
12185 else
12186 {
12187 operands[2] = part[0][1];
12188 operands[3] = part[0][0];
12189 operands[5] = part[1][1];
12190 operands[6] = part[1][0];
12191 }
12192 }
12193 else
12194 {
12195 if (nparts == 3)
12196 {
12197 operands[2] = part[0][0];
12198 operands[3] = part[0][1];
12199 operands[4] = part[0][2];
12200 operands[5] = part[1][0];
12201 operands[6] = part[1][1];
12202 operands[7] = part[1][2];
12203 }
12204 else
12205 {
12206 operands[2] = part[0][0];
12207 operands[3] = part[0][1];
12208 operands[5] = part[1][0];
12209 operands[6] = part[1][1];
12210 }
12211 }
12212
12213 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12214 if (optimize_size)
12215 {
12216 if (GET_CODE (operands[5]) == CONST_INT
12217 && operands[5] != const0_rtx
12218 && REG_P (operands[2]))
12219 {
12220 if (GET_CODE (operands[6]) == CONST_INT
12221 && INTVAL (operands[6]) == INTVAL (operands[5]))
12222 operands[6] = operands[2];
12223
12224 if (nparts == 3
12225 && GET_CODE (operands[7]) == CONST_INT
12226 && INTVAL (operands[7]) == INTVAL (operands[5]))
12227 operands[7] = operands[2];
12228 }
12229
12230 if (nparts == 3
12231 && GET_CODE (operands[6]) == CONST_INT
12232 && operands[6] != const0_rtx
12233 && REG_P (operands[3])
12234 && GET_CODE (operands[7]) == CONST_INT
12235 && INTVAL (operands[7]) == INTVAL (operands[6]))
12236 operands[7] = operands[3];
12237 }
12238
12239 emit_move_insn (operands[2], operands[5]);
12240 emit_move_insn (operands[3], operands[6]);
12241 if (nparts == 3)
12242 emit_move_insn (operands[4], operands[7]);
12243
12244 return;
12245 }
12246
12247 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12248 left shift by a constant, either using a single shift or
12249 a sequence of add instructions. */
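/* MODE is the mode of the original double-word shift (DImode or TImode);
   OPERAND is the SImode resp. DImode half actually being operated on.  */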
12250
12251 static void
12252 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12253 {
12254 if (count == 1)
12255 {
12256 emit_insn ((mode == DImode
12257 ? gen_addsi3
12258 : gen_adddi3) (operand, operand, operand));
12259 }
12260 else if (!optimize_size
12261 && count * ix86_cost->add <= ix86_cost->shift_const)
12262 {
12263 int i;
12264 for (i=0; i<count; i++)
12265 {
12266 emit_insn ((mode == DImode
12267 ? gen_addsi3
12268 : gen_adddi3) (operand, operand, operand));
12269 }
12270 }
12271 else
12272 emit_insn ((mode == DImode
12273 ? gen_ashlsi3
12274 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12275 }
12276
12277 void
12278 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12279 {
12280 rtx low[2], high[2];
12281 int count;
12282 const int single_width = mode == DImode ? 32 : 64;
12283
12284 if (GET_CODE (operands[2]) == CONST_INT)
12285 {
12286 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12287 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12288
12289 if (count >= single_width)
12290 {
12291 emit_move_insn (high[0], low[1]);
12292 emit_move_insn (low[0], const0_rtx);
12293
12294 if (count > single_width)
12295 ix86_expand_ashl_const (high[0], count - single_width, mode);
12296 }
12297 else
12298 {
12299 if (!rtx_equal_p (operands[0], operands[1]))
12300 emit_move_insn (operands[0], operands[1]);
12301 emit_insn ((mode == DImode
12302 ? gen_x86_shld_1
12303 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12304 ix86_expand_ashl_const (low[0], count, mode);
12305 }
12306 return;
12307 }
12308
12309 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12310
12311 if (operands[1] == const1_rtx)
12312 {
12313       /* Assuming we've chosen QImode-capable registers, 1 << N
12314 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12315 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12316 {
12317 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12318
12319 ix86_expand_clear (low[0]);
12320 ix86_expand_clear (high[0]);
12321 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12322
12323 d = gen_lowpart (QImode, low[0]);
12324 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12325 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12326 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12327
12328 d = gen_lowpart (QImode, high[0]);
12329 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12330 s = gen_rtx_NE (QImode, flags, const0_rtx);
12331 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12332 }
12333
12334 /* Otherwise, we can get the same results by manually performing
12335 a bit extract operation on bit 5/6, and then performing the two
12336 shifts. The two methods of getting 0/1 into low/high are exactly
12337 the same size. Avoiding the shift in the bit extract case helps
12338 pentium4 a bit; no one else seems to care much either way. */
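      /* high[0] receives bit 5 (resp. bit 6) of the shift count, which is 1
	 exactly when the shift crosses into the high word, and low[0] gets
	 the complementary bit.  The final variable shifts below rely on the
	 hardware truncating the shift count modulo 32 (resp. 64), so the
	 single set bit ends up in the right position.  */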
12339 else
12340 {
12341 rtx x;
12342
12343 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12344 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12345 else
12346 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12347 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12348
12349 emit_insn ((mode == DImode
12350 ? gen_lshrsi3
12351 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12352 emit_insn ((mode == DImode
12353 ? gen_andsi3
12354 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12355 emit_move_insn (low[0], high[0]);
12356 emit_insn ((mode == DImode
12357 ? gen_xorsi3
12358 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12359 }
12360
12361 emit_insn ((mode == DImode
12362 ? gen_ashlsi3
12363 : gen_ashldi3) (low[0], low[0], operands[2]));
12364 emit_insn ((mode == DImode
12365 ? gen_ashlsi3
12366 : gen_ashldi3) (high[0], high[0], operands[2]));
12367 return;
12368 }
12369
12370 if (operands[1] == constm1_rtx)
12371 {
12372 /* For -1 << N, we can avoid the shld instruction, because we
12373 know that we're shifting 0...31/63 ones into a -1. */
12374 emit_move_insn (low[0], constm1_rtx);
12375 if (optimize_size)
12376 emit_move_insn (high[0], low[0]);
12377 else
12378 emit_move_insn (high[0], constm1_rtx);
12379 }
12380 else
12381 {
12382 if (!rtx_equal_p (operands[0], operands[1]))
12383 emit_move_insn (operands[0], operands[1]);
12384
12385 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12386 emit_insn ((mode == DImode
12387 ? gen_x86_shld_1
12388 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12389 }
12390
12391 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12392
12393 if (TARGET_CMOVE && scratch)
12394 {
12395 ix86_expand_clear (scratch);
12396 emit_insn ((mode == DImode
12397 ? gen_x86_shift_adj_1
12398 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12399 }
12400 else
12401 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12402 }
12403
12404 void
12405 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12406 {
12407 rtx low[2], high[2];
12408 int count;
12409 const int single_width = mode == DImode ? 32 : 64;
12410
12411 if (GET_CODE (operands[2]) == CONST_INT)
12412 {
12413 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12414 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12415
12416 if (count == single_width * 2 - 1)
12417 {
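	  /* An arithmetic right shift by the full double width minus one
	     leaves only copies of the sign bit, so both halves become the
	     high part shifted down by single_width - 1.  */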
12418 emit_move_insn (high[0], high[1]);
12419 emit_insn ((mode == DImode
12420 ? gen_ashrsi3
12421 : gen_ashrdi3) (high[0], high[0],
12422 GEN_INT (single_width - 1)));
12423 emit_move_insn (low[0], high[0]);
12424
12425 }
12426 else if (count >= single_width)
12427 {
12428 emit_move_insn (low[0], high[1]);
12429 emit_move_insn (high[0], low[0]);
12430 emit_insn ((mode == DImode
12431 ? gen_ashrsi3
12432 : gen_ashrdi3) (high[0], high[0],
12433 GEN_INT (single_width - 1)));
12434 if (count > single_width)
12435 emit_insn ((mode == DImode
12436 ? gen_ashrsi3
12437 : gen_ashrdi3) (low[0], low[0],
12438 GEN_INT (count - single_width)));
12439 }
12440 else
12441 {
12442 if (!rtx_equal_p (operands[0], operands[1]))
12443 emit_move_insn (operands[0], operands[1]);
12444 emit_insn ((mode == DImode
12445 ? gen_x86_shrd_1
12446 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12447 emit_insn ((mode == DImode
12448 ? gen_ashrsi3
12449 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12450 }
12451 }
12452 else
12453 {
12454 if (!rtx_equal_p (operands[0], operands[1]))
12455 emit_move_insn (operands[0], operands[1]);
12456
12457 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12458
12459 emit_insn ((mode == DImode
12460 ? gen_x86_shrd_1
12461 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12462 emit_insn ((mode == DImode
12463 ? gen_ashrsi3
12464 : gen_ashrdi3) (high[0], high[0], operands[2]));
12465
12466 if (TARGET_CMOVE && scratch)
12467 {
12468 emit_move_insn (scratch, high[0]);
12469 emit_insn ((mode == DImode
12470 ? gen_ashrsi3
12471 : gen_ashrdi3) (scratch, scratch,
12472 GEN_INT (single_width - 1)));
12473 emit_insn ((mode == DImode
12474 ? gen_x86_shift_adj_1
12475 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12476 scratch));
12477 }
12478 else
12479 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12480 }
12481 }
12482
12483 void
12484 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12485 {
12486 rtx low[2], high[2];
12487 int count;
12488 const int single_width = mode == DImode ? 32 : 64;
12489
12490 if (GET_CODE (operands[2]) == CONST_INT)
12491 {
12492 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12493 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12494
12495 if (count >= single_width)
12496 {
12497 emit_move_insn (low[0], high[1]);
12498 ix86_expand_clear (high[0]);
12499
12500 if (count > single_width)
12501 emit_insn ((mode == DImode
12502 ? gen_lshrsi3
12503 : gen_lshrdi3) (low[0], low[0],
12504 GEN_INT (count - single_width)));
12505 }
12506 else
12507 {
12508 if (!rtx_equal_p (operands[0], operands[1]))
12509 emit_move_insn (operands[0], operands[1]);
12510 emit_insn ((mode == DImode
12511 ? gen_x86_shrd_1
12512 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12513 emit_insn ((mode == DImode
12514 ? gen_lshrsi3
12515 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12516 }
12517 }
12518 else
12519 {
12520 if (!rtx_equal_p (operands[0], operands[1]))
12521 emit_move_insn (operands[0], operands[1]);
12522
12523 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12524
12525 emit_insn ((mode == DImode
12526 ? gen_x86_shrd_1
12527 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12528 emit_insn ((mode == DImode
12529 ? gen_lshrsi3
12530 : gen_lshrdi3) (high[0], high[0], operands[2]));
12531
12532 /* Heh. By reversing the arguments, we can reuse this pattern. */
12533 if (TARGET_CMOVE && scratch)
12534 {
12535 ix86_expand_clear (scratch);
12536 emit_insn ((mode == DImode
12537 ? gen_x86_shift_adj_1
12538 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12539 scratch));
12540 }
12541 else
12542 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12543 }
12544 }
12545
12546 /* Helper function for the string operations below.  Test whether VARIABLE
12547    is aligned to VALUE bytes.  If it is, jump to the returned label.  */
12548 static rtx
12549 ix86_expand_aligntest (rtx variable, int value)
12550 {
12551 rtx label = gen_label_rtx ();
12552 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12553 if (GET_MODE (variable) == DImode)
12554 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12555 else
12556 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12557 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12558 1, label);
12559 return label;
12560 }
12561
12562 /* Decrease COUNTREG by VALUE.  */
12563 static void
12564 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12565 {
12566 if (GET_MODE (countreg) == DImode)
12567 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12568 else
12569 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12570 }
12571
12572 /* Zero extend possibly SImode EXP to Pmode register. */
12573 rtx
12574 ix86_zero_extend_to_Pmode (rtx exp)
12575 {
12576 rtx r;
12577 if (GET_MODE (exp) == VOIDmode)
12578 return force_reg (Pmode, exp);
12579 if (GET_MODE (exp) == Pmode)
12580 return copy_to_mode_reg (Pmode, exp);
12581 r = gen_reg_rtx (Pmode);
12582 emit_insn (gen_zero_extendsidi2 (r, exp));
12583 return r;
12584 }
12585
12586 /* Expand string move (memcpy) operation. Use i386 string operations when
12587 profitable. expand_clrmem contains similar code. */
12588 int
12589 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12590 {
12591 rtx srcreg, destreg, countreg, srcexp, destexp;
12592 enum machine_mode counter_mode;
12593 HOST_WIDE_INT align = 0;
12594 unsigned HOST_WIDE_INT count = 0;
12595
12596 if (GET_CODE (align_exp) == CONST_INT)
12597 align = INTVAL (align_exp);
12598
12599 /* Can't use any of this if the user has appropriated esi or edi. */
12600 if (global_regs[4] || global_regs[5])
12601 return 0;
12602
12603 /* This simple hack avoids all inlining code and simplifies code below. */
12604 if (!TARGET_ALIGN_STRINGOPS)
12605 align = 64;
12606
12607 if (GET_CODE (count_exp) == CONST_INT)
12608 {
12609 count = INTVAL (count_exp);
12610 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12611 return 0;
12612 }
12613
12614   /* Figure out the proper mode for the counter.  For 32 bits it is always
12615      SImode; for 64 bits use SImode when possible, otherwise DImode.
12616      Set count to the number of bytes copied when known at compile time.  */
12617 if (!TARGET_64BIT
12618 || GET_MODE (count_exp) == SImode
12619 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12620 counter_mode = SImode;
12621 else
12622 counter_mode = DImode;
12623
12624 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12625
12626 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12627 if (destreg != XEXP (dst, 0))
12628 dst = replace_equiv_address_nv (dst, destreg);
12629 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12630 if (srcreg != XEXP (src, 0))
12631 src = replace_equiv_address_nv (src, srcreg);
12632
12633 /* When optimizing for size emit simple rep ; movsb instruction for
12634 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12635 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12636      Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12637 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12638 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12639 known to be zero or not. The rep; movsb sequence causes higher
12640 register pressure though, so take that into account. */
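     For example, for count == 11 the inline sequence is two movsl, one
     movsw and one movsb: 11 / 4 + (11 & 3) = 2 + 3 = 5 bytes, versus 4 or
     7 bytes for the rep ; movsb form.  */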
12641
12642 if ((!optimize || optimize_size)
12643 && (count == 0
12644 || ((count & 0x03)
12645 && (!optimize_size
12646 || count > 5 * 4
12647 || (count & 3) + count / 4 > 6))))
12648 {
12649 emit_insn (gen_cld ());
12650 countreg = ix86_zero_extend_to_Pmode (count_exp);
12651 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12652 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12653 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12654 destexp, srcexp));
12655 }
12656
12657 /* For constant aligned (or small unaligned) copies use rep movsl
12658 followed by code copying the rest. For PentiumPro ensure 8 byte
12659 alignment to allow rep movsl acceleration. */
12660
12661 else if (count != 0
12662 && (align >= 8
12663 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12664 || optimize_size || count < (unsigned int) 64))
12665 {
12666 unsigned HOST_WIDE_INT offset = 0;
12667 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12668 rtx srcmem, dstmem;
12669
12670 emit_insn (gen_cld ());
12671 if (count & ~(size - 1))
12672 {
12673 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12674 {
12675 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12676
12677 while (offset < (count & ~(size - 1)))
12678 {
12679 srcmem = adjust_automodify_address_nv (src, movs_mode,
12680 srcreg, offset);
12681 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12682 destreg, offset);
12683 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12684 offset += size;
12685 }
12686 }
12687 else
12688 {
12689 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12690 & (TARGET_64BIT ? -1 : 0x3fffffff));
12691 countreg = copy_to_mode_reg (counter_mode, countreg);
12692 countreg = ix86_zero_extend_to_Pmode (countreg);
12693
12694 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12695 GEN_INT (size == 4 ? 2 : 3));
12696 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12697 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12698
12699 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12700 countreg, destexp, srcexp));
12701 offset = count & ~(size - 1);
12702 }
12703 }
12704 if (size == 8 && (count & 0x04))
12705 {
12706 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12707 offset);
12708 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12709 offset);
12710 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12711 offset += 4;
12712 }
12713 if (count & 0x02)
12714 {
12715 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12716 offset);
12717 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12718 offset);
12719 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12720 offset += 2;
12721 }
12722 if (count & 0x01)
12723 {
12724 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12725 offset);
12726 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12727 offset);
12728 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12729 }
12730 }
12731 /* The generic code based on the glibc implementation:
12732 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12733 allowing accelerated copying there)
12734 - copy the data using rep movsl
12735 - copy the rest. */
12736 else
12737 {
12738 rtx countreg2;
12739 rtx label = NULL;
12740 rtx srcmem, dstmem;
12741 int desired_alignment = (TARGET_PENTIUMPRO
12742 && (count == 0 || count >= (unsigned int) 260)
12743 ? 8 : UNITS_PER_WORD);
12744 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12745 dst = change_address (dst, BLKmode, destreg);
12746 src = change_address (src, BLKmode, srcreg);
12747
12748       /* In case we don't know anything about the alignment, default to
12749 	 the library version, since it is usually equally fast and results
12750 	 in shorter code.
12751 
12752 	 Also emit a call when we know that the count is large and the call
12753 	 overhead will not be important.  */
12754 if (!TARGET_INLINE_ALL_STRINGOPS
12755 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12756 return 0;
12757
12758 if (TARGET_SINGLE_STRINGOP)
12759 emit_insn (gen_cld ());
12760
12761 countreg2 = gen_reg_rtx (Pmode);
12762 countreg = copy_to_mode_reg (counter_mode, count_exp);
12763
12764 /* We don't use loops to align the destination or to copy parts smaller
12765 than 4 bytes, because gcc is able to optimize such code better (when
12766 the destination or the count really is aligned, gcc is often able to
12767 predict the branches) and it is also friendlier to the hardware's
12768 branch prediction.
12769
12770 Using loops is beneficial for the generic case, because we can
12771 handle small counts using the loops. Many CPUs (such as Athlon)
12772 have large REP prefix setup costs.
12773
12774 This is quite costly. Maybe we can revisit this decision later or
12775 add some customizability to this code. */
12776
12777 if (count == 0 && align < desired_alignment)
12778 {
12779 label = gen_label_rtx ();
12780 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12781 LEU, 0, counter_mode, 1, label);
12782 }
12783 if (align <= 1)
12784 {
12785 rtx label = ix86_expand_aligntest (destreg, 1);
12786 srcmem = change_address (src, QImode, srcreg);
12787 dstmem = change_address (dst, QImode, destreg);
12788 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12789 ix86_adjust_counter (countreg, 1);
12790 emit_label (label);
12791 LABEL_NUSES (label) = 1;
12792 }
12793 if (align <= 2)
12794 {
12795 rtx label = ix86_expand_aligntest (destreg, 2);
12796 srcmem = change_address (src, HImode, srcreg);
12797 dstmem = change_address (dst, HImode, destreg);
12798 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12799 ix86_adjust_counter (countreg, 2);
12800 emit_label (label);
12801 LABEL_NUSES (label) = 1;
12802 }
12803 if (align <= 4 && desired_alignment > 4)
12804 {
12805 rtx label = ix86_expand_aligntest (destreg, 4);
12806 srcmem = change_address (src, SImode, srcreg);
12807 dstmem = change_address (dst, SImode, destreg);
12808 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12809 ix86_adjust_counter (countreg, 4);
12810 emit_label (label);
12811 LABEL_NUSES (label) = 1;
12812 }
12813
12814 if (label && desired_alignment > 4 && !TARGET_64BIT)
12815 {
12816 emit_label (label);
12817 LABEL_NUSES (label) = 1;
12818 label = NULL_RTX;
12819 }
12820 if (!TARGET_SINGLE_STRINGOP)
12821 emit_insn (gen_cld ());
12822 if (TARGET_64BIT)
12823 {
12824 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12825 GEN_INT (3)));
12826 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12827 }
12828 else
12829 {
12830 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12831 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12832 }
12833 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12834 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12835 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12836 countreg2, destexp, srcexp));
12837
12838 if (label)
12839 {
12840 emit_label (label);
12841 LABEL_NUSES (label) = 1;
12842 }
12843 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12844 {
12845 srcmem = change_address (src, SImode, srcreg);
12846 dstmem = change_address (dst, SImode, destreg);
12847 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12848 }
12849 if ((align <= 4 || count == 0) && TARGET_64BIT)
12850 {
12851 rtx label = ix86_expand_aligntest (countreg, 4);
12852 srcmem = change_address (src, SImode, srcreg);
12853 dstmem = change_address (dst, SImode, destreg);
12854 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12855 emit_label (label);
12856 LABEL_NUSES (label) = 1;
12857 }
12858 if (align > 2 && count != 0 && (count & 2))
12859 {
12860 srcmem = change_address (src, HImode, srcreg);
12861 dstmem = change_address (dst, HImode, destreg);
12862 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12863 }
12864 if (align <= 2 || count == 0)
12865 {
12866 rtx label = ix86_expand_aligntest (countreg, 2);
12867 srcmem = change_address (src, HImode, srcreg);
12868 dstmem = change_address (dst, HImode, destreg);
12869 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12870 emit_label (label);
12871 LABEL_NUSES (label) = 1;
12872 }
12873 if (align > 1 && count != 0 && (count & 1))
12874 {
12875 srcmem = change_address (src, QImode, srcreg);
12876 dstmem = change_address (dst, QImode, destreg);
12877 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12878 }
12879 if (align <= 1 || count == 0)
12880 {
12881 rtx label = ix86_expand_aligntest (countreg, 1);
12882 srcmem = change_address (src, QImode, srcreg);
12883 dstmem = change_address (dst, QImode, destreg);
12884 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12885 emit_label (label);
12886 LABEL_NUSES (label) = 1;
12887 }
12888 }
12889
12890 return 1;
12891 }
12892
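/* A minimal standalone sketch of the size heuristic above: a bare
   "movl $N, %ecx; rep; movsb" is chosen when the byte count is only known
   at run time, or when it is not a multiple of 4 and the unrolled mix of
   movsl and movsb instructions would be larger (more than roughly six
   string instructions, or a block over 20 bytes).  The helper name and the
   exact framing are illustrative, not part of the expander above.  */

static int
plain_rep_movsb_is_smaller (unsigned HOST_WIDE_INT count)
{
  if (count == 0)
    return 1;			/* Length known only at run time.  */
  if ((count & 3) == 0)
    return 0;			/* A whole number of movsl's; no byte tail.  */
  return count > 5 * 4 || (count & 3) + count / 4 > 6;
}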
12893 /* Expand string clear operation (bzero). Use i386 string operations when
12894 profitable. expand_movmem contains similar code. */
12895 int
12896 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12897 {
12898 rtx destreg, zeroreg, countreg, destexp;
12899 enum machine_mode counter_mode;
12900 HOST_WIDE_INT align = 0;
12901 unsigned HOST_WIDE_INT count = 0;
12902
12903 if (GET_CODE (align_exp) == CONST_INT)
12904 align = INTVAL (align_exp);
12905
12906 /* Can't use any of this if the user has appropriated esi. */
12907 if (global_regs[4])
12908 return 0;
12909
12910 /* This simple hack avoids all inlining code and simplifies code below. */
12911 if (!TARGET_ALIGN_STRINGOPS)
12912 align = 32;
12913
12914 if (GET_CODE (count_exp) == CONST_INT)
12915 {
12916 count = INTVAL (count_exp);
12917 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12918 return 0;
12919 }
12920 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
12921 for 64 bits use SImode when possible, otherwise DImode.
12922 Set count to the number of bytes cleared when known at compile time. */
12923 if (!TARGET_64BIT
12924 || GET_MODE (count_exp) == SImode
12925 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12926 counter_mode = SImode;
12927 else
12928 counter_mode = DImode;
12929
12930 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12931 if (destreg != XEXP (dst, 0))
12932 dst = replace_equiv_address_nv (dst, destreg);
12933
12934
12935 /* When optimizing for size, emit a simple rep ; stosb instruction for
12936 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12937 sequence is 7 bytes long, so if optimizing for size and the count is
12938 small enough that some stosl, stosw and stosb instructions without
12939 rep are shorter, fall back into the next if. */
12940
12941 if ((!optimize || optimize_size)
12942 && (count == 0
12943 || ((count & 0x03)
12944 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12945 {
12946 emit_insn (gen_cld ());
12947
12948 countreg = ix86_zero_extend_to_Pmode (count_exp);
12949 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12950 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12951 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12952 }
12953 else if (count != 0
12954 && (align >= 8
12955 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12956 || optimize_size || count < (unsigned int) 64))
12957 {
12958 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12959 unsigned HOST_WIDE_INT offset = 0;
12960
12961 emit_insn (gen_cld ());
12962
12963 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12964 if (count & ~(size - 1))
12965 {
12966 unsigned HOST_WIDE_INT repcount;
12967 unsigned int max_nonrep;
12968
12969 repcount = count >> (size == 4 ? 2 : 3);
12970 if (!TARGET_64BIT)
12971 repcount &= 0x3fffffff;
12972
12973 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12974 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12975 bytes. In both cases the latter seems to be faster for small
12976 values of N. */
12977 max_nonrep = size == 4 ? 7 : 4;
12978 if (!optimize_size)
12979 switch (ix86_tune)
12980 {
12981 case PROCESSOR_PENTIUM4:
12982 case PROCESSOR_NOCONA:
12983 max_nonrep = 3;
12984 break;
12985 default:
12986 break;
12987 }
12988
12989 if (repcount <= max_nonrep)
12990 while (repcount-- > 0)
12991 {
12992 rtx mem = adjust_automodify_address_nv (dst,
12993 GET_MODE (zeroreg),
12994 destreg, offset);
12995 emit_insn (gen_strset (destreg, mem, zeroreg));
12996 offset += size;
12997 }
12998 else
12999 {
13000 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13001 countreg = ix86_zero_extend_to_Pmode (countreg);
13002 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13003 GEN_INT (size == 4 ? 2 : 3));
13004 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13005 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13006 destexp));
13007 offset = count & ~(size - 1);
13008 }
13009 }
13010 if (size == 8 && (count & 0x04))
13011 {
13012 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13013 offset);
13014 emit_insn (gen_strset (destreg, mem,
13015 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13016 offset += 4;
13017 }
13018 if (count & 0x02)
13019 {
13020 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13021 offset);
13022 emit_insn (gen_strset (destreg, mem,
13023 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13024 offset += 2;
13025 }
13026 if (count & 0x01)
13027 {
13028 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13029 offset);
13030 emit_insn (gen_strset (destreg, mem,
13031 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13032 }
13033 }
13034 else
13035 {
13036 rtx countreg2;
13037 rtx label = NULL;
13038 /* Compute desired alignment of the string operation. */
13039 int desired_alignment = (TARGET_PENTIUMPRO
13040 && (count == 0 || count >= (unsigned int) 260)
13041 ? 8 : UNITS_PER_WORD);
13042
13043 /* In case we don't know anything about the alignment, default to the
13044 library version, since it is usually equally fast and results in
13045 shorter code.
13046
13047 Also emit a call when we know that the count is large and the call
13048 overhead will not be important. */
13049 if (!TARGET_INLINE_ALL_STRINGOPS
13050 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13051 return 0;
13052
13053 if (TARGET_SINGLE_STRINGOP)
13054 emit_insn (gen_cld ());
13055
13056 countreg2 = gen_reg_rtx (Pmode);
13057 countreg = copy_to_mode_reg (counter_mode, count_exp);
13058 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13059 /* Get rid of MEM_OFFSET, it won't be accurate. */
13060 dst = change_address (dst, BLKmode, destreg);
13061
13062 if (count == 0 && align < desired_alignment)
13063 {
13064 label = gen_label_rtx ();
13065 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13066 LEU, 0, counter_mode, 1, label);
13067 }
13068 if (align <= 1)
13069 {
13070 rtx label = ix86_expand_aligntest (destreg, 1);
13071 emit_insn (gen_strset (destreg, dst,
13072 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13073 ix86_adjust_counter (countreg, 1);
13074 emit_label (label);
13075 LABEL_NUSES (label) = 1;
13076 }
13077 if (align <= 2)
13078 {
13079 rtx label = ix86_expand_aligntest (destreg, 2);
13080 emit_insn (gen_strset (destreg, dst,
13081 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13082 ix86_adjust_counter (countreg, 2);
13083 emit_label (label);
13084 LABEL_NUSES (label) = 1;
13085 }
13086 if (align <= 4 && desired_alignment > 4)
13087 {
13088 rtx label = ix86_expand_aligntest (destreg, 4);
13089 emit_insn (gen_strset (destreg, dst,
13090 (TARGET_64BIT
13091 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13092 : zeroreg)));
13093 ix86_adjust_counter (countreg, 4);
13094 emit_label (label);
13095 LABEL_NUSES (label) = 1;
13096 }
13097
13098 if (label && desired_alignment > 4 && !TARGET_64BIT)
13099 {
13100 emit_label (label);
13101 LABEL_NUSES (label) = 1;
13102 label = NULL_RTX;
13103 }
13104
13105 if (!TARGET_SINGLE_STRINGOP)
13106 emit_insn (gen_cld ());
13107 if (TARGET_64BIT)
13108 {
13109 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13110 GEN_INT (3)));
13111 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13112 }
13113 else
13114 {
13115 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13116 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13117 }
13118 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13119 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13120
13121 if (label)
13122 {
13123 emit_label (label);
13124 LABEL_NUSES (label) = 1;
13125 }
13126
13127 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13128 emit_insn (gen_strset (destreg, dst,
13129 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13130 if (TARGET_64BIT && (align <= 4 || count == 0))
13131 {
13132 rtx label = ix86_expand_aligntest (countreg, 4);
13133 emit_insn (gen_strset (destreg, dst,
13134 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13135 emit_label (label);
13136 LABEL_NUSES (label) = 1;
13137 }
13138 if (align > 2 && count != 0 && (count & 2))
13139 emit_insn (gen_strset (destreg, dst,
13140 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13141 if (align <= 2 || count == 0)
13142 {
13143 rtx label = ix86_expand_aligntest (countreg, 2);
13144 emit_insn (gen_strset (destreg, dst,
13145 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13146 emit_label (label);
13147 LABEL_NUSES (label) = 1;
13148 }
13149 if (align > 1 && count != 0 && (count & 1))
13150 emit_insn (gen_strset (destreg, dst,
13151 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13152 if (align <= 1 || count == 0)
13153 {
13154 rtx label = ix86_expand_aligntest (countreg, 1);
13155 emit_insn (gen_strset (destreg, dst,
13156 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13157 emit_label (label);
13158 LABEL_NUSES (label) = 1;
13159 }
13160 }
13161 return 1;
13162 }
13163
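/* A worked example of the byte accounting used in ix86_expand_clrmem above,
   assuming the instruction sizes quoted in its comment: "movl $N, %ecx;
   rep; stosl" costs about 7 bytes (8 for stosq because of the REX.W
   prefix), while each bare stosl is 1 byte and each bare stosq is 2 bytes.
   The helper name is hypothetical; it only illustrates the break-even
   points behind max_nonrep being 7 for stosl and 4 for stosq.  */

static unsigned int
clear_sequence_size_in_bytes (unsigned HOST_WIDE_INT repcount, int word_size,
			      int use_rep)
{
  if (use_rep)
    /* movl $N, %ecx (5 bytes) + rep prefix (1) + stos (1), plus a REX.W
       byte for the 64-bit stosq form.  */
    return 5 + 1 + 1 + (word_size == 8 ? 1 : 0);

  /* Bare stosl is one byte each; bare stosq needs an extra REX.W prefix.  */
  return repcount * (word_size == 8 ? 2 : 1);
}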
13164 /* Expand strlen. */
13165 int
13166 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13167 {
13168 rtx addr, scratch1, scratch2, scratch3, scratch4;
13169
13170 /* The generic case of the strlen expander is long. Avoid expanding it
13171 unless TARGET_INLINE_ALL_STRINGOPS. */
13172
13173 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13174 && !TARGET_INLINE_ALL_STRINGOPS
13175 && !optimize_size
13176 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13177 return 0;
13178
13179 addr = force_reg (Pmode, XEXP (src, 0));
13180 scratch1 = gen_reg_rtx (Pmode);
13181
13182 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13183 && !optimize_size)
13184 {
13185 /* It seems that some optimizer does not combine a call like
13186 foo(strlen(bar), strlen(bar));
13187 when the move and the subtraction are done here. It calculates
13188 the length just once when these instructions are done inside
13189 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
13190 and this uses one fewer register for the lifetime of
13191 output_strlen_unroll(), doing it here is better. */
13192
13193 emit_move_insn (out, addr);
13194
13195 ix86_expand_strlensi_unroll_1 (out, src, align);
13196
13197 /* strlensi_unroll_1 returns the address of the zero at the end of
13198 the string, like memchr(), so compute the length by subtracting
13199 the start address. */
13200 if (TARGET_64BIT)
13201 emit_insn (gen_subdi3 (out, out, addr));
13202 else
13203 emit_insn (gen_subsi3 (out, out, addr));
13204 }
13205 else
13206 {
13207 rtx unspec;
13208 scratch2 = gen_reg_rtx (Pmode);
13209 scratch3 = gen_reg_rtx (Pmode);
13210 scratch4 = force_reg (Pmode, constm1_rtx);
13211
13212 emit_move_insn (scratch3, addr);
13213 eoschar = force_reg (QImode, eoschar);
13214
13215 emit_insn (gen_cld ());
13216 src = replace_equiv_address_nv (src, scratch3);
13217
13218 /* If .md starts supporting :P, this can be done in .md. */
13219 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13220 scratch4), UNSPEC_SCAS);
13221 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13222 if (TARGET_64BIT)
13223 {
13224 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13225 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13226 }
13227 else
13228 {
13229 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13230 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13231 }
13232 }
13233 return 1;
13234 }
13235
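/* A minimal standalone model of how the scasb-based path above recovers the
   length.  The repnz scasb loop starts with the count register set to -1
   and decrements it once per byte scanned, including the terminating zero,
   so after the scan the register holds -(len + 2).  One's-complementing it
   and adding -1 (exactly what the gen_one_cmpl*2 / gen_add*3 pair above
   emits) yields len.  The function below is an illustrative simulation,
   not GCC code.  */

static unsigned long
strlen_from_scas_count (const char *s)
{
  long count = -1;		/* %ecx before "repnz scasb".  */
  const char *p = s;
  for (;;)
    {
      char c = *p++;
      count--;			/* scasb decrements the count every iteration.  */
      if (c == 0)
	break;			/* repnz stops once the zero byte matched %al.  */
    }
  /* ~count - 1 == -count - 2 == len.  */
  return (unsigned long) (~count - 1);
}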
13236 /* Expand the appropriate insns for doing strlen if not just doing
13237 repnz; scasb
13238
13239 out = result, initialized with the start address
13240 align_rtx = alignment of the address.
13241 scratch = scratch register, initialized with the start address when
13242 not aligned, otherwise undefined
13243
13244 This is just the body. It needs the initializations mentioned above and
13245 some address computation at the end. These things are done in i386.md. */
13246
13247 static void
13248 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13249 {
13250 int align;
13251 rtx tmp;
13252 rtx align_2_label = NULL_RTX;
13253 rtx align_3_label = NULL_RTX;
13254 rtx align_4_label = gen_label_rtx ();
13255 rtx end_0_label = gen_label_rtx ();
13256 rtx mem;
13257 rtx tmpreg = gen_reg_rtx (SImode);
13258 rtx scratch = gen_reg_rtx (SImode);
13259 rtx cmp;
13260
13261 align = 0;
13262 if (GET_CODE (align_rtx) == CONST_INT)
13263 align = INTVAL (align_rtx);
13264
13265 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13266
13267 /* Is there a known alignment and is it less than 4? */
13268 if (align < 4)
13269 {
13270 rtx scratch1 = gen_reg_rtx (Pmode);
13271 emit_move_insn (scratch1, out);
13272 /* Is there a known alignment and is it not 2? */
13273 if (align != 2)
13274 {
13275 align_3_label = gen_label_rtx (); /* Label when addr % 4 == 3. */
13276 align_2_label = gen_label_rtx (); /* Label when addr % 4 == 2. */
13277
13278 /* Leave just the 3 lower bits. */
13279 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13280 NULL_RTX, 0, OPTAB_WIDEN);
13281
13282 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13283 Pmode, 1, align_4_label);
13284 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13285 Pmode, 1, align_2_label);
13286 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13287 Pmode, 1, align_3_label);
13288 }
13289 else
13290 {
13291 /* Since the alignment is 2, we have to check 0 or 2 bytes;
13292 check whether the address is aligned to a 4-byte boundary. */
13293
13294 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13295 NULL_RTX, 0, OPTAB_WIDEN);
13296
13297 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13298 Pmode, 1, align_4_label);
13299 }
13300
13301 mem = change_address (src, QImode, out);
13302
13303 /* Now compare the bytes. */
13304
13305 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
13306 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13307 QImode, 1, end_0_label);
13308
13309 /* Increment the address. */
13310 if (TARGET_64BIT)
13311 emit_insn (gen_adddi3 (out, out, const1_rtx));
13312 else
13313 emit_insn (gen_addsi3 (out, out, const1_rtx));
13314
13315 /* Not needed with an alignment of 2 */
13316 if (align != 2)
13317 {
13318 emit_label (align_2_label);
13319
13320 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13321 end_0_label);
13322
13323 if (TARGET_64BIT)
13324 emit_insn (gen_adddi3 (out, out, const1_rtx));
13325 else
13326 emit_insn (gen_addsi3 (out, out, const1_rtx));
13327
13328 emit_label (align_3_label);
13329 }
13330
13331 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13332 end_0_label);
13333
13334 if (TARGET_64BIT)
13335 emit_insn (gen_adddi3 (out, out, const1_rtx));
13336 else
13337 emit_insn (gen_addsi3 (out, out, const1_rtx));
13338 }
13339
13340 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13341 align this loop; it only makes the program larger and does not help
13342 speed it up. */
13343 emit_label (align_4_label);
13344
13345 mem = change_address (src, SImode, out);
13346 emit_move_insn (scratch, mem);
13347 if (TARGET_64BIT)
13348 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13349 else
13350 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13351
13352 /* This formula yields a nonzero result iff one of the bytes is zero.
13353 This saves three branches inside the loop and many cycles. */
13354
13355 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13356 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13357 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13358 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13359 gen_int_mode (0x80808080, SImode)));
13360 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13361 align_4_label);
13362
13363 if (TARGET_CMOVE)
13364 {
13365 rtx reg = gen_reg_rtx (SImode);
13366 rtx reg2 = gen_reg_rtx (Pmode);
13367 emit_move_insn (reg, tmpreg);
13368 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13369
13370 /* If zero is not in the first two bytes, move two bytes forward. */
13371 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13372 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13373 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13374 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13375 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13376 reg,
13377 tmpreg)));
13378 /* Emit lea manually to avoid clobbering of flags. */
13379 emit_insn (gen_rtx_SET (SImode, reg2,
13380 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13381
13382 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13383 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13384 emit_insn (gen_rtx_SET (VOIDmode, out,
13385 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13386 reg2,
13387 out)));
13388
13389 }
13390 else
13391 {
13392 rtx end_2_label = gen_label_rtx ();
13393 /* Is zero in the first two bytes? */
13394
13395 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13396 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13397 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13398 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13399 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13400 pc_rtx);
13401 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13402 JUMP_LABEL (tmp) = end_2_label;
13403
13404 /* Not in the first two. Move two bytes forward. */
13405 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13406 if (TARGET_64BIT)
13407 emit_insn (gen_adddi3 (out, out, const2_rtx));
13408 else
13409 emit_insn (gen_addsi3 (out, out, const2_rtx));
13410
13411 emit_label (end_2_label);
13412
13413 }
13414
13415 /* Avoid a branch when adjusting OUT for the exact zero-byte position. */
13416 tmpreg = gen_lowpart (QImode, tmpreg);
13417 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13418 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13419 if (TARGET_64BIT)
13420 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13421 else
13422 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13423
13424 emit_label (end_0_label);
13425 }
13426
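/* A minimal standalone check of the zero-byte test emitted in the loop
   above: (v - 0x01010101) & ~v & 0x80808080 is nonzero if and only if at
   least one byte of v is zero.  A zero byte wraps to 0xff in the
   subtraction, so its high bit is set both there and in ~v; when no byte is
   zero, no borrow ripples between bytes and the ~v mask rejects the only
   bytes (those >= 0x81) whose high bit survives the subtraction.  The
   helper name is hypothetical.  */

static int
word_has_zero_byte (unsigned int v)
{
  return ((v - 0x01010101U) & ~v & 0x80808080U) != 0;
}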
13427 void
13428 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13429 rtx callarg2 ATTRIBUTE_UNUSED,
13430 rtx pop, int sibcall)
13431 {
13432 rtx use = NULL, call;
13433
13434 if (pop == const0_rtx)
13435 pop = NULL;
13436 gcc_assert (!TARGET_64BIT || !pop);
13437
13438 if (TARGET_MACHO && !TARGET_64BIT)
13439 {
13440 #if TARGET_MACHO
13441 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13442 fnaddr = machopic_indirect_call_target (fnaddr);
13443 #endif
13444 }
13445 else
13446 {
13447 /* Static functions and indirect calls don't need the pic register. */
13448 if (! TARGET_64BIT && flag_pic
13449 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13450 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13451 use_reg (&use, pic_offset_table_rtx);
13452 }
13453
13454 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13455 {
13456 rtx al = gen_rtx_REG (QImode, 0);
13457 emit_move_insn (al, callarg2);
13458 use_reg (&use, al);
13459 }
13460
13461 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13462 {
13463 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13464 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13465 }
13466 if (sibcall && TARGET_64BIT
13467 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13468 {
13469 rtx addr;
13470 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13471 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13472 emit_move_insn (fnaddr, addr);
13473 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13474 }
13475
13476 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13477 if (retval)
13478 call = gen_rtx_SET (VOIDmode, retval, call);
13479 if (pop)
13480 {
13481 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13482 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13483 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13484 }
13485
13486 call = emit_call_insn (call);
13487 if (use)
13488 CALL_INSN_FUNCTION_USAGE (call) = use;
13489 }
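/* For a call that pops its own arguments, the insn built above is a
   PARALLEL pairing the call (or its SET when there is a return value) with
   the stack adjustment, roughly:

	(parallel [(set (reg retval) (call (mem:QI fnaddr) nbytes))
		   (set (reg sp) (plus (reg sp) (const_int pop)))])

   This only sketches the shape; the actual modes follow Pmode and the mode
   of the return value.  */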
13490
13491 \f
13492 /* Clear stack slot assignments remembered from previous functions.
13493 This is called from INIT_EXPANDERS once before RTL is emitted for each
13494 function. */
13495
13496 static struct machine_function *
13497 ix86_init_machine_status (void)
13498 {
13499 struct machine_function *f;
13500
13501 f = ggc_alloc_cleared (sizeof (struct machine_function));
13502 f->use_fast_prologue_epilogue_nregs = -1;
13503 f->tls_descriptor_call_expanded_p = 0;
13504
13505 return f;
13506 }
13507
13508 /* Return a MEM corresponding to a stack slot with mode MODE.
13509 Allocate a new slot if necessary.
13510
13511 The RTL for a function can have several slots available: N is
13512 which slot to use. */
13513
13514 rtx
13515 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13516 {
13517 struct stack_local_entry *s;
13518
13519 gcc_assert (n < MAX_386_STACK_LOCALS);
13520
13521 for (s = ix86_stack_locals; s; s = s->next)
13522 if (s->mode == mode && s->n == n)
13523 return copy_rtx (s->rtl);
13524
13525 s = (struct stack_local_entry *)
13526 ggc_alloc (sizeof (struct stack_local_entry));
13527 s->n = n;
13528 s->mode = mode;
13529 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13530
13531 s->next = ix86_stack_locals;
13532 ix86_stack_locals = s;
13533 return s->rtl;
13534 }
13535
13536 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13537
13538 static GTY(()) rtx ix86_tls_symbol;
13539 rtx
13540 ix86_tls_get_addr (void)
13541 {
13542
13543 if (!ix86_tls_symbol)
13544 {
13545 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13546 (TARGET_ANY_GNU_TLS
13547 && !TARGET_64BIT)
13548 ? "___tls_get_addr"
13549 : "__tls_get_addr");
13550 }
13551
13552 return ix86_tls_symbol;
13553 }
13554
13555 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13556
13557 static GTY(()) rtx ix86_tls_module_base_symbol;
13558 rtx
13559 ix86_tls_module_base (void)
13560 {
13561
13562 if (!ix86_tls_module_base_symbol)
13563 {
13564 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13565 "_TLS_MODULE_BASE_");
13566 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13567 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13568 }
13569
13570 return ix86_tls_module_base_symbol;
13571 }
13572 \f
13573 /* Calculate the length of the memory address in the instruction
13574 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13575
13576 int
13577 memory_address_length (rtx addr)
13578 {
13579 struct ix86_address parts;
13580 rtx base, index, disp;
13581 int len;
13582 int ok;
13583
13584 if (GET_CODE (addr) == PRE_DEC
13585 || GET_CODE (addr) == POST_INC
13586 || GET_CODE (addr) == PRE_MODIFY
13587 || GET_CODE (addr) == POST_MODIFY)
13588 return 0;
13589
13590 ok = ix86_decompose_address (addr, &parts);
13591 gcc_assert (ok);
13592
13593 if (parts.base && GET_CODE (parts.base) == SUBREG)
13594 parts.base = SUBREG_REG (parts.base);
13595 if (parts.index && GET_CODE (parts.index) == SUBREG)
13596 parts.index = SUBREG_REG (parts.index);
13597
13598 base = parts.base;
13599 index = parts.index;
13600 disp = parts.disp;
13601 len = 0;
13602
13603 /* Rule of thumb:
13604 - esp as the base always wants an index,
13605 - ebp as the base always wants a displacement. */
13606
13607 /* Register Indirect. */
13608 if (base && !index && !disp)
13609 {
13610 /* esp (for its index) and ebp (for its displacement) need
13611 the two-byte modrm form. */
13612 if (addr == stack_pointer_rtx
13613 || addr == arg_pointer_rtx
13614 || addr == frame_pointer_rtx
13615 || addr == hard_frame_pointer_rtx)
13616 len = 1;
13617 }
13618
13619 /* Direct Addressing. */
13620 else if (disp && !base && !index)
13621 len = 4;
13622
13623 else
13624 {
13625 /* Find the length of the displacement constant. */
13626 if (disp)
13627 {
13628 if (base && satisfies_constraint_K (disp))
13629 len = 1;
13630 else
13631 len = 4;
13632 }
13633 /* ebp always wants a displacement. */
13634 else if (base == hard_frame_pointer_rtx)
13635 len = 1;
13636
13637 /* An index requires the two-byte modrm form.... */
13638 if (index
13639 /* ...like esp, which always wants an index. */
13640 || base == stack_pointer_rtx
13641 || base == arg_pointer_rtx
13642 || base == frame_pointer_rtx)
13643 len += 1;
13644 }
13645
13646 return len;
13647 }
13648
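/* A minimal standalone model of the byte counting in memory_address_length
   above, using plain flags instead of RTL.  It returns the bytes needed
   beyond the opcode and the one-byte modrm: a disp32 for direct
   addressing, a disp8 or disp32 for a based address (the K constraint used
   above accepts signed 8-bit displacements), plus one SIB byte whenever an
   index is present or the base is esp, and a one-byte displacement when the
   base is ebp with no displacement.  The struct and helper names are
   illustrative only.  */

struct addr_shape
{
  int has_base, has_index, has_disp;
  int base_is_esp, base_is_ebp, disp_fits_8bit;
};

static int
extra_address_bytes (const struct addr_shape *a)
{
  int len = 0;

  /* Register indirect: only esp (SIB) and ebp (disp8) need an extra byte.  */
  if (a->has_base && !a->has_index && !a->has_disp)
    return a->base_is_esp || a->base_is_ebp ? 1 : 0;

  /* Direct addressing: a full 32-bit displacement.  */
  if (a->has_disp && !a->has_base && !a->has_index)
    return 4;

  if (a->has_disp)
    len = a->has_base && a->disp_fits_8bit ? 1 : 4;
  else if (a->base_is_ebp)
    len = 1;

  /* An index, or esp as the base, forces a SIB byte.  */
  if (a->has_index || a->base_is_esp)
    len += 1;
  return len;
}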
13649 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
13650 is set, expect that the insn has an 8-bit immediate alternative. */
13651 int
13652 ix86_attr_length_immediate_default (rtx insn, int shortform)
13653 {
13654 int len = 0;
13655 int i;
13656 extract_insn_cached (insn);
13657 for (i = recog_data.n_operands - 1; i >= 0; --i)
13658 if (CONSTANT_P (recog_data.operand[i]))
13659 {
13660 gcc_assert (!len);
13661 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13662 len = 1;
13663 else
13664 {
13665 switch (get_attr_mode (insn))
13666 {
13667 case MODE_QI:
13668 len += 1;
13669 break;
13670 case MODE_HI:
13671 len += 2;
13672 break;
13673 case MODE_SI:
13674 len += 4;
13675 break;
13676 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13677 case MODE_DI:
13678 len += 4;
13679 break;
13680 default:
13681 fatal_insn ("unknown insn mode", insn);
13682 }
13683 }
13684 }
13685 return len;
13686 }
13687 /* Compute default value for "length_address" attribute. */
13688 int
13689 ix86_attr_length_address_default (rtx insn)
13690 {
13691 int i;
13692
13693 if (get_attr_type (insn) == TYPE_LEA)
13694 {
13695 rtx set = PATTERN (insn);
13696
13697 if (GET_CODE (set) == PARALLEL)
13698 set = XVECEXP (set, 0, 0);
13699
13700 gcc_assert (GET_CODE (set) == SET);
13701
13702 return memory_address_length (SET_SRC (set));
13703 }
13704
13705 extract_insn_cached (insn);
13706 for (i = recog_data.n_operands - 1; i >= 0; --i)
13707 if (GET_CODE (recog_data.operand[i]) == MEM)
13708 return memory_address_length (XEXP (recog_data.operand[i], 0));
13712 return 0;
13713 }
13714 \f
13715 /* Return the maximum number of instructions a cpu can issue. */
13716
13717 static int
13718 ix86_issue_rate (void)
13719 {
13720 switch (ix86_tune)
13721 {
13722 case PROCESSOR_PENTIUM:
13723 case PROCESSOR_K6:
13724 return 2;
13725
13726 case PROCESSOR_PENTIUMPRO:
13727 case PROCESSOR_PENTIUM4:
13728 case PROCESSOR_ATHLON:
13729 case PROCESSOR_K8:
13730 case PROCESSOR_NOCONA:
13731 case PROCESSOR_GENERIC32:
13732 case PROCESSOR_GENERIC64:
13733 return 3;
13734
13735 default:
13736 return 1;
13737 }
13738 }
13739
13740 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
13741 by DEP_INSN and nothing else set by DEP_INSN. */
13742
13743 static int
13744 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13745 {
13746 rtx set, set2;
13747
13748 /* Simplify the test for uninteresting insns. */
13749 if (insn_type != TYPE_SETCC
13750 && insn_type != TYPE_ICMOV
13751 && insn_type != TYPE_FCMOV
13752 && insn_type != TYPE_IBR)
13753 return 0;
13754
13755 if ((set = single_set (dep_insn)) != 0)
13756 {
13757 set = SET_DEST (set);
13758 set2 = NULL_RTX;
13759 }
13760 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13761 && XVECLEN (PATTERN (dep_insn), 0) == 2
13762 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13763 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13764 {
13765 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13766 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13767 }
13768 else
13769 return 0;
13770
13771 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13772 return 0;
13773
13774 /* This test is true if the dependent insn reads the flags but
13775 not any other potentially set register. */
13776 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13777 return 0;
13778
13779 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13780 return 0;
13781
13782 return 1;
13783 }
13784
13785 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13786 address with operands set by DEP_INSN. */
13787
13788 static int
13789 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13790 {
13791 rtx addr;
13792
13793 if (insn_type == TYPE_LEA
13794 && TARGET_PENTIUM)
13795 {
13796 addr = PATTERN (insn);
13797
13798 if (GET_CODE (addr) == PARALLEL)
13799 addr = XVECEXP (addr, 0, 0);
13800
13801 gcc_assert (GET_CODE (addr) == SET);
13802
13803 addr = SET_SRC (addr);
13804 }
13805 else
13806 {
13807 int i;
13808 extract_insn_cached (insn);
13809 for (i = recog_data.n_operands - 1; i >= 0; --i)
13810 if (GET_CODE (recog_data.operand[i]) == MEM)
13811 {
13812 addr = XEXP (recog_data.operand[i], 0);
13813 goto found;
13814 }
13815 return 0;
13816 found:;
13817 }
13818
13819 return modified_in_p (addr, dep_insn);
13820 }
13821
13822 static int
13823 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13824 {
13825 enum attr_type insn_type, dep_insn_type;
13826 enum attr_memory memory;
13827 rtx set, set2;
13828 int dep_insn_code_number;
13829
13830 /* Anti and output dependencies have zero cost on all CPUs. */
13831 if (REG_NOTE_KIND (link) != 0)
13832 return 0;
13833
13834 dep_insn_code_number = recog_memoized (dep_insn);
13835
13836 /* If we can't recognize the insns, we can't really do anything. */
13837 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13838 return cost;
13839
13840 insn_type = get_attr_type (insn);
13841 dep_insn_type = get_attr_type (dep_insn);
13842
13843 switch (ix86_tune)
13844 {
13845 case PROCESSOR_PENTIUM:
13846 /* Address Generation Interlock adds a cycle of latency. */
13847 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13848 cost += 1;
13849
13850 /* ??? Compares pair with jump/setcc. */
13851 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13852 cost = 0;
13853
13854 /* Floating point stores require the value to be ready one cycle earlier. */
13855 if (insn_type == TYPE_FMOV
13856 && get_attr_memory (insn) == MEMORY_STORE
13857 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13858 cost += 1;
13859 break;
13860
13861 case PROCESSOR_PENTIUMPRO:
13862 memory = get_attr_memory (insn);
13863
13864 /* INT->FP conversion is expensive. */
13865 if (get_attr_fp_int_src (dep_insn))
13866 cost += 5;
13867
13868 /* There is one cycle extra latency between an FP op and a store. */
13869 if (insn_type == TYPE_FMOV
13870 && (set = single_set (dep_insn)) != NULL_RTX
13871 && (set2 = single_set (insn)) != NULL_RTX
13872 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13873 && GET_CODE (SET_DEST (set2)) == MEM)
13874 cost += 1;
13875
13876 /* Model the ability of the reorder buffer to hide the latency of a load by
13877 executing it in parallel with the previous instruction when the previous
13878 instruction is not needed to compute the address. */
13879 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13880 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13881 {
13882 /* Claim that moves take one cycle, as the core can issue one load
13883 at a time and the next load can start a cycle later. */
13884 if (dep_insn_type == TYPE_IMOV
13885 || dep_insn_type == TYPE_FMOV)
13886 cost = 1;
13887 else if (cost > 1)
13888 cost--;
13889 }
13890 break;
13891
13892 case PROCESSOR_K6:
13893 memory = get_attr_memory (insn);
13894
13895 /* The esp dependency is resolved before the instruction is really
13896 finished. */
13897 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13898 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13899 return 1;
13900
13901 /* INT->FP conversion is expensive. */
13902 if (get_attr_fp_int_src (dep_insn))
13903 cost += 5;
13904
13905 /* Model the ability of the reorder buffer to hide the latency of a load by
13906 executing it in parallel with the previous instruction when the previous
13907 instruction is not needed to compute the address. */
13908 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13909 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13910 {
13911 /* Claim that moves take one cycle, as the core can issue one load
13912 at a time and the next load can start a cycle later. */
13913 if (dep_insn_type == TYPE_IMOV
13914 || dep_insn_type == TYPE_FMOV)
13915 cost = 1;
13916 else if (cost > 2)
13917 cost -= 2;
13918 else
13919 cost = 1;
13920 }
13921 break;
13922
13923 case PROCESSOR_ATHLON:
13924 case PROCESSOR_K8:
13925 case PROCESSOR_GENERIC32:
13926 case PROCESSOR_GENERIC64:
13927 memory = get_attr_memory (insn);
13928
13929 /* Model the ability of the reorder buffer to hide the latency of a load by
13930 executing it in parallel with the previous instruction when the previous
13931 instruction is not needed to compute the address. */
13932 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13933 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13934 {
13935 enum attr_unit unit = get_attr_unit (insn);
13936 int loadcost = 3;
13937
13938 /* Because of the difference between the length of the integer and
13939 floating point unit pipeline preparation stages, the memory operands
13940 for floating point are cheaper.
13941
13942 ??? For Athlon the difference is most probably 2. */
13943 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13944 loadcost = 3;
13945 else
13946 loadcost = TARGET_ATHLON ? 2 : 0;
13947
13948 if (cost >= loadcost)
13949 cost -= loadcost;
13950 else
13951 cost = 0;
13952 }
13953
13954 default:
13955 break;
13956 }
13957
13958 return cost;
13959 }
13960
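/* A hedged illustration of the Pentium adjustments above (the instruction
   sequence is an example, not taken from this file): in

	movl %ebx, %eax
	movl (%eax), %ecx

   the load's address depends on the write of %eax in the immediately
   preceding insn, so ix86_agi_dependent makes ix86_adjust_cost charge one
   extra cycle; a compare followed by the conditional jump that reads its
   flags is instead modelled as pairing for free (cost 0) via
   ix86_flags_dependent.  */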
13961 /* How many alternative schedules to try. This should be as wide as the
13962 scheduling freedom in the DFA, but no wider. Making this value too
13963 large results in extra work for the scheduler. */
13964
13965 static int
13966 ia32_multipass_dfa_lookahead (void)
13967 {
13968 if (ix86_tune == PROCESSOR_PENTIUM)
13969 return 2;
13970
13971 if (ix86_tune == PROCESSOR_PENTIUMPRO
13972 || ix86_tune == PROCESSOR_K6)
13973 return 1;
13974
13975 else
13976 return 0;
13977 }
13978
13979 \f
13980 /* Compute the alignment given to a constant that is being placed in memory.
13981 EXP is the constant and ALIGN is the alignment that the object would
13982 ordinarily have.
13983 The value of this function is used instead of that alignment to align
13984 the object. */
13985
13986 int
13987 ix86_constant_alignment (tree exp, int align)
13988 {
13989 if (TREE_CODE (exp) == REAL_CST)
13990 {
13991 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13992 return 64;
13993 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13994 return 128;
13995 }
13996 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13997 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13998 return BITS_PER_WORD;
13999
14000 return align;
14001 }
14002
14003 /* Compute the alignment for a static variable.
14004 TYPE is the data type, and ALIGN is the alignment that
14005 the object would ordinarily have. The value of this function is used
14006 instead of that alignment to align the object. */
14007
14008 int
14009 ix86_data_alignment (tree type, int align)
14010 {
14011 int max_align = optimize_size ? BITS_PER_WORD : 256;
14012
14013 if (AGGREGATE_TYPE_P (type)
14014 && TYPE_SIZE (type)
14015 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14016 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14017 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14018 && align < max_align)
14019 align = max_align;
14020
14021 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
14022 to a 16-byte boundary. */
14023 if (TARGET_64BIT)
14024 {
14025 if (AGGREGATE_TYPE_P (type)
14026 && TYPE_SIZE (type)
14027 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14028 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14029 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14030 return 128;
14031 }
14032
14033 if (TREE_CODE (type) == ARRAY_TYPE)
14034 {
14035 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14036 return 64;
14037 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14038 return 128;
14039 }
14040 else if (TREE_CODE (type) == COMPLEX_TYPE)
14041 {
14042
14043 if (TYPE_MODE (type) == DCmode && align < 64)
14044 return 64;
14045 if (TYPE_MODE (type) == XCmode && align < 128)
14046 return 128;
14047 }
14048 else if ((TREE_CODE (type) == RECORD_TYPE
14049 || TREE_CODE (type) == UNION_TYPE
14050 || TREE_CODE (type) == QUAL_UNION_TYPE)
14051 && TYPE_FIELDS (type))
14052 {
14053 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14054 return 64;
14055 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14056 return 128;
14057 }
14058 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14059 || TREE_CODE (type) == INTEGER_TYPE)
14060 {
14061 if (TYPE_MODE (type) == DFmode && align < 64)
14062 return 64;
14063 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14064 return 128;
14065 }
14066
14067 return align;
14068 }
14069
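/* Hedged examples of the effect of ix86_data_alignment above (the exact
   alignment emitted also depends on options and on the caller of
   DATA_ALIGNMENT): a file-scope "static double x;" is raised to 64-bit
   alignment by the REAL_TYPE case, and on x86-64 an aggregate such as
   "static char buf[16];" is raised to at least a 16-byte boundary,
   matching the psABI rule for arrays of 16 bytes or more.  */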
14070 /* Compute the alignment for a local variable.
14071 TYPE is the data type, and ALIGN is the alignment that
14072 the object would ordinarily have. The value of this macro is used
14073 instead of that alignment to align the object. */
14074
14075 int
14076 ix86_local_alignment (tree type, int align)
14077 {
14078 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
14079 to a 16-byte boundary. */
14080 if (TARGET_64BIT)
14081 {
14082 if (AGGREGATE_TYPE_P (type)
14083 && TYPE_SIZE (type)
14084 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14085 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14086 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14087 return 128;
14088 }
14089 if (TREE_CODE (type) == ARRAY_TYPE)
14090 {
14091 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14092 return 64;
14093 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14094 return 128;
14095 }
14096 else if (TREE_CODE (type) == COMPLEX_TYPE)
14097 {
14098 if (TYPE_MODE (type) == DCmode && align < 64)
14099 return 64;
14100 if (TYPE_MODE (type) == XCmode && align < 128)
14101 return 128;
14102 }
14103 else if ((TREE_CODE (type) == RECORD_TYPE
14104 || TREE_CODE (type) == UNION_TYPE
14105 || TREE_CODE (type) == QUAL_UNION_TYPE)
14106 && TYPE_FIELDS (type))
14107 {
14108 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14109 return 64;
14110 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14111 return 128;
14112 }
14113 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14114 || TREE_CODE (type) == INTEGER_TYPE)
14115 {
14116
14117 if (TYPE_MODE (type) == DFmode && align < 64)
14118 return 64;
14119 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14120 return 128;
14121 }
14122 return align;
14123 }
14124 \f
14125 /* Emit RTL insns to initialize the variable parts of a trampoline.
14126 FNADDR is an RTX for the address of the function's pure code.
14127 CXT is an RTX for the static chain value for the function. */
14128 void
14129 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14130 {
14131 if (!TARGET_64BIT)
14132 {
14133 /* Compute offset from the end of the jmp to the target function. */
14134 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14135 plus_constant (tramp, 10),
14136 NULL_RTX, 1, OPTAB_DIRECT);
14137 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14138 gen_int_mode (0xb9, QImode));
14139 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14140 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14141 gen_int_mode (0xe9, QImode));
14142 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14143 }
14144 else
14145 {
14146 int offset = 0;
14147 /* Try to load the address using the shorter movl instead of movabs.
14148 We may want to support movq for kernel mode, but the kernel does not use
14149 trampolines at the moment. */
14150 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14151 {
14152 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14153 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14154 gen_int_mode (0xbb41, HImode));
14155 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14156 gen_lowpart (SImode, fnaddr));
14157 offset += 6;
14158 }
14159 else
14160 {
14161 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14162 gen_int_mode (0xbb49, HImode));
14163 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14164 fnaddr);
14165 offset += 10;
14166 }
14167 /* Load static chain using movabs to r10. */
14168 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14169 gen_int_mode (0xba49, HImode));
14170 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14171 cxt);
14172 offset += 10;
14173 /* Jump to r11. */
14174 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14175 gen_int_mode (0xff49, HImode));
14176 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14177 gen_int_mode (0xe3, QImode));
14178 offset += 3;
14179 gcc_assert (offset <= TRAMPOLINE_SIZE);
14180 }
14181
14182 #ifdef ENABLE_EXECUTE_STACK
14183 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14184 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14185 #endif
14186 }
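/* Byte layout of the trampolines emitted above (a sketch; the offsets
   follow the stores in x86_initialize_trampoline):

   32-bit (10 bytes):
	b9 <cxt:4>		movl   $cxt, %ecx
	e9 <disp:4>		jmp    fnaddr	 (disp relative to tramp+10)

   64-bit, address fits in 32 bits (19 bytes):
	41 bb <fnaddr:4>	movl   $fnaddr, %r11d
	49 ba <cxt:8>		movabs $cxt, %r10
	49 ff e3		jmp    *%r11

   64-bit, full 64-bit address (23 bytes):
	49 bb <fnaddr:8>	movabs $fnaddr, %r11
	49 ba <cxt:8>		movabs $cxt, %r10
	49 ff e3		jmp    *%r11  */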
14187 \f
14188 /* Codes for all the SSE/MMX builtins. */
14189 enum ix86_builtins
14190 {
14191 IX86_BUILTIN_ADDPS,
14192 IX86_BUILTIN_ADDSS,
14193 IX86_BUILTIN_DIVPS,
14194 IX86_BUILTIN_DIVSS,
14195 IX86_BUILTIN_MULPS,
14196 IX86_BUILTIN_MULSS,
14197 IX86_BUILTIN_SUBPS,
14198 IX86_BUILTIN_SUBSS,
14199
14200 IX86_BUILTIN_CMPEQPS,
14201 IX86_BUILTIN_CMPLTPS,
14202 IX86_BUILTIN_CMPLEPS,
14203 IX86_BUILTIN_CMPGTPS,
14204 IX86_BUILTIN_CMPGEPS,
14205 IX86_BUILTIN_CMPNEQPS,
14206 IX86_BUILTIN_CMPNLTPS,
14207 IX86_BUILTIN_CMPNLEPS,
14208 IX86_BUILTIN_CMPNGTPS,
14209 IX86_BUILTIN_CMPNGEPS,
14210 IX86_BUILTIN_CMPORDPS,
14211 IX86_BUILTIN_CMPUNORDPS,
14212 IX86_BUILTIN_CMPEQSS,
14213 IX86_BUILTIN_CMPLTSS,
14214 IX86_BUILTIN_CMPLESS,
14215 IX86_BUILTIN_CMPNEQSS,
14216 IX86_BUILTIN_CMPNLTSS,
14217 IX86_BUILTIN_CMPNLESS,
14218 IX86_BUILTIN_CMPNGTSS,
14219 IX86_BUILTIN_CMPNGESS,
14220 IX86_BUILTIN_CMPORDSS,
14221 IX86_BUILTIN_CMPUNORDSS,
14222
14223 IX86_BUILTIN_COMIEQSS,
14224 IX86_BUILTIN_COMILTSS,
14225 IX86_BUILTIN_COMILESS,
14226 IX86_BUILTIN_COMIGTSS,
14227 IX86_BUILTIN_COMIGESS,
14228 IX86_BUILTIN_COMINEQSS,
14229 IX86_BUILTIN_UCOMIEQSS,
14230 IX86_BUILTIN_UCOMILTSS,
14231 IX86_BUILTIN_UCOMILESS,
14232 IX86_BUILTIN_UCOMIGTSS,
14233 IX86_BUILTIN_UCOMIGESS,
14234 IX86_BUILTIN_UCOMINEQSS,
14235
14236 IX86_BUILTIN_CVTPI2PS,
14237 IX86_BUILTIN_CVTPS2PI,
14238 IX86_BUILTIN_CVTSI2SS,
14239 IX86_BUILTIN_CVTSI642SS,
14240 IX86_BUILTIN_CVTSS2SI,
14241 IX86_BUILTIN_CVTSS2SI64,
14242 IX86_BUILTIN_CVTTPS2PI,
14243 IX86_BUILTIN_CVTTSS2SI,
14244 IX86_BUILTIN_CVTTSS2SI64,
14245
14246 IX86_BUILTIN_MAXPS,
14247 IX86_BUILTIN_MAXSS,
14248 IX86_BUILTIN_MINPS,
14249 IX86_BUILTIN_MINSS,
14250
14251 IX86_BUILTIN_LOADUPS,
14252 IX86_BUILTIN_STOREUPS,
14253 IX86_BUILTIN_MOVSS,
14254
14255 IX86_BUILTIN_MOVHLPS,
14256 IX86_BUILTIN_MOVLHPS,
14257 IX86_BUILTIN_LOADHPS,
14258 IX86_BUILTIN_LOADLPS,
14259 IX86_BUILTIN_STOREHPS,
14260 IX86_BUILTIN_STORELPS,
14261
14262 IX86_BUILTIN_MASKMOVQ,
14263 IX86_BUILTIN_MOVMSKPS,
14264 IX86_BUILTIN_PMOVMSKB,
14265
14266 IX86_BUILTIN_MOVNTPS,
14267 IX86_BUILTIN_MOVNTQ,
14268
14269 IX86_BUILTIN_LOADDQU,
14270 IX86_BUILTIN_STOREDQU,
14271
14272 IX86_BUILTIN_PACKSSWB,
14273 IX86_BUILTIN_PACKSSDW,
14274 IX86_BUILTIN_PACKUSWB,
14275
14276 IX86_BUILTIN_PADDB,
14277 IX86_BUILTIN_PADDW,
14278 IX86_BUILTIN_PADDD,
14279 IX86_BUILTIN_PADDQ,
14280 IX86_BUILTIN_PADDSB,
14281 IX86_BUILTIN_PADDSW,
14282 IX86_BUILTIN_PADDUSB,
14283 IX86_BUILTIN_PADDUSW,
14284 IX86_BUILTIN_PSUBB,
14285 IX86_BUILTIN_PSUBW,
14286 IX86_BUILTIN_PSUBD,
14287 IX86_BUILTIN_PSUBQ,
14288 IX86_BUILTIN_PSUBSB,
14289 IX86_BUILTIN_PSUBSW,
14290 IX86_BUILTIN_PSUBUSB,
14291 IX86_BUILTIN_PSUBUSW,
14292
14293 IX86_BUILTIN_PAND,
14294 IX86_BUILTIN_PANDN,
14295 IX86_BUILTIN_POR,
14296 IX86_BUILTIN_PXOR,
14297
14298 IX86_BUILTIN_PAVGB,
14299 IX86_BUILTIN_PAVGW,
14300
14301 IX86_BUILTIN_PCMPEQB,
14302 IX86_BUILTIN_PCMPEQW,
14303 IX86_BUILTIN_PCMPEQD,
14304 IX86_BUILTIN_PCMPGTB,
14305 IX86_BUILTIN_PCMPGTW,
14306 IX86_BUILTIN_PCMPGTD,
14307
14308 IX86_BUILTIN_PMADDWD,
14309
14310 IX86_BUILTIN_PMAXSW,
14311 IX86_BUILTIN_PMAXUB,
14312 IX86_BUILTIN_PMINSW,
14313 IX86_BUILTIN_PMINUB,
14314
14315 IX86_BUILTIN_PMULHUW,
14316 IX86_BUILTIN_PMULHW,
14317 IX86_BUILTIN_PMULLW,
14318
14319 IX86_BUILTIN_PSADBW,
14320 IX86_BUILTIN_PSHUFW,
14321
14322 IX86_BUILTIN_PSLLW,
14323 IX86_BUILTIN_PSLLD,
14324 IX86_BUILTIN_PSLLQ,
14325 IX86_BUILTIN_PSRAW,
14326 IX86_BUILTIN_PSRAD,
14327 IX86_BUILTIN_PSRLW,
14328 IX86_BUILTIN_PSRLD,
14329 IX86_BUILTIN_PSRLQ,
14330 IX86_BUILTIN_PSLLWI,
14331 IX86_BUILTIN_PSLLDI,
14332 IX86_BUILTIN_PSLLQI,
14333 IX86_BUILTIN_PSRAWI,
14334 IX86_BUILTIN_PSRADI,
14335 IX86_BUILTIN_PSRLWI,
14336 IX86_BUILTIN_PSRLDI,
14337 IX86_BUILTIN_PSRLQI,
14338
14339 IX86_BUILTIN_PUNPCKHBW,
14340 IX86_BUILTIN_PUNPCKHWD,
14341 IX86_BUILTIN_PUNPCKHDQ,
14342 IX86_BUILTIN_PUNPCKLBW,
14343 IX86_BUILTIN_PUNPCKLWD,
14344 IX86_BUILTIN_PUNPCKLDQ,
14345
14346 IX86_BUILTIN_SHUFPS,
14347
14348 IX86_BUILTIN_RCPPS,
14349 IX86_BUILTIN_RCPSS,
14350 IX86_BUILTIN_RSQRTPS,
14351 IX86_BUILTIN_RSQRTSS,
14352 IX86_BUILTIN_SQRTPS,
14353 IX86_BUILTIN_SQRTSS,
14354
14355 IX86_BUILTIN_UNPCKHPS,
14356 IX86_BUILTIN_UNPCKLPS,
14357
14358 IX86_BUILTIN_ANDPS,
14359 IX86_BUILTIN_ANDNPS,
14360 IX86_BUILTIN_ORPS,
14361 IX86_BUILTIN_XORPS,
14362
14363 IX86_BUILTIN_EMMS,
14364 IX86_BUILTIN_LDMXCSR,
14365 IX86_BUILTIN_STMXCSR,
14366 IX86_BUILTIN_SFENCE,
14367
14368 /* 3DNow! Original */
14369 IX86_BUILTIN_FEMMS,
14370 IX86_BUILTIN_PAVGUSB,
14371 IX86_BUILTIN_PF2ID,
14372 IX86_BUILTIN_PFACC,
14373 IX86_BUILTIN_PFADD,
14374 IX86_BUILTIN_PFCMPEQ,
14375 IX86_BUILTIN_PFCMPGE,
14376 IX86_BUILTIN_PFCMPGT,
14377 IX86_BUILTIN_PFMAX,
14378 IX86_BUILTIN_PFMIN,
14379 IX86_BUILTIN_PFMUL,
14380 IX86_BUILTIN_PFRCP,
14381 IX86_BUILTIN_PFRCPIT1,
14382 IX86_BUILTIN_PFRCPIT2,
14383 IX86_BUILTIN_PFRSQIT1,
14384 IX86_BUILTIN_PFRSQRT,
14385 IX86_BUILTIN_PFSUB,
14386 IX86_BUILTIN_PFSUBR,
14387 IX86_BUILTIN_PI2FD,
14388 IX86_BUILTIN_PMULHRW,
14389
14390 /* 3DNow! Athlon Extensions */
14391 IX86_BUILTIN_PF2IW,
14392 IX86_BUILTIN_PFNACC,
14393 IX86_BUILTIN_PFPNACC,
14394 IX86_BUILTIN_PI2FW,
14395 IX86_BUILTIN_PSWAPDSI,
14396 IX86_BUILTIN_PSWAPDSF,
14397
14398 /* SSE2 */
14399 IX86_BUILTIN_ADDPD,
14400 IX86_BUILTIN_ADDSD,
14401 IX86_BUILTIN_DIVPD,
14402 IX86_BUILTIN_DIVSD,
14403 IX86_BUILTIN_MULPD,
14404 IX86_BUILTIN_MULSD,
14405 IX86_BUILTIN_SUBPD,
14406 IX86_BUILTIN_SUBSD,
14407
14408 IX86_BUILTIN_CMPEQPD,
14409 IX86_BUILTIN_CMPLTPD,
14410 IX86_BUILTIN_CMPLEPD,
14411 IX86_BUILTIN_CMPGTPD,
14412 IX86_BUILTIN_CMPGEPD,
14413 IX86_BUILTIN_CMPNEQPD,
14414 IX86_BUILTIN_CMPNLTPD,
14415 IX86_BUILTIN_CMPNLEPD,
14416 IX86_BUILTIN_CMPNGTPD,
14417 IX86_BUILTIN_CMPNGEPD,
14418 IX86_BUILTIN_CMPORDPD,
14419 IX86_BUILTIN_CMPUNORDPD,
14420 IX86_BUILTIN_CMPNEPD,
14421 IX86_BUILTIN_CMPEQSD,
14422 IX86_BUILTIN_CMPLTSD,
14423 IX86_BUILTIN_CMPLESD,
14424 IX86_BUILTIN_CMPNEQSD,
14425 IX86_BUILTIN_CMPNLTSD,
14426 IX86_BUILTIN_CMPNLESD,
14427 IX86_BUILTIN_CMPORDSD,
14428 IX86_BUILTIN_CMPUNORDSD,
14429 IX86_BUILTIN_CMPNESD,
14430
14431 IX86_BUILTIN_COMIEQSD,
14432 IX86_BUILTIN_COMILTSD,
14433 IX86_BUILTIN_COMILESD,
14434 IX86_BUILTIN_COMIGTSD,
14435 IX86_BUILTIN_COMIGESD,
14436 IX86_BUILTIN_COMINEQSD,
14437 IX86_BUILTIN_UCOMIEQSD,
14438 IX86_BUILTIN_UCOMILTSD,
14439 IX86_BUILTIN_UCOMILESD,
14440 IX86_BUILTIN_UCOMIGTSD,
14441 IX86_BUILTIN_UCOMIGESD,
14442 IX86_BUILTIN_UCOMINEQSD,
14443
14444 IX86_BUILTIN_MAXPD,
14445 IX86_BUILTIN_MAXSD,
14446 IX86_BUILTIN_MINPD,
14447 IX86_BUILTIN_MINSD,
14448
14449 IX86_BUILTIN_ANDPD,
14450 IX86_BUILTIN_ANDNPD,
14451 IX86_BUILTIN_ORPD,
14452 IX86_BUILTIN_XORPD,
14453
14454 IX86_BUILTIN_SQRTPD,
14455 IX86_BUILTIN_SQRTSD,
14456
14457 IX86_BUILTIN_UNPCKHPD,
14458 IX86_BUILTIN_UNPCKLPD,
14459
14460 IX86_BUILTIN_SHUFPD,
14461
14462 IX86_BUILTIN_LOADUPD,
14463 IX86_BUILTIN_STOREUPD,
14464 IX86_BUILTIN_MOVSD,
14465
14466 IX86_BUILTIN_LOADHPD,
14467 IX86_BUILTIN_LOADLPD,
14468
14469 IX86_BUILTIN_CVTDQ2PD,
14470 IX86_BUILTIN_CVTDQ2PS,
14471
14472 IX86_BUILTIN_CVTPD2DQ,
14473 IX86_BUILTIN_CVTPD2PI,
14474 IX86_BUILTIN_CVTPD2PS,
14475 IX86_BUILTIN_CVTTPD2DQ,
14476 IX86_BUILTIN_CVTTPD2PI,
14477
14478 IX86_BUILTIN_CVTPI2PD,
14479 IX86_BUILTIN_CVTSI2SD,
14480 IX86_BUILTIN_CVTSI642SD,
14481
14482 IX86_BUILTIN_CVTSD2SI,
14483 IX86_BUILTIN_CVTSD2SI64,
14484 IX86_BUILTIN_CVTSD2SS,
14485 IX86_BUILTIN_CVTSS2SD,
14486 IX86_BUILTIN_CVTTSD2SI,
14487 IX86_BUILTIN_CVTTSD2SI64,
14488
14489 IX86_BUILTIN_CVTPS2DQ,
14490 IX86_BUILTIN_CVTPS2PD,
14491 IX86_BUILTIN_CVTTPS2DQ,
14492
14493 IX86_BUILTIN_MOVNTI,
14494 IX86_BUILTIN_MOVNTPD,
14495 IX86_BUILTIN_MOVNTDQ,
14496
14497 /* SSE2 MMX */
14498 IX86_BUILTIN_MASKMOVDQU,
14499 IX86_BUILTIN_MOVMSKPD,
14500 IX86_BUILTIN_PMOVMSKB128,
14501
14502 IX86_BUILTIN_PACKSSWB128,
14503 IX86_BUILTIN_PACKSSDW128,
14504 IX86_BUILTIN_PACKUSWB128,
14505
14506 IX86_BUILTIN_PADDB128,
14507 IX86_BUILTIN_PADDW128,
14508 IX86_BUILTIN_PADDD128,
14509 IX86_BUILTIN_PADDQ128,
14510 IX86_BUILTIN_PADDSB128,
14511 IX86_BUILTIN_PADDSW128,
14512 IX86_BUILTIN_PADDUSB128,
14513 IX86_BUILTIN_PADDUSW128,
14514 IX86_BUILTIN_PSUBB128,
14515 IX86_BUILTIN_PSUBW128,
14516 IX86_BUILTIN_PSUBD128,
14517 IX86_BUILTIN_PSUBQ128,
14518 IX86_BUILTIN_PSUBSB128,
14519 IX86_BUILTIN_PSUBSW128,
14520 IX86_BUILTIN_PSUBUSB128,
14521 IX86_BUILTIN_PSUBUSW128,
14522
14523 IX86_BUILTIN_PAND128,
14524 IX86_BUILTIN_PANDN128,
14525 IX86_BUILTIN_POR128,
14526 IX86_BUILTIN_PXOR128,
14527
14528 IX86_BUILTIN_PAVGB128,
14529 IX86_BUILTIN_PAVGW128,
14530
14531 IX86_BUILTIN_PCMPEQB128,
14532 IX86_BUILTIN_PCMPEQW128,
14533 IX86_BUILTIN_PCMPEQD128,
14534 IX86_BUILTIN_PCMPGTB128,
14535 IX86_BUILTIN_PCMPGTW128,
14536 IX86_BUILTIN_PCMPGTD128,
14537
14538 IX86_BUILTIN_PMADDWD128,
14539
14540 IX86_BUILTIN_PMAXSW128,
14541 IX86_BUILTIN_PMAXUB128,
14542 IX86_BUILTIN_PMINSW128,
14543 IX86_BUILTIN_PMINUB128,
14544
14545 IX86_BUILTIN_PMULUDQ,
14546 IX86_BUILTIN_PMULUDQ128,
14547 IX86_BUILTIN_PMULHUW128,
14548 IX86_BUILTIN_PMULHW128,
14549 IX86_BUILTIN_PMULLW128,
14550
14551 IX86_BUILTIN_PSADBW128,
14552 IX86_BUILTIN_PSHUFHW,
14553 IX86_BUILTIN_PSHUFLW,
14554 IX86_BUILTIN_PSHUFD,
14555
14556 IX86_BUILTIN_PSLLW128,
14557 IX86_BUILTIN_PSLLD128,
14558 IX86_BUILTIN_PSLLQ128,
14559 IX86_BUILTIN_PSRAW128,
14560 IX86_BUILTIN_PSRAD128,
14561 IX86_BUILTIN_PSRLW128,
14562 IX86_BUILTIN_PSRLD128,
14563 IX86_BUILTIN_PSRLQ128,
14564 IX86_BUILTIN_PSLLDQI128,
14565 IX86_BUILTIN_PSLLWI128,
14566 IX86_BUILTIN_PSLLDI128,
14567 IX86_BUILTIN_PSLLQI128,
14568 IX86_BUILTIN_PSRAWI128,
14569 IX86_BUILTIN_PSRADI128,
14570 IX86_BUILTIN_PSRLDQI128,
14571 IX86_BUILTIN_PSRLWI128,
14572 IX86_BUILTIN_PSRLDI128,
14573 IX86_BUILTIN_PSRLQI128,
14574
14575 IX86_BUILTIN_PUNPCKHBW128,
14576 IX86_BUILTIN_PUNPCKHWD128,
14577 IX86_BUILTIN_PUNPCKHDQ128,
14578 IX86_BUILTIN_PUNPCKHQDQ128,
14579 IX86_BUILTIN_PUNPCKLBW128,
14580 IX86_BUILTIN_PUNPCKLWD128,
14581 IX86_BUILTIN_PUNPCKLDQ128,
14582 IX86_BUILTIN_PUNPCKLQDQ128,
14583
14584 IX86_BUILTIN_CLFLUSH,
14585 IX86_BUILTIN_MFENCE,
14586 IX86_BUILTIN_LFENCE,
14587
14588 /* Prescott New Instructions. */
14589 IX86_BUILTIN_ADDSUBPS,
14590 IX86_BUILTIN_HADDPS,
14591 IX86_BUILTIN_HSUBPS,
14592 IX86_BUILTIN_MOVSHDUP,
14593 IX86_BUILTIN_MOVSLDUP,
14594 IX86_BUILTIN_ADDSUBPD,
14595 IX86_BUILTIN_HADDPD,
14596 IX86_BUILTIN_HSUBPD,
14597 IX86_BUILTIN_LDDQU,
14598
14599 IX86_BUILTIN_MONITOR,
14600 IX86_BUILTIN_MWAIT,
14601
14602 /* SSSE3. */
14603 IX86_BUILTIN_PHADDW,
14604 IX86_BUILTIN_PHADDD,
14605 IX86_BUILTIN_PHADDSW,
14606 IX86_BUILTIN_PHSUBW,
14607 IX86_BUILTIN_PHSUBD,
14608 IX86_BUILTIN_PHSUBSW,
14609 IX86_BUILTIN_PMADDUBSW,
14610 IX86_BUILTIN_PMULHRSW,
14611 IX86_BUILTIN_PSHUFB,
14612 IX86_BUILTIN_PSIGNB,
14613 IX86_BUILTIN_PSIGNW,
14614 IX86_BUILTIN_PSIGND,
14615 IX86_BUILTIN_PALIGNR,
14616 IX86_BUILTIN_PABSB,
14617 IX86_BUILTIN_PABSW,
14618 IX86_BUILTIN_PABSD,
14619
14620 IX86_BUILTIN_PHADDW128,
14621 IX86_BUILTIN_PHADDD128,
14622 IX86_BUILTIN_PHADDSW128,
14623 IX86_BUILTIN_PHSUBW128,
14624 IX86_BUILTIN_PHSUBD128,
14625 IX86_BUILTIN_PHSUBSW128,
14626 IX86_BUILTIN_PMADDUBSW128,
14627 IX86_BUILTIN_PMULHRSW128,
14628 IX86_BUILTIN_PSHUFB128,
14629 IX86_BUILTIN_PSIGNB128,
14630 IX86_BUILTIN_PSIGNW128,
14631 IX86_BUILTIN_PSIGND128,
14632 IX86_BUILTIN_PALIGNR128,
14633 IX86_BUILTIN_PABSB128,
14634 IX86_BUILTIN_PABSW128,
14635 IX86_BUILTIN_PABSD128,
14636
14637 IX86_BUILTIN_VEC_INIT_V2SI,
14638 IX86_BUILTIN_VEC_INIT_V4HI,
14639 IX86_BUILTIN_VEC_INIT_V8QI,
14640 IX86_BUILTIN_VEC_EXT_V2DF,
14641 IX86_BUILTIN_VEC_EXT_V2DI,
14642 IX86_BUILTIN_VEC_EXT_V4SF,
14643 IX86_BUILTIN_VEC_EXT_V4SI,
14644 IX86_BUILTIN_VEC_EXT_V8HI,
14645 IX86_BUILTIN_VEC_EXT_V2SI,
14646 IX86_BUILTIN_VEC_EXT_V4HI,
14647 IX86_BUILTIN_VEC_SET_V8HI,
14648 IX86_BUILTIN_VEC_SET_V4HI,
14649
14650 IX86_BUILTIN_MAX
14651 };
14652
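/* Register a target-specific builtin: NAME is the user-visible
   __builtin_ia32_* name, TYPE its function type tree, and CODE the
   IX86_BUILTIN_* enumerator.  The builtin is created only when at least
   one ISA bit in MASK is set in target_flags, and builtins carrying
   MASK_64BIT additionally require TARGET_64BIT.  */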
14653 #define def_builtin(MASK, NAME, TYPE, CODE) \
14654 do { \
14655 if ((MASK) & target_flags \
14656 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14657 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14658 NULL, NULL_TREE); \
14659 } while (0)
14660
14661 /* Bits for builtin_description.flag. */
14662
14663 /* Set when we don't support the comparison natively, and should
14664 swap the comparison operands in order to support it. */
14665 #define BUILTIN_DESC_SWAP_OPERANDS 1
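/* For example, there is no separate GT pattern for the SSE mask
   compares, so __builtin_ia32_cmpgtps below is described as LT with
   BUILTIN_DESC_SWAP_OPERANDS and the expander swaps its two inputs.  */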
14666
14667 struct builtin_description
14668 {
14669 const unsigned int mask;
14670 const enum insn_code icode;
14671 const char *const name;
14672 const enum ix86_builtins code;
14673 const enum rtx_code comparison;
14674 const unsigned int flag;
14675 };
14676
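/* The tables below describe one builtin per entry: the ISA mask it
   needs, the insn pattern that implements it, its user-visible name
   (0 if it is registered separately with a bespoke type), its
   IX86_BUILTIN_* code and, for comparisons, the rtx comparison code.
   ix86_init_mmx_sse_builtins walks these tables and calls def_builtin
   for each named entry.  */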
14677 static const struct builtin_description bdesc_comi[] =
14678 {
14679 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14680 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14681 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14682 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14683 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14684 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14685 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14686 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14687 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14688 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14689 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14690 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14691 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14692 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14693 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14694 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14695 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14696 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14697 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14698 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14699 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14700 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14701 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14702 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14703 };
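
/* Illustrative sketch only, not part of GCC: how the comi builtins
   registered from the table above look to user code.  The v4sf typedef
   is an assumption here; <xmmintrin.h> wraps these as _mm_comieq_ss etc.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

static int
example_comieq (v4sf a, v4sf b)
{
  /* Maps to CODE_FOR_sse_comi via the first bdesc_comi entry; its type
     is int_ftype_v4sf_v4sf (see ix86_init_mmx_sse_builtins below).  */
  return __builtin_ia32_comieq (a, b);
}
#endif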
14704
14705 static const struct builtin_description bdesc_2arg[] =
14706 {
14707 /* SSE */
14708 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14709 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14710 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14711 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14712 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14713 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14714 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14715 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14716
14717 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14718 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14719 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14720 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14721 BUILTIN_DESC_SWAP_OPERANDS },
14722 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14723 BUILTIN_DESC_SWAP_OPERANDS },
14724 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14725 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14726 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14727 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14728 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14729 BUILTIN_DESC_SWAP_OPERANDS },
14730 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14731 BUILTIN_DESC_SWAP_OPERANDS },
14732 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14733 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14734 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14735 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14736 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14737 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14738 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14739 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14740 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14741 BUILTIN_DESC_SWAP_OPERANDS },
14742 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14743 BUILTIN_DESC_SWAP_OPERANDS },
14744 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14745
14746 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14747 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14748 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14749 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14750
14751 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14752 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14753 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14754 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14755
14756 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14757 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14758 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14759 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14760 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14761
14762 /* MMX */
14763 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14764 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14765 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14766 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14767 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14768 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14769 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14770 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14771
14772 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14773 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14774 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14775 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14776 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14777 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14778 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14779 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14780
14781 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14782 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14783 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14784
14785 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14786 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14787 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14788 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14789
14790 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14791 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14792
14793 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14794 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14795 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14796 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14797 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14798 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14799
14800 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14801 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14802 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14803 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14804
14805 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14806 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14807 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14808 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14809 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14810 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14811
14812 /* Special. */
14813 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14814 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14815 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14816
14817 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14818 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14819 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14820
14821 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14822 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14823 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14824 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14825 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14826 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14827
14828 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14829 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14830 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14831 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14832 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14833 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14834
14835 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14836 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14837 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14838 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14839
14840 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14841 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14842
14843 /* SSE2 */
14844 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14845 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14846 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14847 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14848 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14849 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14850 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14851 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14852
14853 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14854 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14855 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14856 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14857 BUILTIN_DESC_SWAP_OPERANDS },
14858 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14859 BUILTIN_DESC_SWAP_OPERANDS },
14860 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14861 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14862 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14863 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14864 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14865 BUILTIN_DESC_SWAP_OPERANDS },
14866 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14867 BUILTIN_DESC_SWAP_OPERANDS },
14868 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14869 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14870 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14871 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14872 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14873 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14874 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14875 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14877
14878 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14879 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14880 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14881 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14882
14883 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14884 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14885 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14886 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14887
14888 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14889 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14890 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14891
14892 /* SSE2 MMX */
14893 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14894 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14895 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14896 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14897 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14898 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14899 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14900 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14901
14902 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14903 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14904 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14905 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14906 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14907 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14908 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14909 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14910
14911 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14912 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14913
14914 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14915 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14916 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14917 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14918
14919 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14920 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14921
14922 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14923 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14924 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14925 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14926 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14927 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14928
14929 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14930 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14931 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14932 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14933
14934 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14935 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14936 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14937 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14938 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14939 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14940 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14941 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14942
14943 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14944 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14945 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14946
14947 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14948 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14949
14950 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14951 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14952
14953 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14954 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14955 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14956
14957 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14958 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14959 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14960
14961 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14962 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14963
14964 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14965
14966 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14967 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14968 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14969 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14970
14971 /* SSE3 */
14972 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14973 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14974 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14975 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14976 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14977 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
14978
14979 /* SSSE3 */
14980 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
14981 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
14982 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
14983 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
14984 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
14985 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
14986 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
14987 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
14988 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
14989 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
14990 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
14991 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
14992 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
14993 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
14994 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
14995 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
14996 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
14997 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
14998 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
14999 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15000 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15001 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15002 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15003 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15004 };
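
/* Entries above with a null name (e.g. the MMX shift-by-register forms)
   are skipped by the generic registration loop; ix86_init_mmx_sse_builtins
   defines them explicitly further below with more specific types.  */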
15005
15006 static const struct builtin_description bdesc_1arg[] =
15007 {
15008 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15009 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15010
15011 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15012 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15013 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15014
15015 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15016 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15017 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15018 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15019 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15020 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15021
15022 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15023 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15024
15025 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15026
15027 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15028 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15029
15030 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15031 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15032 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15033 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15035
15036 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15037
15038 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15039 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15040 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15041 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15042
15043 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15044 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15045 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15046
15047 /* SSE3 */
15048 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15049 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15050
15051 /* SSSE3 */
15052 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15053 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15054 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15055 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15056 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15057 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15058 };
15059
15060 static void
15061 ix86_init_builtins (void)
15062 {
15063 if (TARGET_MMX)
15064 ix86_init_mmx_sse_builtins ();
15065 }
15066
15067 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15068 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
15069 builtins are defined. */
15070 static void
15071 ix86_init_mmx_sse_builtins (void)
15072 {
15073 const struct builtin_description * d;
15074 size_t i;
15075
15076 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15077 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15078 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15079 tree V2DI_type_node
15080 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15081 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15082 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15083 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15084 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15085 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15086 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15087
15088 tree pchar_type_node = build_pointer_type (char_type_node);
15089 tree pcchar_type_node = build_pointer_type (
15090 build_type_variant (char_type_node, 1, 0));
15091 tree pfloat_type_node = build_pointer_type (float_type_node);
15092 tree pcfloat_type_node = build_pointer_type (
15093 build_type_variant (float_type_node, 1, 0));
15094 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15095 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15096 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15097
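/* The *_ftype_* trees below follow the naming convention
   RESULT_ftype_ARG1_ARG2...; e.g. int_ftype_v4sf_v4sf is the type of a
   function taking two V4SF vectors and returning int.  */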
15098 /* Comparisons. */
15099 tree int_ftype_v4sf_v4sf
15100 = build_function_type_list (integer_type_node,
15101 V4SF_type_node, V4SF_type_node, NULL_TREE);
15102 tree v4si_ftype_v4sf_v4sf
15103 = build_function_type_list (V4SI_type_node,
15104 V4SF_type_node, V4SF_type_node, NULL_TREE);
15105 /* MMX/SSE/integer conversions. */
15106 tree int_ftype_v4sf
15107 = build_function_type_list (integer_type_node,
15108 V4SF_type_node, NULL_TREE);
15109 tree int64_ftype_v4sf
15110 = build_function_type_list (long_long_integer_type_node,
15111 V4SF_type_node, NULL_TREE);
15112 tree int_ftype_v8qi
15113 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15114 tree v4sf_ftype_v4sf_int
15115 = build_function_type_list (V4SF_type_node,
15116 V4SF_type_node, integer_type_node, NULL_TREE);
15117 tree v4sf_ftype_v4sf_int64
15118 = build_function_type_list (V4SF_type_node,
15119 V4SF_type_node, long_long_integer_type_node,
15120 NULL_TREE);
15121 tree v4sf_ftype_v4sf_v2si
15122 = build_function_type_list (V4SF_type_node,
15123 V4SF_type_node, V2SI_type_node, NULL_TREE);
15124
15125 /* Miscellaneous. */
15126 tree v8qi_ftype_v4hi_v4hi
15127 = build_function_type_list (V8QI_type_node,
15128 V4HI_type_node, V4HI_type_node, NULL_TREE);
15129 tree v4hi_ftype_v2si_v2si
15130 = build_function_type_list (V4HI_type_node,
15131 V2SI_type_node, V2SI_type_node, NULL_TREE);
15132 tree v4sf_ftype_v4sf_v4sf_int
15133 = build_function_type_list (V4SF_type_node,
15134 V4SF_type_node, V4SF_type_node,
15135 integer_type_node, NULL_TREE);
15136 tree v2si_ftype_v4hi_v4hi
15137 = build_function_type_list (V2SI_type_node,
15138 V4HI_type_node, V4HI_type_node, NULL_TREE);
15139 tree v4hi_ftype_v4hi_int
15140 = build_function_type_list (V4HI_type_node,
15141 V4HI_type_node, integer_type_node, NULL_TREE);
15142 tree v4hi_ftype_v4hi_di
15143 = build_function_type_list (V4HI_type_node,
15144 V4HI_type_node, long_long_unsigned_type_node,
15145 NULL_TREE);
15146 tree v2si_ftype_v2si_di
15147 = build_function_type_list (V2SI_type_node,
15148 V2SI_type_node, long_long_unsigned_type_node,
15149 NULL_TREE);
15150 tree void_ftype_void
15151 = build_function_type (void_type_node, void_list_node);
15152 tree void_ftype_unsigned
15153 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15154 tree void_ftype_unsigned_unsigned
15155 = build_function_type_list (void_type_node, unsigned_type_node,
15156 unsigned_type_node, NULL_TREE);
15157 tree void_ftype_pcvoid_unsigned_unsigned
15158 = build_function_type_list (void_type_node, const_ptr_type_node,
15159 unsigned_type_node, unsigned_type_node,
15160 NULL_TREE);
15161 tree unsigned_ftype_void
15162 = build_function_type (unsigned_type_node, void_list_node);
15163 tree v2si_ftype_v4sf
15164 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15165 /* Loads/stores. */
15166 tree void_ftype_v8qi_v8qi_pchar
15167 = build_function_type_list (void_type_node,
15168 V8QI_type_node, V8QI_type_node,
15169 pchar_type_node, NULL_TREE);
15170 tree v4sf_ftype_pcfloat
15171 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15172 /* @@@ the type is bogus */
15173 tree v4sf_ftype_v4sf_pv2si
15174 = build_function_type_list (V4SF_type_node,
15175 V4SF_type_node, pv2si_type_node, NULL_TREE);
15176 tree void_ftype_pv2si_v4sf
15177 = build_function_type_list (void_type_node,
15178 pv2si_type_node, V4SF_type_node, NULL_TREE);
15179 tree void_ftype_pfloat_v4sf
15180 = build_function_type_list (void_type_node,
15181 pfloat_type_node, V4SF_type_node, NULL_TREE);
15182 tree void_ftype_pdi_di
15183 = build_function_type_list (void_type_node,
15184 pdi_type_node, long_long_unsigned_type_node,
15185 NULL_TREE);
15186 tree void_ftype_pv2di_v2di
15187 = build_function_type_list (void_type_node,
15188 pv2di_type_node, V2DI_type_node, NULL_TREE);
15189 /* Normal vector unops. */
15190 tree v4sf_ftype_v4sf
15191 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15192 tree v16qi_ftype_v16qi
15193 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15194 tree v8hi_ftype_v8hi
15195 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15196 tree v4si_ftype_v4si
15197 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15198 tree v8qi_ftype_v8qi
15199 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15200 tree v4hi_ftype_v4hi
15201 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15202
15203 /* Normal vector binops. */
15204 tree v4sf_ftype_v4sf_v4sf
15205 = build_function_type_list (V4SF_type_node,
15206 V4SF_type_node, V4SF_type_node, NULL_TREE);
15207 tree v8qi_ftype_v8qi_v8qi
15208 = build_function_type_list (V8QI_type_node,
15209 V8QI_type_node, V8QI_type_node, NULL_TREE);
15210 tree v4hi_ftype_v4hi_v4hi
15211 = build_function_type_list (V4HI_type_node,
15212 V4HI_type_node, V4HI_type_node, NULL_TREE);
15213 tree v2si_ftype_v2si_v2si
15214 = build_function_type_list (V2SI_type_node,
15215 V2SI_type_node, V2SI_type_node, NULL_TREE);
15216 tree di_ftype_di_di
15217 = build_function_type_list (long_long_unsigned_type_node,
15218 long_long_unsigned_type_node,
15219 long_long_unsigned_type_node, NULL_TREE);
15220
15221 tree di_ftype_di_di_int
15222 = build_function_type_list (long_long_unsigned_type_node,
15223 long_long_unsigned_type_node,
15224 long_long_unsigned_type_node,
15225 integer_type_node, NULL_TREE);
15226
15227 tree v2si_ftype_v2sf
15228 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15229 tree v2sf_ftype_v2si
15230 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15231 tree v2si_ftype_v2si
15232 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15233 tree v2sf_ftype_v2sf
15234 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15235 tree v2sf_ftype_v2sf_v2sf
15236 = build_function_type_list (V2SF_type_node,
15237 V2SF_type_node, V2SF_type_node, NULL_TREE);
15238 tree v2si_ftype_v2sf_v2sf
15239 = build_function_type_list (V2SI_type_node,
15240 V2SF_type_node, V2SF_type_node, NULL_TREE);
15241 tree pint_type_node = build_pointer_type (integer_type_node);
15242 tree pdouble_type_node = build_pointer_type (double_type_node);
15243 tree pcdouble_type_node = build_pointer_type (
15244 build_type_variant (double_type_node, 1, 0));
15245 tree int_ftype_v2df_v2df
15246 = build_function_type_list (integer_type_node,
15247 V2DF_type_node, V2DF_type_node, NULL_TREE);
15248
15249 tree void_ftype_pcvoid
15250 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15251 tree v4sf_ftype_v4si
15252 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15253 tree v4si_ftype_v4sf
15254 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15255 tree v2df_ftype_v4si
15256 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15257 tree v4si_ftype_v2df
15258 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15259 tree v2si_ftype_v2df
15260 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15261 tree v4sf_ftype_v2df
15262 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15263 tree v2df_ftype_v2si
15264 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15265 tree v2df_ftype_v4sf
15266 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15267 tree int_ftype_v2df
15268 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15269 tree int64_ftype_v2df
15270 = build_function_type_list (long_long_integer_type_node,
15271 V2DF_type_node, NULL_TREE);
15272 tree v2df_ftype_v2df_int
15273 = build_function_type_list (V2DF_type_node,
15274 V2DF_type_node, integer_type_node, NULL_TREE);
15275 tree v2df_ftype_v2df_int64
15276 = build_function_type_list (V2DF_type_node,
15277 V2DF_type_node, long_long_integer_type_node,
15278 NULL_TREE);
15279 tree v4sf_ftype_v4sf_v2df
15280 = build_function_type_list (V4SF_type_node,
15281 V4SF_type_node, V2DF_type_node, NULL_TREE);
15282 tree v2df_ftype_v2df_v4sf
15283 = build_function_type_list (V2DF_type_node,
15284 V2DF_type_node, V4SF_type_node, NULL_TREE);
15285 tree v2df_ftype_v2df_v2df_int
15286 = build_function_type_list (V2DF_type_node,
15287 V2DF_type_node, V2DF_type_node,
15288 integer_type_node,
15289 NULL_TREE);
15290 tree v2df_ftype_v2df_pcdouble
15291 = build_function_type_list (V2DF_type_node,
15292 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15293 tree void_ftype_pdouble_v2df
15294 = build_function_type_list (void_type_node,
15295 pdouble_type_node, V2DF_type_node, NULL_TREE);
15296 tree void_ftype_pint_int
15297 = build_function_type_list (void_type_node,
15298 pint_type_node, integer_type_node, NULL_TREE);
15299 tree void_ftype_v16qi_v16qi_pchar
15300 = build_function_type_list (void_type_node,
15301 V16QI_type_node, V16QI_type_node,
15302 pchar_type_node, NULL_TREE);
15303 tree v2df_ftype_pcdouble
15304 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15305 tree v2df_ftype_v2df_v2df
15306 = build_function_type_list (V2DF_type_node,
15307 V2DF_type_node, V2DF_type_node, NULL_TREE);
15308 tree v16qi_ftype_v16qi_v16qi
15309 = build_function_type_list (V16QI_type_node,
15310 V16QI_type_node, V16QI_type_node, NULL_TREE);
15311 tree v8hi_ftype_v8hi_v8hi
15312 = build_function_type_list (V8HI_type_node,
15313 V8HI_type_node, V8HI_type_node, NULL_TREE);
15314 tree v4si_ftype_v4si_v4si
15315 = build_function_type_list (V4SI_type_node,
15316 V4SI_type_node, V4SI_type_node, NULL_TREE);
15317 tree v2di_ftype_v2di_v2di
15318 = build_function_type_list (V2DI_type_node,
15319 V2DI_type_node, V2DI_type_node, NULL_TREE);
15320 tree v2di_ftype_v2df_v2df
15321 = build_function_type_list (V2DI_type_node,
15322 V2DF_type_node, V2DF_type_node, NULL_TREE);
15323 tree v2df_ftype_v2df
15324 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15325 tree v2di_ftype_v2di_int
15326 = build_function_type_list (V2DI_type_node,
15327 V2DI_type_node, integer_type_node, NULL_TREE);
15328 tree v2di_ftype_v2di_v2di_int
15329 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15330 V2DI_type_node, integer_type_node, NULL_TREE);
15331 tree v4si_ftype_v4si_int
15332 = build_function_type_list (V4SI_type_node,
15333 V4SI_type_node, integer_type_node, NULL_TREE);
15334 tree v8hi_ftype_v8hi_int
15335 = build_function_type_list (V8HI_type_node,
15336 V8HI_type_node, integer_type_node, NULL_TREE);
15337 tree v8hi_ftype_v8hi_v2di
15338 = build_function_type_list (V8HI_type_node,
15339 V8HI_type_node, V2DI_type_node, NULL_TREE);
15340 tree v4si_ftype_v4si_v2di
15341 = build_function_type_list (V4SI_type_node,
15342 V4SI_type_node, V2DI_type_node, NULL_TREE);
15343 tree v4si_ftype_v8hi_v8hi
15344 = build_function_type_list (V4SI_type_node,
15345 V8HI_type_node, V8HI_type_node, NULL_TREE);
15346 tree di_ftype_v8qi_v8qi
15347 = build_function_type_list (long_long_unsigned_type_node,
15348 V8QI_type_node, V8QI_type_node, NULL_TREE);
15349 tree di_ftype_v2si_v2si
15350 = build_function_type_list (long_long_unsigned_type_node,
15351 V2SI_type_node, V2SI_type_node, NULL_TREE);
15352 tree v2di_ftype_v16qi_v16qi
15353 = build_function_type_list (V2DI_type_node,
15354 V16QI_type_node, V16QI_type_node, NULL_TREE);
15355 tree v2di_ftype_v4si_v4si
15356 = build_function_type_list (V2DI_type_node,
15357 V4SI_type_node, V4SI_type_node, NULL_TREE);
15358 tree int_ftype_v16qi
15359 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15360 tree v16qi_ftype_pcchar
15361 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15362 tree void_ftype_pchar_v16qi
15363 = build_function_type_list (void_type_node,
15364 pchar_type_node, V16QI_type_node, NULL_TREE);
15365
15366 tree float80_type;
15367 tree float128_type;
15368 tree ftype;
15369
15370 /* The __float80 type. */
15371 if (TYPE_MODE (long_double_type_node) == XFmode)
15372 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15373 "__float80");
15374 else
15375 {
15376 /* long double is not XFmode; create a separate 80-bit type for __float80. */
15377 float80_type = make_node (REAL_TYPE);
15378 TYPE_PRECISION (float80_type) = 80;
15379 layout_type (float80_type);
15380 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15381 }
15382
15383 if (TARGET_64BIT)
15384 {
15385 float128_type = make_node (REAL_TYPE);
15386 TYPE_PRECISION (float128_type) = 128;
15387 layout_type (float128_type);
15388 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15389 }
15390
15391 /* Add all builtins that are more or less simple operations on two
15392 operands. */
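/* E.g. the bdesc_2arg entry for __builtin_ia32_addps uses
   CODE_FOR_addv4sf3, whose input operands are V4SFmode, so it is
   registered with type v4sf_ftype_v4sf_v4sf.  */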
15393 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15394 {
15395 /* Use one of the operands; the target can have a different mode for
15396 mask-generating compares. */
15397 enum machine_mode mode;
15398 tree type;
15399
15400 if (d->name == 0)
15401 continue;
15402 mode = insn_data[d->icode].operand[1].mode;
15403
15404 switch (mode)
15405 {
15406 case V16QImode:
15407 type = v16qi_ftype_v16qi_v16qi;
15408 break;
15409 case V8HImode:
15410 type = v8hi_ftype_v8hi_v8hi;
15411 break;
15412 case V4SImode:
15413 type = v4si_ftype_v4si_v4si;
15414 break;
15415 case V2DImode:
15416 type = v2di_ftype_v2di_v2di;
15417 break;
15418 case V2DFmode:
15419 type = v2df_ftype_v2df_v2df;
15420 break;
15421 case V4SFmode:
15422 type = v4sf_ftype_v4sf_v4sf;
15423 break;
15424 case V8QImode:
15425 type = v8qi_ftype_v8qi_v8qi;
15426 break;
15427 case V4HImode:
15428 type = v4hi_ftype_v4hi_v4hi;
15429 break;
15430 case V2SImode:
15431 type = v2si_ftype_v2si_v2si;
15432 break;
15433 case DImode:
15434 type = di_ftype_di_di;
15435 break;
15436
15437 default:
15438 gcc_unreachable ();
15439 }
15440
15441 /* Override for comparisons. */
15442 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15443 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15444 type = v4si_ftype_v4sf_v4sf;
15445
15446 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15447 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15448 type = v2di_ftype_v2df_v2df;
15449
15450 def_builtin (d->mask, d->name, type, d->code);
15451 }
15452
15453 /* Add all builtins that are more or less simple operations on 1 operand. */
15454 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15455 {
15456 enum machine_mode mode;
15457 tree type;
15458
15459 if (d->name == 0)
15460 continue;
15461 mode = insn_data[d->icode].operand[1].mode;
15462
15463 switch (mode)
15464 {
15465 case V16QImode:
15466 type = v16qi_ftype_v16qi;
15467 break;
15468 case V8HImode:
15469 type = v8hi_ftype_v8hi;
15470 break;
15471 case V4SImode:
15472 type = v4si_ftype_v4si;
15473 break;
15474 case V2DFmode:
15475 type = v2df_ftype_v2df;
15476 break;
15477 case V4SFmode:
15478 type = v4sf_ftype_v4sf;
15479 break;
15480 case V8QImode:
15481 type = v8qi_ftype_v8qi;
15482 break;
15483 case V4HImode:
15484 type = v4hi_ftype_v4hi;
15485 break;
15486 case V2SImode:
15487 type = v2si_ftype_v2si;
15488 break;
15489
15490 default:
15491 gcc_unreachable ();
15492 }
15493
15494 def_builtin (d->mask, d->name, type, d->code);
15495 }
15496
15497 /* Add the remaining MMX insns with somewhat more complicated types. */
15498 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15499 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15500 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15501 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15502
15503 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15504 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15505 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15506
15507 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15508 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15509
15510 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15511 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15512
15513 /* comi/ucomi insns. */
15514 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15515 if (d->mask == MASK_SSE2)
15516 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15517 else
15518 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15519
15520 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15521 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15522 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15523
15524 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15525 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15526 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15527 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15528 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15529 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15530 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15531 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15532 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15533 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15534 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15535
15536 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15537
15538 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15539 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15540
15541 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15542 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15543 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15544 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15545
15546 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15547 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15548 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15549 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15550
15551 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15552
15553 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15554
15555 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15556 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15557 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15558 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15559 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15560 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15561
15562 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15563
15564 /* Original 3DNow! */
15565 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15566 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15567 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15568 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15569 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15570 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15571 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15572 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15573 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15574 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15575 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15576 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15577 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15578 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15579 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15580 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15581 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15582 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15583 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15584 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15585
15586 /* 3DNow! extension as used in the Athlon CPU. */
15587 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15588 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15589 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15590 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15591 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15592 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15593
15594 /* SSE2 */
15595 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15596
15597 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15598 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15599
15600 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15601 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15602
15603 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15604 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15605 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15606 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15607 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15608
15609 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15610 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15611 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15612 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15613
15614 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15615 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15616
15617 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15618
15619 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15620 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15621
15622 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15623 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15624 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15625 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15626 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15627
15628 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15629
15630 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15631 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15632 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15633 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15634
15635 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15636 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15637 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15638
15639 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15640 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15641 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15642 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15643
15644 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15645 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15646 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15647
15648 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15649 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15650
15651 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15652 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15653
15654 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15655 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15656 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15657
15658 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15659 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15660 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15661
15662 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15663 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15664
15665 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15666 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15667 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15668 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15669
15670 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15671 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15672 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15673 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15674
15675 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15676 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15677
15678 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15679
15680 /* Prescott New Instructions. */
15681 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15682 void_ftype_pcvoid_unsigned_unsigned,
15683 IX86_BUILTIN_MONITOR);
15684 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15685 void_ftype_unsigned_unsigned,
15686 IX86_BUILTIN_MWAIT);
15687 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15688 v4sf_ftype_v4sf,
15689 IX86_BUILTIN_MOVSHDUP);
15690 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15691 v4sf_ftype_v4sf,
15692 IX86_BUILTIN_MOVSLDUP);
15693 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15694 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15695
15696 /* SSSE3. */
15697 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15698 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15699 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15700 IX86_BUILTIN_PALIGNR);
15701
15702 /* Access to the vec_init patterns. */
15703 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15704 integer_type_node, NULL_TREE);
15705 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15706 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15707
15708 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15709 short_integer_type_node,
15710 short_integer_type_node,
15711 short_integer_type_node, NULL_TREE);
15712 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15713 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15714
15715 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15716 char_type_node, char_type_node,
15717 char_type_node, char_type_node,
15718 char_type_node, char_type_node,
15719 char_type_node, NULL_TREE);
15720 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15721 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15722
15723 /* Access to the vec_extract patterns. */
15724 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15725 integer_type_node, NULL_TREE);
15726 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15727 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15728
15729 ftype = build_function_type_list (long_long_integer_type_node,
15730 V2DI_type_node, integer_type_node,
15731 NULL_TREE);
15732 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15733 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15734
15735 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15736 integer_type_node, NULL_TREE);
15737 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15738 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15739
15740 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15741 integer_type_node, NULL_TREE);
15742 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15743 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15744
15745 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15746 integer_type_node, NULL_TREE);
15747 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15748 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15749
15750 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15751 integer_type_node, NULL_TREE);
15752 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15753 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15754
15755 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15756 integer_type_node, NULL_TREE);
15757 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15758 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15759
15760 /* Access to the vec_set patterns. */
15761 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15762 intHI_type_node,
15763 integer_type_node, NULL_TREE);
15764 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15765 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15766
15767 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15768 intHI_type_node,
15769 integer_type_node, NULL_TREE);
15770 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15771 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15772 }
15773
15774 /* Errors in the source file can cause expand_expr to return const0_rtx
15775 where we expect a vector. To avoid crashing, use one of the vector
15776 clear instructions. */
15777 static rtx
15778 safe_vector_operand (rtx x, enum machine_mode mode)
15779 {
15780 if (x == const0_rtx)
15781 x = CONST0_RTX (mode);
15782 return x;
15783 }
15784
15785 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15786
15787 static rtx
15788 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15789 {
15790 rtx pat, xops[3];
15791 tree arg0 = TREE_VALUE (arglist);
15792 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15793 rtx op0 = expand_normal (arg0);
15794 rtx op1 = expand_normal (arg1);
15795 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15796 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15797 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15798
15799 if (VECTOR_MODE_P (mode0))
15800 op0 = safe_vector_operand (op0, mode0);
15801 if (VECTOR_MODE_P (mode1))
15802 op1 = safe_vector_operand (op1, mode1);
15803
15804 if (optimize || !target
15805 || GET_MODE (target) != tmode
15806 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15807 target = gen_reg_rtx (tmode);
15808
15809 if (GET_MODE (op1) == SImode && mode1 == TImode)
15810 {
15811 rtx x = gen_reg_rtx (V4SImode);
15812 emit_insn (gen_sse2_loadd (x, op1));
15813 op1 = gen_lowpart (TImode, x);
15814 }
15815
15816 /* The insn must want input operands in the same modes as the
15817 result. */
15818 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15819 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15820
15821 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15822 op0 = copy_to_mode_reg (mode0, op0);
15823 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15824 op1 = copy_to_mode_reg (mode1, op1);
15825
15826 /* ??? Using ix86_fixup_binary_operands is problematic when
15827 we've got mismatched modes. Fake it. */
15828
15829 xops[0] = target;
15830 xops[1] = op0;
15831 xops[2] = op1;
15832
15833 if (tmode == mode0 && tmode == mode1)
15834 {
15835 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15836 op0 = xops[1];
15837 op1 = xops[2];
15838 }
15839 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15840 {
15841 op0 = force_reg (mode0, op0);
15842 op1 = force_reg (mode1, op1);
15843 target = gen_reg_rtx (tmode);
15844 }
15845
15846 pat = GEN_FCN (icode) (target, op0, op1);
15847 if (! pat)
15848 return 0;
15849 emit_insn (pat);
15850 return target;
15851 }
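
/* Illustrative sketch (not part of the original source): a two-operand
   builtin such as

       __v2sf r = __builtin_ia32_pfadd (a, b);

   reaches this helper from ix86_expand_builtin, which dispatches it as

       ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

   The helper expands both arguments, copies them into registers that
   satisfy the insn's operand predicates, and emits the named pattern
   into TARGET.  */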
15852
15853 /* Subroutine of ix86_expand_builtin to take care of stores. */
15854
15855 static rtx
15856 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15857 {
15858 rtx pat;
15859 tree arg0 = TREE_VALUE (arglist);
15860 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15861 rtx op0 = expand_normal (arg0);
15862 rtx op1 = expand_normal (arg1);
15863 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15864 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15865
15866 if (VECTOR_MODE_P (mode1))
15867 op1 = safe_vector_operand (op1, mode1);
15868
15869 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15870 op1 = copy_to_mode_reg (mode1, op1);
15871
15872 pat = GEN_FCN (icode) (op0, op1);
15873 if (pat)
15874 emit_insn (pat);
15875 return 0;
15876 }
15877
15878 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15879
15880 static rtx
15881 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15882 rtx target, int do_load)
15883 {
15884 rtx pat;
15885 tree arg0 = TREE_VALUE (arglist);
15886 rtx op0 = expand_normal (arg0);
15887 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15888 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15889
15890 if (optimize || !target
15891 || GET_MODE (target) != tmode
15892 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15893 target = gen_reg_rtx (tmode);
15894 if (do_load)
15895 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15896 else
15897 {
15898 if (VECTOR_MODE_P (mode0))
15899 op0 = safe_vector_operand (op0, mode0);
15900
15901 if ((optimize && !register_operand (op0, mode0))
15902 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15903 op0 = copy_to_mode_reg (mode0, op0);
15904 }
15905
15906 pat = GEN_FCN (icode) (target, op0);
15907 if (! pat)
15908 return 0;
15909 emit_insn (pat);
15910 return target;
15911 }
15912
15913 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15914 sqrtss, rsqrtss, rcpss. */
15915
15916 static rtx
15917 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15918 {
15919 rtx pat;
15920 tree arg0 = TREE_VALUE (arglist);
15921 rtx op1, op0 = expand_normal (arg0);
15922 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15923 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15924
15925 if (optimize || !target
15926 || GET_MODE (target) != tmode
15927 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15928 target = gen_reg_rtx (tmode);
15929
15930 if (VECTOR_MODE_P (mode0))
15931 op0 = safe_vector_operand (op0, mode0);
15932
15933 if ((optimize && !register_operand (op0, mode0))
15934 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15935 op0 = copy_to_mode_reg (mode0, op0);
15936
15937 op1 = op0;
15938 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15939 op1 = copy_to_mode_reg (mode0, op1);
15940
15941 pat = GEN_FCN (icode) (target, op0, op1);
15942 if (! pat)
15943 return 0;
15944 emit_insn (pat);
15945 return target;
15946 }
15947
15948 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15949
15950 static rtx
15951 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15952 rtx target)
15953 {
15954 rtx pat;
15955 tree arg0 = TREE_VALUE (arglist);
15956 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15957 rtx op0 = expand_normal (arg0);
15958 rtx op1 = expand_normal (arg1);
15959 rtx op2;
15960 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15961 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15962 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15963 enum rtx_code comparison = d->comparison;
15964
15965 if (VECTOR_MODE_P (mode0))
15966 op0 = safe_vector_operand (op0, mode0);
15967 if (VECTOR_MODE_P (mode1))
15968 op1 = safe_vector_operand (op1, mode1);
15969
15970 /* Swap operands if we have a comparison that isn't available in
15971 hardware. */
15972 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15973 {
15974 rtx tmp = gen_reg_rtx (mode1);
15975 emit_move_insn (tmp, op1);
15976 op1 = op0;
15977 op0 = tmp;
15978 }
15979
15980 if (optimize || !target
15981 || GET_MODE (target) != tmode
15982 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15983 target = gen_reg_rtx (tmode);
15984
15985 if ((optimize && !register_operand (op0, mode0))
15986 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15987 op0 = copy_to_mode_reg (mode0, op0);
15988 if ((optimize && !register_operand (op1, mode1))
15989 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15990 op1 = copy_to_mode_reg (mode1, op1);
15991
15992 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15993 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15994 if (! pat)
15995 return 0;
15996 emit_insn (pat);
15997 return target;
15998 }
15999
16000 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16001
16002 static rtx
16003 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16004 rtx target)
16005 {
16006 rtx pat;
16007 tree arg0 = TREE_VALUE (arglist);
16008 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16009 rtx op0 = expand_normal (arg0);
16010 rtx op1 = expand_normal (arg1);
16011 rtx op2;
16012 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16013 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16014 enum rtx_code comparison = d->comparison;
16015
16016 if (VECTOR_MODE_P (mode0))
16017 op0 = safe_vector_operand (op0, mode0);
16018 if (VECTOR_MODE_P (mode1))
16019 op1 = safe_vector_operand (op1, mode1);
16020
16021 /* Swap operands if we have a comparison that isn't available in
16022 hardware. */
16023 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16024 {
16025 rtx tmp = op1;
16026 op1 = op0;
16027 op0 = tmp;
16028 }
16029
16030 target = gen_reg_rtx (SImode);
16031 emit_move_insn (target, const0_rtx);
16032 target = gen_rtx_SUBREG (QImode, target, 0);
16033
16034 if ((optimize && !register_operand (op0, mode0))
16035 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16036 op0 = copy_to_mode_reg (mode0, op0);
16037 if ((optimize && !register_operand (op1, mode1))
16038 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16039 op1 = copy_to_mode_reg (mode1, op1);
16040
16041 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16042 pat = GEN_FCN (d->icode) (op0, op1);
16043 if (! pat)
16044 return 0;
16045 emit_insn (pat);
16046 emit_insn (gen_rtx_SET (VOIDmode,
16047 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16048 gen_rtx_fmt_ee (comparison, QImode,
16049 SET_DEST (pat),
16050 const0_rtx)));
16051
16052 return SUBREG_REG (target);
16053 }
16054
16055 /* Return the integer constant in ARG. Constrain it to be in the range
16056 of the subparts of VEC_TYPE; issue an error if not. */
16057
16058 static int
16059 get_element_number (tree vec_type, tree arg)
16060 {
16061 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16062
16063 if (!host_integerp (arg, 1)
16064 || (elt = tree_low_cst (arg, 1), elt > max))
16065 {
16066 error ("selector must be an integer constant in the range 0..%wi", max);
16067 return 0;
16068 }
16069
16070 return elt;
16071 }
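
/* Illustration (not part of the original source): a call such as

       __builtin_ia32_vec_ext_v4sf (x, 5)

   reaches get_element_number with a four-element VEC_TYPE, so MAX is 3,
   the selector 5 is rejected with the error above, and element 0 is used
   as a fallback.  */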
16072
16073 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16074 ix86_expand_vector_init. We DO have language-level syntax for this, in
16075 the form of (type){ init-list }. Except that since we can't place emms
16076 instructions from inside the compiler, we can't allow the use of MMX
16077 registers unless the user explicitly asks for it. So we do *not* define
16078 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16079 we have builtins invoked by mmintrin.h that give us license to emit
16080 these sorts of instructions. */
16081
16082 static rtx
16083 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16084 {
16085 enum machine_mode tmode = TYPE_MODE (type);
16086 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16087 int i, n_elt = GET_MODE_NUNITS (tmode);
16088 rtvec v = rtvec_alloc (n_elt);
16089
16090 gcc_assert (VECTOR_MODE_P (tmode));
16091
16092 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16093 {
16094 rtx x = expand_normal (TREE_VALUE (arglist));
16095 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16096 }
16097
16098 gcc_assert (arglist == NULL);
16099
16100 if (!target || !register_operand (target, tmode))
16101 target = gen_reg_rtx (tmode);
16102
16103 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16104 return target;
16105 }
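
/* Illustrative example (an assumption about the intrinsic headers, not
   taken from this file): mmintrin.h wraps these builtins so that, e.g.,

       __m64 _mm_set_pi32 (int __i1, int __i0)
       {
         return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
       }

   The element list arrives here as ARGLIST and is expanded through
   ix86_expand_vector_init, so MMX vector initialization never needs a
   language-level vector constructor.  */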
16106
16107 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16108 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16109 had a language-level syntax for referencing vector elements. */
16110
16111 static rtx
16112 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16113 {
16114 enum machine_mode tmode, mode0;
16115 tree arg0, arg1;
16116 int elt;
16117 rtx op0;
16118
16119 arg0 = TREE_VALUE (arglist);
16120 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16121
16122 op0 = expand_normal (arg0);
16123 elt = get_element_number (TREE_TYPE (arg0), arg1);
16124
16125 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16126 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16127 gcc_assert (VECTOR_MODE_P (mode0));
16128
16129 op0 = force_reg (mode0, op0);
16130
16131 if (optimize || !target || !register_operand (target, tmode))
16132 target = gen_reg_rtx (tmode);
16133
16134 ix86_expand_vector_extract (true, target, op0, elt);
16135
16136 return target;
16137 }
16138
16139 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16140 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16141 a language-level syntax for referencing vector elements. */
16142
16143 static rtx
16144 ix86_expand_vec_set_builtin (tree arglist)
16145 {
16146 enum machine_mode tmode, mode1;
16147 tree arg0, arg1, arg2;
16148 int elt;
16149 rtx op0, op1;
16150
16151 arg0 = TREE_VALUE (arglist);
16152 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16153 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16154
16155 tmode = TYPE_MODE (TREE_TYPE (arg0));
16156 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16157 gcc_assert (VECTOR_MODE_P (tmode));
16158
16159 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16160 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16161 elt = get_element_number (TREE_TYPE (arg0), arg2);
16162
16163 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16164 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16165
16166 op0 = force_reg (tmode, op0);
16167 op1 = force_reg (mode1, op1);
16168
16169 ix86_expand_vector_set (true, op0, op1, elt);
16170
16171 return op0;
16172 }
16173
16174 /* Expand an expression EXP that calls a built-in function,
16175 with result going to TARGET if that's convenient
16176 (and in mode MODE if that's convenient).
16177 SUBTARGET may be used as the target for computing one of EXP's operands.
16178 IGNORE is nonzero if the value is to be ignored. */
16179
16180 static rtx
16181 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16182 enum machine_mode mode ATTRIBUTE_UNUSED,
16183 int ignore ATTRIBUTE_UNUSED)
16184 {
16185 const struct builtin_description *d;
16186 size_t i;
16187 enum insn_code icode;
16188 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16189 tree arglist = TREE_OPERAND (exp, 1);
16190 tree arg0, arg1, arg2;
16191 rtx op0, op1, op2, pat;
16192 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16193 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16194
16195 switch (fcode)
16196 {
16197 case IX86_BUILTIN_EMMS:
16198 emit_insn (gen_mmx_emms ());
16199 return 0;
16200
16201 case IX86_BUILTIN_SFENCE:
16202 emit_insn (gen_sse_sfence ());
16203 return 0;
16204
16205 case IX86_BUILTIN_MASKMOVQ:
16206 case IX86_BUILTIN_MASKMOVDQU:
16207 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16208 ? CODE_FOR_mmx_maskmovq
16209 : CODE_FOR_sse2_maskmovdqu);
16210 /* Note the arg order is different from the operand order. */
16211 arg1 = TREE_VALUE (arglist);
16212 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16213 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16214 op0 = expand_normal (arg0);
16215 op1 = expand_normal (arg1);
16216 op2 = expand_normal (arg2);
16217 mode0 = insn_data[icode].operand[0].mode;
16218 mode1 = insn_data[icode].operand[1].mode;
16219 mode2 = insn_data[icode].operand[2].mode;
16220
16221 op0 = force_reg (Pmode, op0);
16222 op0 = gen_rtx_MEM (mode1, op0);
16223
16224 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16225 op0 = copy_to_mode_reg (mode0, op0);
16226 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16227 op1 = copy_to_mode_reg (mode1, op1);
16228 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16229 op2 = copy_to_mode_reg (mode2, op2);
16230 pat = GEN_FCN (icode) (op0, op1, op2);
16231 if (! pat)
16232 return 0;
16233 emit_insn (pat);
16234 return 0;
16235
16236 case IX86_BUILTIN_SQRTSS:
16237 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16238 case IX86_BUILTIN_RSQRTSS:
16239 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16240 case IX86_BUILTIN_RCPSS:
16241 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16242
16243 case IX86_BUILTIN_LOADUPS:
16244 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16245
16246 case IX86_BUILTIN_STOREUPS:
16247 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16248
16249 case IX86_BUILTIN_LOADHPS:
16250 case IX86_BUILTIN_LOADLPS:
16251 case IX86_BUILTIN_LOADHPD:
16252 case IX86_BUILTIN_LOADLPD:
16253 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16254 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16255 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16256 : CODE_FOR_sse2_loadlpd);
16257 arg0 = TREE_VALUE (arglist);
16258 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16259 op0 = expand_normal (arg0);
16260 op1 = expand_normal (arg1);
16261 tmode = insn_data[icode].operand[0].mode;
16262 mode0 = insn_data[icode].operand[1].mode;
16263 mode1 = insn_data[icode].operand[2].mode;
16264
16265 op0 = force_reg (mode0, op0);
16266 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16267 if (optimize || target == 0
16268 || GET_MODE (target) != tmode
16269 || !register_operand (target, tmode))
16270 target = gen_reg_rtx (tmode);
16271 pat = GEN_FCN (icode) (target, op0, op1);
16272 if (! pat)
16273 return 0;
16274 emit_insn (pat);
16275 return target;
16276
16277 case IX86_BUILTIN_STOREHPS:
16278 case IX86_BUILTIN_STORELPS:
16279 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16280 : CODE_FOR_sse_storelps);
16281 arg0 = TREE_VALUE (arglist);
16282 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16283 op0 = expand_normal (arg0);
16284 op1 = expand_normal (arg1);
16285 mode0 = insn_data[icode].operand[0].mode;
16286 mode1 = insn_data[icode].operand[1].mode;
16287
16288 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16289 op1 = force_reg (mode1, op1);
16290
16291 pat = GEN_FCN (icode) (op0, op1);
16292 if (! pat)
16293 return 0;
16294 emit_insn (pat);
16295 return const0_rtx;
16296
16297 case IX86_BUILTIN_MOVNTPS:
16298 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16299 case IX86_BUILTIN_MOVNTQ:
16300 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16301
16302 case IX86_BUILTIN_LDMXCSR:
16303 op0 = expand_normal (TREE_VALUE (arglist));
16304 target = assign_386_stack_local (SImode, SLOT_TEMP);
16305 emit_move_insn (target, op0);
16306 emit_insn (gen_sse_ldmxcsr (target));
16307 return 0;
16308
16309 case IX86_BUILTIN_STMXCSR:
16310 target = assign_386_stack_local (SImode, SLOT_TEMP);
16311 emit_insn (gen_sse_stmxcsr (target));
16312 return copy_to_mode_reg (SImode, target);
16313
16314 case IX86_BUILTIN_SHUFPS:
16315 case IX86_BUILTIN_SHUFPD:
16316 icode = (fcode == IX86_BUILTIN_SHUFPS
16317 ? CODE_FOR_sse_shufps
16318 : CODE_FOR_sse2_shufpd);
16319 arg0 = TREE_VALUE (arglist);
16320 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16321 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16322 op0 = expand_normal (arg0);
16323 op1 = expand_normal (arg1);
16324 op2 = expand_normal (arg2);
16325 tmode = insn_data[icode].operand[0].mode;
16326 mode0 = insn_data[icode].operand[1].mode;
16327 mode1 = insn_data[icode].operand[2].mode;
16328 mode2 = insn_data[icode].operand[3].mode;
16329
16330 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16331 op0 = copy_to_mode_reg (mode0, op0);
16332 if ((optimize && !register_operand (op1, mode1))
16333 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16334 op1 = copy_to_mode_reg (mode1, op1);
16335 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16336 {
16337 /* @@@ better error message */
16338 error ("mask must be an immediate");
16339 return gen_reg_rtx (tmode);
16340 }
16341 if (optimize || target == 0
16342 || GET_MODE (target) != tmode
16343 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16344 target = gen_reg_rtx (tmode);
16345 pat = GEN_FCN (icode) (target, op0, op1, op2);
16346 if (! pat)
16347 return 0;
16348 emit_insn (pat);
16349 return target;
16350
16351 case IX86_BUILTIN_PSHUFW:
16352 case IX86_BUILTIN_PSHUFD:
16353 case IX86_BUILTIN_PSHUFHW:
16354 case IX86_BUILTIN_PSHUFLW:
16355 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16356 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16357 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16358 : CODE_FOR_mmx_pshufw);
16359 arg0 = TREE_VALUE (arglist);
16360 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16361 op0 = expand_normal (arg0);
16362 op1 = expand_normal (arg1);
16363 tmode = insn_data[icode].operand[0].mode;
16364 mode1 = insn_data[icode].operand[1].mode;
16365 mode2 = insn_data[icode].operand[2].mode;
16366
16367 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16368 op0 = copy_to_mode_reg (mode1, op0);
16369 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16370 {
16371 /* @@@ better error message */
16372 error ("mask must be an immediate");
16373 return const0_rtx;
16374 }
16375 if (target == 0
16376 || GET_MODE (target) != tmode
16377 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16378 target = gen_reg_rtx (tmode);
16379 pat = GEN_FCN (icode) (target, op0, op1);
16380 if (! pat)
16381 return 0;
16382 emit_insn (pat);
16383 return target;
16384
16385 case IX86_BUILTIN_PSLLDQI128:
16386 case IX86_BUILTIN_PSRLDQI128:
16387 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16388 : CODE_FOR_sse2_lshrti3);
16389 arg0 = TREE_VALUE (arglist);
16390 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16391 op0 = expand_normal (arg0);
16392 op1 = expand_normal (arg1);
16393 tmode = insn_data[icode].operand[0].mode;
16394 mode1 = insn_data[icode].operand[1].mode;
16395 mode2 = insn_data[icode].operand[2].mode;
16396
16397 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16398 {
16399 op0 = copy_to_reg (op0);
16400 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16401 }
16402 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16403 {
16404 error ("shift must be an immediate");
16405 return const0_rtx;
16406 }
16407 target = gen_reg_rtx (V2DImode);
16408 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16409 if (! pat)
16410 return 0;
16411 emit_insn (pat);
16412 return target;
16413
16414 case IX86_BUILTIN_FEMMS:
16415 emit_insn (gen_mmx_femms ());
16416 return NULL_RTX;
16417
16418 case IX86_BUILTIN_PAVGUSB:
16419 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16420
16421 case IX86_BUILTIN_PF2ID:
16422 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16423
16424 case IX86_BUILTIN_PFACC:
16425 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16426
16427 case IX86_BUILTIN_PFADD:
16428 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16429
16430 case IX86_BUILTIN_PFCMPEQ:
16431 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16432
16433 case IX86_BUILTIN_PFCMPGE:
16434 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16435
16436 case IX86_BUILTIN_PFCMPGT:
16437 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16438
16439 case IX86_BUILTIN_PFMAX:
16440 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16441
16442 case IX86_BUILTIN_PFMIN:
16443 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16444
16445 case IX86_BUILTIN_PFMUL:
16446 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16447
16448 case IX86_BUILTIN_PFRCP:
16449 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16450
16451 case IX86_BUILTIN_PFRCPIT1:
16452 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16453
16454 case IX86_BUILTIN_PFRCPIT2:
16455 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16456
16457 case IX86_BUILTIN_PFRSQIT1:
16458 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16459
16460 case IX86_BUILTIN_PFRSQRT:
16461 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16462
16463 case IX86_BUILTIN_PFSUB:
16464 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16465
16466 case IX86_BUILTIN_PFSUBR:
16467 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16468
16469 case IX86_BUILTIN_PI2FD:
16470 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16471
16472 case IX86_BUILTIN_PMULHRW:
16473 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16474
16475 case IX86_BUILTIN_PF2IW:
16476 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16477
16478 case IX86_BUILTIN_PFNACC:
16479 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16480
16481 case IX86_BUILTIN_PFPNACC:
16482 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16483
16484 case IX86_BUILTIN_PI2FW:
16485 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16486
16487 case IX86_BUILTIN_PSWAPDSI:
16488 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16489
16490 case IX86_BUILTIN_PSWAPDSF:
16491 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16492
16493 case IX86_BUILTIN_SQRTSD:
16494 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16495 case IX86_BUILTIN_LOADUPD:
16496 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16497 case IX86_BUILTIN_STOREUPD:
16498 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16499
16500 case IX86_BUILTIN_MFENCE:
16501 emit_insn (gen_sse2_mfence ());
16502 return 0;
16503 case IX86_BUILTIN_LFENCE:
16504 emit_insn (gen_sse2_lfence ());
16505 return 0;
16506
16507 case IX86_BUILTIN_CLFLUSH:
16508 arg0 = TREE_VALUE (arglist);
16509 op0 = expand_normal (arg0);
16510 icode = CODE_FOR_sse2_clflush;
16511 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16512 op0 = copy_to_mode_reg (Pmode, op0);
16513
16514 emit_insn (gen_sse2_clflush (op0));
16515 return 0;
16516
16517 case IX86_BUILTIN_MOVNTPD:
16518 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16519 case IX86_BUILTIN_MOVNTDQ:
16520 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16521 case IX86_BUILTIN_MOVNTI:
16522 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16523
16524 case IX86_BUILTIN_LOADDQU:
16525 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16526 case IX86_BUILTIN_STOREDQU:
16527 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16528
16529 case IX86_BUILTIN_MONITOR:
16530 arg0 = TREE_VALUE (arglist);
16531 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16532 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16533 op0 = expand_normal (arg0);
16534 op1 = expand_normal (arg1);
16535 op2 = expand_normal (arg2);
16536 if (!REG_P (op0))
16537 op0 = copy_to_mode_reg (Pmode, op0);
16538 if (!REG_P (op1))
16539 op1 = copy_to_mode_reg (SImode, op1);
16540 if (!REG_P (op2))
16541 op2 = copy_to_mode_reg (SImode, op2);
16542 if (!TARGET_64BIT)
16543 emit_insn (gen_sse3_monitor (op0, op1, op2));
16544 else
16545 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16546 return 0;
16547
16548 case IX86_BUILTIN_MWAIT:
16549 arg0 = TREE_VALUE (arglist);
16550 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16551 op0 = expand_normal (arg0);
16552 op1 = expand_normal (arg1);
16553 if (!REG_P (op0))
16554 op0 = copy_to_mode_reg (SImode, op0);
16555 if (!REG_P (op1))
16556 op1 = copy_to_mode_reg (SImode, op1);
16557 emit_insn (gen_sse3_mwait (op0, op1));
16558 return 0;
16559
16560 case IX86_BUILTIN_LDDQU:
16561 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16562 target, 1);
16563
16564 case IX86_BUILTIN_PALIGNR:
16565 case IX86_BUILTIN_PALIGNR128:
16566 if (fcode == IX86_BUILTIN_PALIGNR)
16567 {
16568 icode = CODE_FOR_ssse3_palignrdi;
16569 mode = DImode;
16570 }
16571 else
16572 {
16573 icode = CODE_FOR_ssse3_palignrti;
16574 mode = V2DImode;
16575 }
16576 arg0 = TREE_VALUE (arglist);
16577 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16578 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16579 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16580 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16581 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16582 tmode = insn_data[icode].operand[0].mode;
16583 mode1 = insn_data[icode].operand[1].mode;
16584 mode2 = insn_data[icode].operand[2].mode;
16585 mode3 = insn_data[icode].operand[3].mode;
16586
16587 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16588 {
16589 op0 = copy_to_reg (op0);
16590 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16591 }
16592 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16593 {
16594 op1 = copy_to_reg (op1);
16595 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16596 }
16597 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16598 {
16599 error ("shift must be an immediate");
16600 return const0_rtx;
16601 }
16602 target = gen_reg_rtx (mode);
16603 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16604 op0, op1, op2);
16605 if (! pat)
16606 return 0;
16607 emit_insn (pat);
16608 return target;
16609
16610 case IX86_BUILTIN_VEC_INIT_V2SI:
16611 case IX86_BUILTIN_VEC_INIT_V4HI:
16612 case IX86_BUILTIN_VEC_INIT_V8QI:
16613 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16614
16615 case IX86_BUILTIN_VEC_EXT_V2DF:
16616 case IX86_BUILTIN_VEC_EXT_V2DI:
16617 case IX86_BUILTIN_VEC_EXT_V4SF:
16618 case IX86_BUILTIN_VEC_EXT_V4SI:
16619 case IX86_BUILTIN_VEC_EXT_V8HI:
16620 case IX86_BUILTIN_VEC_EXT_V2SI:
16621 case IX86_BUILTIN_VEC_EXT_V4HI:
16622 return ix86_expand_vec_ext_builtin (arglist, target);
16623
16624 case IX86_BUILTIN_VEC_SET_V8HI:
16625 case IX86_BUILTIN_VEC_SET_V4HI:
16626 return ix86_expand_vec_set_builtin (arglist);
16627
16628 default:
16629 break;
16630 }
16631
16632 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16633 if (d->code == fcode)
16634 {
16635 /* Compares are treated specially. */
16636 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16637 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16638 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16639 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16640 return ix86_expand_sse_compare (d, arglist, target);
16641
16642 return ix86_expand_binop_builtin (d->icode, arglist, target);
16643 }
16644
16645 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16646 if (d->code == fcode)
16647 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16648
16649 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16650 if (d->code == fcode)
16651 return ix86_expand_sse_comi (d, arglist, target);
16652
16653 gcc_unreachable ();
16654 }
16655
16656 /* Store OPERAND to the memory after reload is completed. This means
16657 that we can't easily use assign_stack_local. */
16658 rtx
16659 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16660 {
16661 rtx result;
16662
16663 gcc_assert (reload_completed);
16664 if (TARGET_RED_ZONE)
16665 {
16666 result = gen_rtx_MEM (mode,
16667 gen_rtx_PLUS (Pmode,
16668 stack_pointer_rtx,
16669 GEN_INT (-RED_ZONE_SIZE)));
16670 emit_move_insn (result, operand);
16671 }
16672 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16673 {
16674 switch (mode)
16675 {
16676 case HImode:
16677 case SImode:
16678 operand = gen_lowpart (DImode, operand);
16679 /* FALLTHRU */
16680 case DImode:
16681 emit_insn (
16682 gen_rtx_SET (VOIDmode,
16683 gen_rtx_MEM (DImode,
16684 gen_rtx_PRE_DEC (DImode,
16685 stack_pointer_rtx)),
16686 operand));
16687 break;
16688 default:
16689 gcc_unreachable ();
16690 }
16691 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16692 }
16693 else
16694 {
16695 switch (mode)
16696 {
16697 case DImode:
16698 {
16699 rtx operands[2];
16700 split_di (&operand, 1, operands, operands + 1);
16701 emit_insn (
16702 gen_rtx_SET (VOIDmode,
16703 gen_rtx_MEM (SImode,
16704 gen_rtx_PRE_DEC (Pmode,
16705 stack_pointer_rtx)),
16706 operands[1]));
16707 emit_insn (
16708 gen_rtx_SET (VOIDmode,
16709 gen_rtx_MEM (SImode,
16710 gen_rtx_PRE_DEC (Pmode,
16711 stack_pointer_rtx)),
16712 operands[0]));
16713 }
16714 break;
16715 case HImode:
16716 /* Store HImodes as SImodes. */
16717 operand = gen_lowpart (SImode, operand);
16718 /* FALLTHRU */
16719 case SImode:
16720 emit_insn (
16721 gen_rtx_SET (VOIDmode,
16722 gen_rtx_MEM (GET_MODE (operand),
16723 gen_rtx_PRE_DEC (SImode,
16724 stack_pointer_rtx)),
16725 operand));
16726 break;
16727 default:
16728 gcc_unreachable ();
16729 }
16730 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16731 }
16732 return result;
16733 }
16734
16735 /* Free operand from the memory. */
16736 void
16737 ix86_free_from_memory (enum machine_mode mode)
16738 {
16739 if (!TARGET_RED_ZONE)
16740 {
16741 int size;
16742
16743 if (mode == DImode || TARGET_64BIT)
16744 size = 8;
16745 else
16746 size = 4;
16747 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16748 to a pop or add instruction if registers are available. */
16749 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16750 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16751 GEN_INT (size))));
16752 }
16753 }
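
/* Illustrative sketch (not part of the original source): on a 64-bit
   target without a red zone,

       mem = ix86_force_to_memory (DImode, operand);

   emits the equivalent of a push and returns (mem:DI (reg:DI sp)), while
   the matching ix86_free_from_memory (DImode) emits

       (set (reg:DI sp) (plus:DI (reg:DI sp) (const_int 8)))

   which peephole2 can later rewrite as a pop or an add when a scratch
   register is available.  */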
16754
16755 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16756 QImode must go into class Q_REGS.
16757 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16758 movdf to do mem-to-mem moves through integer regs. */
16759 enum reg_class
16760 ix86_preferred_reload_class (rtx x, enum reg_class class)
16761 {
16762 enum machine_mode mode = GET_MODE (x);
16763
16764 /* We're only allowed to return a subclass of CLASS. Many of the
16765 following checks fail for NO_REGS, so eliminate that early. */
16766 if (class == NO_REGS)
16767 return NO_REGS;
16768
16769 /* All classes can load zeros. */
16770 if (x == CONST0_RTX (mode))
16771 return class;
16772
16773 /* Force constants into memory if we are loading a (nonzero) constant into
16774 an MMX or SSE register. This is because there are no MMX/SSE instructions
16775 to load from a constant. */
16776 if (CONSTANT_P (x)
16777 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16778 return NO_REGS;
16779
16780 /* Prefer SSE regs only, if we can use them for math. */
16781 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16782 return SSE_CLASS_P (class) ? class : NO_REGS;
16783
16784 /* Floating-point constants need more complex checks. */
16785 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16786 {
16787 /* General regs can load everything. */
16788 if (reg_class_subset_p (class, GENERAL_REGS))
16789 return class;
16790
16791 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16792 zero above. We only want to wind up preferring 80387 registers if
16793 we plan on doing computation with them. */
16794 if (TARGET_80387
16795 && standard_80387_constant_p (x))
16796 {
16797 /* Limit class to non-sse. */
16798 if (class == FLOAT_SSE_REGS)
16799 return FLOAT_REGS;
16800 if (class == FP_TOP_SSE_REGS)
16801 return FP_TOP_REG;
16802 if (class == FP_SECOND_SSE_REGS)
16803 return FP_SECOND_REG;
16804 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16805 return class;
16806 }
16807
16808 return NO_REGS;
16809 }
16810
16811 /* Generally when we see PLUS here, it's the function invariant
16812 (plus soft-fp const_int). Which can only be computed into general
16813 regs. */
16814 if (GET_CODE (x) == PLUS)
16815 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16816
16817 /* QImode constants are easy to load, but non-constant QImode data
16818 must go into Q_REGS. */
16819 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16820 {
16821 if (reg_class_subset_p (class, Q_REGS))
16822 return class;
16823 if (reg_class_subset_p (Q_REGS, class))
16824 return Q_REGS;
16825 return NO_REGS;
16826 }
16827
16828 return class;
16829 }
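
/* Illustrative example (not part of the original source): reloading the
   nonzero constant (const_double:SF 1.0) into SSE_REGS returns NO_REGS
   above, because no SSE instruction loads from an immediate; the constant
   is therefore forced into the constant pool and loaded from memory.  */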
16830
16831 /* Discourage putting floating-point values in SSE registers unless
16832 SSE math is being used, and likewise for the 387 registers. */
16833 enum reg_class
16834 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16835 {
16836 enum machine_mode mode = GET_MODE (x);
16837
16838 /* Restrict the output reload class to the register bank that we are doing
16839 math on. If we would like not to return a subset of CLASS, reject this
16840 alternative: if reload cannot do this, it will still use its choice. */
16842 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16843 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16844
16845 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16846 {
16847 if (class == FP_TOP_SSE_REGS)
16848 return FP_TOP_REG;
16849 else if (class == FP_SECOND_SSE_REGS)
16850 return FP_SECOND_REG;
16851 else
16852 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16853 }
16854
16855 return class;
16856 }
16857
16858 /* If we are copying between general and FP registers, we need a memory
16859 location. The same is true for SSE and MMX registers.
16860
16861 The macro can't work reliably when one of the CLASSES is a class containing
16862 registers from multiple units (SSE, MMX, integer). We avoid this by never
16863 combining those units in a single alternative in the machine description.
16864 Ensure that this constraint holds to avoid unexpected surprises.
16865
16866 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16867 enforce these sanity checks. */
16868
16869 int
16870 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16871 enum machine_mode mode, int strict)
16872 {
16873 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16874 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16875 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16876 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16877 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16878 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16879 {
16880 gcc_assert (!strict);
16881 return true;
16882 }
16883
16884 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16885 return true;
16886
16887 /* ??? This is a lie. We do have moves between mmx/general and between
16888 mmx/sse2. But by saying we need secondary memory we discourage the
16889 register allocator from using the mmx registers unless needed. */
16890 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16891 return true;
16892
16893 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16894 {
16895 /* SSE1 doesn't have any direct moves from other classes. */
16896 if (!TARGET_SSE2)
16897 return true;
16898
16899 /* If the target says that inter-unit moves are more expensive
16900 than moving through memory, then don't generate them. */
16901 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16902 return true;
16903
16904 /* Between SSE and general, we have moves no larger than word size. */
16905 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16906 return true;
16907
16908 /* ??? For the cost of one register reformat penalty, we could use
16909 the same instructions to move SFmode and DFmode data, but the
16910 relevant move patterns don't support those alternatives. */
16911 if (mode == SFmode || mode == DFmode)
16912 return true;
16913 }
16914
16915 return false;
16916 }
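
/* Illustrative example (not part of the original source): with only SSE1
   enabled, ix86_secondary_memory_needed (SSE_REGS, GENERAL_REGS, SImode, 1)
   returns true, because SSE1 has no direct moves to or from the integer
   unit; reload then bounces the value through a stack slot, and the extra
   cost of that bounce is what ix86_register_move_cost accounts for
   below.  */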
16917
16918 /* Return true if the registers in CLASS cannot represent the change from
16919 modes FROM to TO. */
16920
16921 bool
16922 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16923 enum reg_class class)
16924 {
16925 if (from == to)
16926 return false;
16927
16928 /* x87 registers can't do subreg at all, as all values are reformatted
16929 to extended precision. */
16930 if (MAYBE_FLOAT_CLASS_P (class))
16931 return true;
16932
16933 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16934 {
16935 /* Vector registers do not support QI or HImode loads. If we don't
16936 disallow a change to these modes, reload will assume it's ok to
16937 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16938 the vec_dupv4hi pattern. */
16939 if (GET_MODE_SIZE (from) < 4)
16940 return true;
16941
16942 /* Vector registers do not support subreg with nonzero offsets, which
16943 are otherwise valid for integer registers. Since we can't see
16944 whether we have a nonzero offset from here, prohibit all
16945 nonparadoxical subregs changing size. */
16946 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16947 return true;
16948 }
16949
16950 return false;
16951 }
16952
16953 /* Return the cost of moving data from a register in class CLASS1 to
16954 one in class CLASS2.
16955
16956 It is not required that the cost always equal 2 when FROM is the same as TO;
16957 on some machines it is expensive to move between registers if they are not
16958 general registers. */
16959
16960 int
16961 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16962 enum reg_class class2)
16963 {
16964 /* In case we require secondary memory, compute the cost of the store
16965 followed by a load. In order to avoid bad register allocation choices,
16966 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16967
16968 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16969 {
16970 int cost = 1;
16971
16972 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16973 MEMORY_MOVE_COST (mode, class1, 1));
16974 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16975 MEMORY_MOVE_COST (mode, class2, 1));
16976
16977 /* In case of copying from a general purpose register we may emit multiple
16978 stores followed by a single load, causing a memory size mismatch stall.
16979 Count this as an arbitrarily high cost of 20. */
16980 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16981 cost += 20;
16982
16983 /* In the case of FP/MMX moves, the registers actually overlap, and we
16984 have to switch modes in order to treat them differently. */
16985 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16986 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16987 cost += 20;
16988
16989 return cost;
16990 }
16991
16992 /* Moves between SSE/MMX and integer unit are expensive. */
16993 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16994 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16995 return ix86_cost->mmxsse_to_integer;
16996 if (MAYBE_FLOAT_CLASS_P (class1))
16997 return ix86_cost->fp_move;
16998 if (MAYBE_SSE_CLASS_P (class1))
16999 return ix86_cost->sse_move;
17000 if (MAYBE_MMX_CLASS_P (class1))
17001 return ix86_cost->mmx_move;
17002 return 2;
17003 }
17004
17005 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17006
17007 bool
17008 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17009 {
17010 /* Flags, and only flags, can hold CCmode values. */
17011 if (CC_REGNO_P (regno))
17012 return GET_MODE_CLASS (mode) == MODE_CC;
17013 if (GET_MODE_CLASS (mode) == MODE_CC
17014 || GET_MODE_CLASS (mode) == MODE_RANDOM
17015 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17016 return 0;
17017 if (FP_REGNO_P (regno))
17018 return VALID_FP_MODE_P (mode);
17019 if (SSE_REGNO_P (regno))
17020 {
17021 /* We implement the move patterns for all vector modes into and
17022 out of SSE registers, even when no operation instructions
17023 are available. */
17024 return (VALID_SSE_REG_MODE (mode)
17025 || VALID_SSE2_REG_MODE (mode)
17026 || VALID_MMX_REG_MODE (mode)
17027 || VALID_MMX_REG_MODE_3DNOW (mode));
17028 }
17029 if (MMX_REGNO_P (regno))
17030 {
17031 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17032 so if the register is available at all, then we can move data of
17033 the given mode into or out of it. */
17034 return (VALID_MMX_REG_MODE (mode)
17035 || VALID_MMX_REG_MODE_3DNOW (mode));
17036 }
17037
17038 if (mode == QImode)
17039 {
17040 /* Take care with QImode values - they can be in non-QI regs,
17041 but then they do cause partial register stalls. */
17042 if (regno < 4 || TARGET_64BIT)
17043 return 1;
17044 if (!TARGET_PARTIAL_REG_STALL)
17045 return 1;
17046 return reload_in_progress || reload_completed;
17047 }
17048 /* We handle both integers and floats in the general purpose registers. */
17049 else if (VALID_INT_MODE_P (mode))
17050 return 1;
17051 else if (VALID_FP_MODE_P (mode))
17052 return 1;
17053 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17054 on to use that value in smaller contexts, this can easily force a
17055 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17056 supporting DImode, allow it. */
17057 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17058 return 1;
17059
17060 return 0;
17061 }
17062
17063 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17064 tieable integer mode. */
17065
17066 static bool
17067 ix86_tieable_integer_mode_p (enum machine_mode mode)
17068 {
17069 switch (mode)
17070 {
17071 case HImode:
17072 case SImode:
17073 return true;
17074
17075 case QImode:
17076 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17077
17078 case DImode:
17079 return TARGET_64BIT;
17080
17081 default:
17082 return false;
17083 }
17084 }
17085
17086 /* Return true if MODE1 is accessible in a register that can hold MODE2
17087 without copying. That is, all register classes that can hold MODE2
17088 can also hold MODE1. */
17089
17090 bool
17091 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17092 {
17093 if (mode1 == mode2)
17094 return true;
17095
17096 if (ix86_tieable_integer_mode_p (mode1)
17097 && ix86_tieable_integer_mode_p (mode2))
17098 return true;
17099
17100 /* MODE2 being XFmode implies fp stack or general regs, which means we
17101 can tie any smaller floating point modes to it. Note that we do not
17102 tie this with TFmode. */
17103 if (mode2 == XFmode)
17104 return mode1 == SFmode || mode1 == DFmode;
17105
17106 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17107 that we can tie it with SFmode. */
17108 if (mode2 == DFmode)
17109 return mode1 == SFmode;
17110
17111 /* If MODE2 is only appropriate for an SSE register, then tie with
17112 any other mode acceptable to SSE registers. */
17113 if (GET_MODE_SIZE (mode2) >= 8
17114 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17115 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17116
17117 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17118 with any other mode acceptable to MMX registers. */
17119 if (GET_MODE_SIZE (mode2) == 8
17120 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17121 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17122
17123 return false;
17124 }
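
/* A few illustrative cases (not part of the original source):

       ix86_modes_tieable_p (SImode, HImode) == true   (both are tieable
                                                        integer modes)
       ix86_modes_tieable_p (SFmode, XFmode) == true   (XFmode lives only in
                                                        x87 or general regs)
       ix86_modes_tieable_p (XFmode, SFmode) == false  (SFmode is also valid
                                                        in SSE regs, which
                                                        cannot hold XFmode)  */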
17125
17126 /* Return the cost of moving data of mode M between a
17127 register and memory. A value of 2 is the default; this cost is
17128 relative to those in `REGISTER_MOVE_COST'.
17129
17130 If moving between registers and memory is more expensive than
17131 between two registers, you should define this macro to express the
17132 relative cost.
17133
17134 Also model the increased cost of moving QImode registers in
17135 non-Q_REGS classes.
17136 */
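/* As a hedged illustration of the tables consulted below (the actual numbers
   depend on the selected -mtune cost table): a DFmode load into FLOAT_REGS
   returns ix86_cost->fp_load[1], an SFmode store from SSE_REGS returns
   ix86_cost->sse_store[0], and a 12-byte XFmode store through the general
   registers on a 32-bit target costs 3 * ix86_cost->int_store[2]. */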
17137 int
17138 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17139 {
17140 if (FLOAT_CLASS_P (class))
17141 {
17142 int index;
17143 switch (mode)
17144 {
17145 case SFmode:
17146 index = 0;
17147 break;
17148 case DFmode:
17149 index = 1;
17150 break;
17151 case XFmode:
17152 index = 2;
17153 break;
17154 default:
17155 return 100;
17156 }
17157 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17158 }
17159 if (SSE_CLASS_P (class))
17160 {
17161 int index;
17162 switch (GET_MODE_SIZE (mode))
17163 {
17164 case 4:
17165 index = 0;
17166 break;
17167 case 8:
17168 index = 1;
17169 break;
17170 case 16:
17171 index = 2;
17172 break;
17173 default:
17174 return 100;
17175 }
17176 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17177 }
17178 if (MMX_CLASS_P (class))
17179 {
17180 int index;
17181 switch (GET_MODE_SIZE (mode))
17182 {
17183 case 4:
17184 index = 0;
17185 break;
17186 case 8:
17187 index = 1;
17188 break;
17189 default:
17190 return 100;
17191 }
17192 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17193 }
17194 switch (GET_MODE_SIZE (mode))
17195 {
17196 case 1:
17197 if (in)
17198 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17199 : ix86_cost->movzbl_load);
17200 else
17201 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17202 : ix86_cost->int_store[0] + 4);
17203 break;
17204 case 2:
17205 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17206 default:
17207 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17208 if (mode == TFmode)
17209 mode = XFmode;
17210 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17211 * (((int) GET_MODE_SIZE (mode)
17212 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17213 }
17214 }
17215
17216 /* Compute a (partial) cost for rtx X. Return true if the complete
17217 cost has been computed, and false if subexpressions should be
17218 scanned. In either case, *TOTAL contains the cost result. */
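/* A small worked example of the kind of result computed below (costs come
   from the active ix86_cost table): for (mult:SI (reg:SI) (const_int 5)) the
   MULT case counts the two set bits of 5 and returns
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the costs of the two
   operands. */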
17219
17220 static bool
17221 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17222 {
17223 enum machine_mode mode = GET_MODE (x);
17224
17225 switch (code)
17226 {
17227 case CONST_INT:
17228 case CONST:
17229 case LABEL_REF:
17230 case SYMBOL_REF:
17231 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17232 *total = 3;
17233 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17234 *total = 2;
17235 else if (flag_pic && SYMBOLIC_CONST (x)
17236 && (!TARGET_64BIT
17237 || (GET_CODE (x) != LABEL_REF
17238 && (GET_CODE (x) != SYMBOL_REF
17239 || !SYMBOL_REF_LOCAL_P (x)))))
17240 *total = 1;
17241 else
17242 *total = 0;
17243 return true;
17244
17245 case CONST_DOUBLE:
17246 if (mode == VOIDmode)
17247 *total = 0;
17248 else
17249 switch (standard_80387_constant_p (x))
17250 {
17251 case 1: /* 0.0 */
17252 *total = 1;
17253 break;
17254 default: /* Other constants */
17255 *total = 2;
17256 break;
17257 case 0:
17258 case -1:
17259 /* Start with (MEM (SYMBOL_REF)), since that's where
17260 it'll probably end up. Add a penalty for size. */
17261 *total = (COSTS_N_INSNS (1)
17262 + (flag_pic != 0 && !TARGET_64BIT)
17263 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17264 break;
17265 }
17266 return true;
17267
17268 case ZERO_EXTEND:
17269 /* The zero extension is often completely free on x86_64, so make
17270 it as cheap as possible. */
17271 if (TARGET_64BIT && mode == DImode
17272 && GET_MODE (XEXP (x, 0)) == SImode)
17273 *total = 1;
17274 else if (TARGET_ZERO_EXTEND_WITH_AND)
17275 *total = ix86_cost->add;
17276 else
17277 *total = ix86_cost->movzx;
17278 return false;
17279
17280 case SIGN_EXTEND:
17281 *total = ix86_cost->movsx;
17282 return false;
17283
17284 case ASHIFT:
17285 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17286 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17287 {
17288 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17289 if (value == 1)
17290 {
17291 *total = ix86_cost->add;
17292 return false;
17293 }
17294 if ((value == 2 || value == 3)
17295 && ix86_cost->lea <= ix86_cost->shift_const)
17296 {
17297 *total = ix86_cost->lea;
17298 return false;
17299 }
17300 }
17301 /* FALLTHRU */
17302
17303 case ROTATE:
17304 case ASHIFTRT:
17305 case LSHIFTRT:
17306 case ROTATERT:
17307 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17308 {
17309 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17310 {
17311 if (INTVAL (XEXP (x, 1)) > 32)
17312 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17313 else
17314 *total = ix86_cost->shift_const * 2;
17315 }
17316 else
17317 {
17318 if (GET_CODE (XEXP (x, 1)) == AND)
17319 *total = ix86_cost->shift_var * 2;
17320 else
17321 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17322 }
17323 }
17324 else
17325 {
17326 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17327 *total = ix86_cost->shift_const;
17328 else
17329 *total = ix86_cost->shift_var;
17330 }
17331 return false;
17332
17333 case MULT:
17334 if (FLOAT_MODE_P (mode))
17335 {
17336 *total = ix86_cost->fmul;
17337 return false;
17338 }
17339 else
17340 {
17341 rtx op0 = XEXP (x, 0);
17342 rtx op1 = XEXP (x, 1);
17343 int nbits;
17344 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17345 {
17346 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17347 for (nbits = 0; value != 0; value &= value - 1)
17348 nbits++;
17349 }
17350 else
17351 /* This is arbitrary. */
17352 nbits = 7;
17353
17354 /* Compute costs correctly for widening multiplication. */
17355 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17356 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17357 == GET_MODE_SIZE (mode))
17358 {
17359 int is_mulwiden = 0;
17360 enum machine_mode inner_mode = GET_MODE (op0);
17361
17362 if (GET_CODE (op0) == GET_CODE (op1))
17363 is_mulwiden = 1, op1 = XEXP (op1, 0);
17364 else if (GET_CODE (op1) == CONST_INT)
17365 {
17366 if (GET_CODE (op0) == SIGN_EXTEND)
17367 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17368 == INTVAL (op1);
17369 else
17370 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17371 }
17372
17373 if (is_mulwiden)
17374 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17375 }
17376
17377 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17378 + nbits * ix86_cost->mult_bit
17379 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17380
17381 return true;
17382 }
17383
17384 case DIV:
17385 case UDIV:
17386 case MOD:
17387 case UMOD:
17388 if (FLOAT_MODE_P (mode))
17389 *total = ix86_cost->fdiv;
17390 else
17391 *total = ix86_cost->divide[MODE_INDEX (mode)];
17392 return false;
17393
17394 case PLUS:
17395 if (FLOAT_MODE_P (mode))
17396 *total = ix86_cost->fadd;
17397 else if (GET_MODE_CLASS (mode) == MODE_INT
17398 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17399 {
17400 if (GET_CODE (XEXP (x, 0)) == PLUS
17401 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17402 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17403 && CONSTANT_P (XEXP (x, 1)))
17404 {
17405 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17406 if (val == 2 || val == 4 || val == 8)
17407 {
17408 *total = ix86_cost->lea;
17409 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17410 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17411 outer_code);
17412 *total += rtx_cost (XEXP (x, 1), outer_code);
17413 return true;
17414 }
17415 }
17416 else if (GET_CODE (XEXP (x, 0)) == MULT
17417 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17418 {
17419 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17420 if (val == 2 || val == 4 || val == 8)
17421 {
17422 *total = ix86_cost->lea;
17423 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17424 *total += rtx_cost (XEXP (x, 1), outer_code);
17425 return true;
17426 }
17427 }
17428 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17429 {
17430 *total = ix86_cost->lea;
17431 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17432 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17433 *total += rtx_cost (XEXP (x, 1), outer_code);
17434 return true;
17435 }
17436 }
17437 /* FALLTHRU */
17438
17439 case MINUS:
17440 if (FLOAT_MODE_P (mode))
17441 {
17442 *total = ix86_cost->fadd;
17443 return false;
17444 }
17445 /* FALLTHRU */
17446
17447 case AND:
17448 case IOR:
17449 case XOR:
17450 if (!TARGET_64BIT && mode == DImode)
17451 {
17452 *total = (ix86_cost->add * 2
17453 + (rtx_cost (XEXP (x, 0), outer_code)
17454 << (GET_MODE (XEXP (x, 0)) != DImode))
17455 + (rtx_cost (XEXP (x, 1), outer_code)
17456 << (GET_MODE (XEXP (x, 1)) != DImode)));
17457 return true;
17458 }
17459 /* FALLTHRU */
17460
17461 case NEG:
17462 if (FLOAT_MODE_P (mode))
17463 {
17464 *total = ix86_cost->fchs;
17465 return false;
17466 }
17467 /* FALLTHRU */
17468
17469 case NOT:
17470 if (!TARGET_64BIT && mode == DImode)
17471 *total = ix86_cost->add * 2;
17472 else
17473 *total = ix86_cost->add;
17474 return false;
17475
17476 case COMPARE:
17477 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17478 && XEXP (XEXP (x, 0), 1) == const1_rtx
17479 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17480 && XEXP (x, 1) == const0_rtx)
17481 {
17482 /* This kind of construct is implemented using test[bwl].
17483 Treat it as if we had an AND. */
17484 *total = (ix86_cost->add
17485 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17486 + rtx_cost (const1_rtx, outer_code));
17487 return true;
17488 }
17489 return false;
17490
17491 case FLOAT_EXTEND:
17492 if (!TARGET_SSE_MATH
17493 || mode == XFmode
17494 || (mode == DFmode && !TARGET_SSE2))
17495 /* For standard 80387 constants, raise the cost to prevent
17496 compress_float_constant() from generating a load from memory. */
17497 switch (standard_80387_constant_p (XEXP (x, 0)))
17498 {
17499 case -1:
17500 case 0:
17501 *total = 0;
17502 break;
17503 case 1: /* 0.0 */
17504 *total = 1;
17505 break;
17506 default:
17507 *total = (x86_ext_80387_constants & TUNEMASK
17508 || optimize_size
17509 ? 1 : 0);
17510 }
17511 return false;
17512
17513 case ABS:
17514 if (FLOAT_MODE_P (mode))
17515 *total = ix86_cost->fabs;
17516 return false;
17517
17518 case SQRT:
17519 if (FLOAT_MODE_P (mode))
17520 *total = ix86_cost->fsqrt;
17521 return false;
17522
17523 case UNSPEC:
17524 if (XINT (x, 1) == UNSPEC_TP)
17525 *total = 0;
17526 return false;
17527
17528 default:
17529 return false;
17530 }
17531 }
17532
17533 #if TARGET_MACHO
17534
17535 static int current_machopic_label_num;
17536
17537 /* Given a symbol name and its associated stub, write out the
17538 definition of the stub. */
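/* For illustration only (label spellings below are hypothetical; the real
   names come from GEN_BINDER_NAME_FOR_STUB, GEN_SYMBOL_NAME_FOR_SYMBOL and
   the L%d$lz counter), the non-MACHOPIC_PURE output of this function has
   the shape:

	<stub>:
		.indirect_symbol <symbol>
		jmp	*L1$lz
	<binder>:
		pushl	$L1$lz
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol <symbol>
		.long	<binder>
*/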
17539
17540 void
17541 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17542 {
17543 unsigned int length;
17544 char *binder_name, *symbol_name, lazy_ptr_name[32];
17545 int label = ++current_machopic_label_num;
17546
17547 /* For 64-bit we shouldn't get here. */
17548 gcc_assert (!TARGET_64BIT);
17549
17550 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17551 symb = (*targetm.strip_name_encoding) (symb);
17552
17553 length = strlen (stub);
17554 binder_name = alloca (length + 32);
17555 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17556
17557 length = strlen (symb);
17558 symbol_name = alloca (length + 32);
17559 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17560
17561 sprintf (lazy_ptr_name, "L%d$lz", label);
17562
17563 if (MACHOPIC_PURE)
17564 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17565 else
17566 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17567
17568 fprintf (file, "%s:\n", stub);
17569 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17570
17571 if (MACHOPIC_PURE)
17572 {
17573 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17574 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17575 fprintf (file, "\tjmp\t*%%edx\n");
17576 }
17577 else
17578 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17579
17580 fprintf (file, "%s:\n", binder_name);
17581
17582 if (MACHOPIC_PURE)
17583 {
17584 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17585 fprintf (file, "\tpushl\t%%eax\n");
17586 }
17587 else
17588 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17589
17590 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17591
17592 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17593 fprintf (file, "%s:\n", lazy_ptr_name);
17594 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17595 fprintf (file, "\t.long %s\n", binder_name);
17596 }
17597
17598 void
17599 darwin_x86_file_end (void)
17600 {
17601 darwin_file_end ();
17602 ix86_file_end ();
17603 }
17604 #endif /* TARGET_MACHO */
17605
17606 /* Order the registers for register allocator. */
17607
17608 void
17609 x86_order_regs_for_local_alloc (void)
17610 {
17611 int pos = 0;
17612 int i;
17613
17614 /* First allocate the local general purpose registers. */
17615 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17616 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17617 reg_alloc_order [pos++] = i;
17618
17619 /* Global general purpose registers. */
17620 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17621 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17622 reg_alloc_order [pos++] = i;
17623
17624 /* x87 registers come first in case we are doing FP math
17625 using them. */
17626 if (!TARGET_SSE_MATH)
17627 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17628 reg_alloc_order [pos++] = i;
17629
17630 /* SSE registers. */
17631 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17632 reg_alloc_order [pos++] = i;
17633 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17634 reg_alloc_order [pos++] = i;
17635
17636 /* x87 registers. */
17637 if (TARGET_SSE_MATH)
17638 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17639 reg_alloc_order [pos++] = i;
17640
17641 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17642 reg_alloc_order [pos++] = i;
17643
17644 /* Initialize the rest of the array, as we do not allocate some registers
17645 at all. */
17646 while (pos < FIRST_PSEUDO_REGISTER)
17647 reg_alloc_order [pos++] = 0;
17648 }
17649
17650 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17651 struct attribute_spec.handler. */
17652 static tree
17653 ix86_handle_struct_attribute (tree *node, tree name,
17654 tree args ATTRIBUTE_UNUSED,
17655 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17656 {
17657 tree *type = NULL;
17658 if (DECL_P (*node))
17659 {
17660 if (TREE_CODE (*node) == TYPE_DECL)
17661 type = &TREE_TYPE (*node);
17662 }
17663 else
17664 type = node;
17665
17666 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17667 || TREE_CODE (*type) == UNION_TYPE)))
17668 {
17669 warning (OPT_Wattributes, "%qs attribute ignored",
17670 IDENTIFIER_POINTER (name));
17671 *no_add_attrs = true;
17672 }
17673
17674 else if ((is_attribute_p ("ms_struct", name)
17675 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17676 || ((is_attribute_p ("gcc_struct", name)
17677 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17678 {
17679 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17680 IDENTIFIER_POINTER (name));
17681 *no_add_attrs = true;
17682 }
17683
17684 return NULL_TREE;
17685 }
17686
17687 static bool
17688 ix86_ms_bitfield_layout_p (tree record_type)
17689 {
17690 return (TARGET_MS_BITFIELD_LAYOUT &&
17691 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17692 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17693 }
17694
17695 /* Returns an expression indicating where the this parameter is
17696 located on entry to the FUNCTION. */
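/* For example (a sketch of the cases handled below): on 64-bit targets this
   is %rdi, or %rsi when the return value is an aggregate returned in memory;
   with a nonzero regparm count on 32-bit it is %eax (%ecx for fastcall);
   otherwise it lives on the stack at 4(%esp), or at 8(%esp) when a hidden
   aggregate-return pointer occupies the first slot. */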
17697
17698 static rtx
17699 x86_this_parameter (tree function)
17700 {
17701 tree type = TREE_TYPE (function);
17702
17703 if (TARGET_64BIT)
17704 {
17705 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17706 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17707 }
17708
17709 if (ix86_function_regparm (type, function) > 0)
17710 {
17711 tree parm;
17712
17713 parm = TYPE_ARG_TYPES (type);
17714 /* Figure out whether or not the function has a variable number of
17715 arguments. */
17716 for (; parm; parm = TREE_CHAIN (parm))
17717 if (TREE_VALUE (parm) == void_type_node)
17718 break;
17719 /* If not, the this parameter is in the first argument. */
17720 if (parm)
17721 {
17722 int regno = 0;
17723 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17724 regno = 2;
17725 return gen_rtx_REG (SImode, regno);
17726 }
17727 }
17728
17729 if (aggregate_value_p (TREE_TYPE (type), type))
17730 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17731 else
17732 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17733 }
17734
17735 /* Determine whether x86_output_mi_thunk can succeed. */
17736
17737 static bool
17738 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17739 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17740 HOST_WIDE_INT vcall_offset, tree function)
17741 {
17742 /* 64-bit can handle anything. */
17743 if (TARGET_64BIT)
17744 return true;
17745
17746 /* For 32-bit, everything's fine if we have one free register. */
17747 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17748 return true;
17749
17750 /* Need a free register for vcall_offset. */
17751 if (vcall_offset)
17752 return false;
17753
17754 /* Need a free register for GOT references. */
17755 if (flag_pic && !(*targetm.binds_local_p) (function))
17756 return false;
17757
17758 /* Otherwise ok. */
17759 return true;
17760 }
17761
17762 /* Output the assembler code for a thunk function. THUNK_DECL is the
17763 declaration for the thunk function itself, FUNCTION is the decl for
17764 the target function. DELTA is an immediate constant offset to be
17765 added to THIS. If VCALL_OFFSET is nonzero, the word at
17766 *(*this + vcall_offset) should be added to THIS. */
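/* A rough C-level sketch of what the emitted assembly does (not the literal
   instruction sequence, and ignoring the register choices made below):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     then tail-call FUNCTION with the adjusted this. */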
17767
17768 static void
17769 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17770 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17771 HOST_WIDE_INT vcall_offset, tree function)
17772 {
17773 rtx xops[3];
17774 rtx this = x86_this_parameter (function);
17775 rtx this_reg, tmp;
17776
17777 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17778 pull it in now and let DELTA benefit. */
17779 if (REG_P (this))
17780 this_reg = this;
17781 else if (vcall_offset)
17782 {
17783 /* Put the this parameter into %eax. */
17784 xops[0] = this;
17785 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17786 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17787 }
17788 else
17789 this_reg = NULL_RTX;
17790
17791 /* Adjust the this parameter by a fixed constant. */
17792 if (delta)
17793 {
17794 xops[0] = GEN_INT (delta);
17795 xops[1] = this_reg ? this_reg : this;
17796 if (TARGET_64BIT)
17797 {
17798 if (!x86_64_general_operand (xops[0], DImode))
17799 {
17800 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17801 xops[1] = tmp;
17802 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17803 xops[0] = tmp;
17804 xops[1] = this;
17805 }
17806 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17807 }
17808 else
17809 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17810 }
17811
17812 /* Adjust the this parameter by a value stored in the vtable. */
17813 if (vcall_offset)
17814 {
17815 if (TARGET_64BIT)
17816 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17817 else
17818 {
17819 int tmp_regno = 2 /* ECX */;
17820 if (lookup_attribute ("fastcall",
17821 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17822 tmp_regno = 0 /* EAX */;
17823 tmp = gen_rtx_REG (SImode, tmp_regno);
17824 }
17825
17826 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17827 xops[1] = tmp;
17828 if (TARGET_64BIT)
17829 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17830 else
17831 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17832
17833 /* Adjust the this parameter. */
17834 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17835 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17836 {
17837 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17838 xops[0] = GEN_INT (vcall_offset);
17839 xops[1] = tmp2;
17840 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17841 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17842 }
17843 xops[1] = this_reg;
17844 if (TARGET_64BIT)
17845 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17846 else
17847 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17848 }
17849
17850 /* If necessary, drop THIS back to its stack slot. */
17851 if (this_reg && this_reg != this)
17852 {
17853 xops[0] = this_reg;
17854 xops[1] = this;
17855 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17856 }
17857
17858 xops[0] = XEXP (DECL_RTL (function), 0);
17859 if (TARGET_64BIT)
17860 {
17861 if (!flag_pic || (*targetm.binds_local_p) (function))
17862 output_asm_insn ("jmp\t%P0", xops);
17863 else
17864 {
17865 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17866 tmp = gen_rtx_CONST (Pmode, tmp);
17867 tmp = gen_rtx_MEM (QImode, tmp);
17868 xops[0] = tmp;
17869 output_asm_insn ("jmp\t%A0", xops);
17870 }
17871 }
17872 else
17873 {
17874 if (!flag_pic || (*targetm.binds_local_p) (function))
17875 output_asm_insn ("jmp\t%P0", xops);
17876 else
17877 #if TARGET_MACHO
17878 if (TARGET_MACHO)
17879 {
17880 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17881 tmp = (gen_rtx_SYMBOL_REF
17882 (Pmode,
17883 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17884 tmp = gen_rtx_MEM (QImode, tmp);
17885 xops[0] = tmp;
17886 output_asm_insn ("jmp\t%0", xops);
17887 }
17888 else
17889 #endif /* TARGET_MACHO */
17890 {
17891 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17892 output_set_got (tmp, NULL_RTX);
17893
17894 xops[1] = tmp;
17895 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17896 output_asm_insn ("jmp\t{*}%1", xops);
17897 }
17898 }
17899 }
17900
17901 static void
17902 x86_file_start (void)
17903 {
17904 default_file_start ();
17905 #if TARGET_MACHO
17906 darwin_file_start ();
17907 #endif
17908 if (X86_FILE_START_VERSION_DIRECTIVE)
17909 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17910 if (X86_FILE_START_FLTUSED)
17911 fputs ("\t.global\t__fltused\n", asm_out_file);
17912 if (ix86_asm_dialect == ASM_INTEL)
17913 fputs ("\t.intel_syntax\n", asm_out_file);
17914 }
17915
17916 int
17917 x86_field_alignment (tree field, int computed)
17918 {
17919 enum machine_mode mode;
17920 tree type = TREE_TYPE (field);
17921
17922 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17923 return computed;
17924 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17925 ? get_inner_array_type (type) : type);
17926 if (mode == DFmode || mode == DCmode
17927 || GET_MODE_CLASS (mode) == MODE_INT
17928 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17929 return MIN (32, computed);
17930 return computed;
17931 }
17932
17933 /* Output assembler code to FILE to increment profiler label # LABELNO
17934 for profiling a function entry. */
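/* For instance (a hedged sketch; MCOUNT_NAME, LPREFIX and
   PROFILE_COUNT_REGISTER are target macros, so the exact spelling varies),
   the 32-bit non-PIC case below emits roughly:

	movl	$LP0,%edx
	call	mcount
*/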
17935 void
17936 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17937 {
17938 if (TARGET_64BIT)
17939 if (flag_pic)
17940 {
17941 #ifndef NO_PROFILE_COUNTERS
17942 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17943 #endif
17944 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17945 }
17946 else
17947 {
17948 #ifndef NO_PROFILE_COUNTERS
17949 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17950 #endif
17951 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17952 }
17953 else if (flag_pic)
17954 {
17955 #ifndef NO_PROFILE_COUNTERS
17956 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17957 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17958 #endif
17959 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17960 }
17961 else
17962 {
17963 #ifndef NO_PROFILE_COUNTERS
17964 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17965 PROFILE_COUNT_REGISTER);
17966 #endif
17967 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17968 }
17969 }
17970
17971 /* We don't have exact information about the insn sizes, but we may assume
17972 quite safely that we know the sizes of all 1 byte insns and of memory
17973 addresses. This is enough to eliminate unnecessary padding in
17974 99% of cases. */
17975
17976 static int
17977 min_insn_size (rtx insn)
17978 {
17979 int l = 0;
17980
17981 if (!INSN_P (insn) || !active_insn_p (insn))
17982 return 0;
17983
17984 /* Discard alignments we've emitted, and jump tables. */
17985 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17986 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17987 return 0;
17988 if (GET_CODE (insn) == JUMP_INSN
17989 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17990 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17991 return 0;
17992
17993 /* Important case - calls are always 5 bytes.
17994 It is common to have many calls in a row. */
17995 if (GET_CODE (insn) == CALL_INSN
17996 && symbolic_reference_mentioned_p (PATTERN (insn))
17997 && !SIBLING_CALL_P (insn))
17998 return 5;
17999 if (get_attr_length (insn) <= 1)
18000 return 1;
18001
18002 /* For normal instructions we may rely on the sizes of addresses
18003 and the presence of a symbol to require 4 bytes of encoding.
18004 This is not the case for jumps where references are PC relative. */
18005 if (GET_CODE (insn) != JUMP_INSN)
18006 {
18007 l = get_attr_length_address (insn);
18008 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18009 l = 4;
18010 }
18011 if (l)
18012 return 1+l;
18013 else
18014 return 2;
18015 }
18016
18017 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
18018 16 byte window. */
18019
18020 static void
18021 ix86_avoid_jump_misspredicts (void)
18022 {
18023 rtx insn, start = get_insns ();
18024 int nbytes = 0, njumps = 0;
18025 int isjump = 0;
18026
18027 /* Look for all minimal intervals of instructions containing 4 jumps.
18028 The intervals are bounded by START and INSN. NBYTES is the total
18029 size of the instructions in the interval, including INSN but not
18030 including START. When NBYTES is smaller than 16, it is possible
18031 that INSN and the end of START land in the same 16 byte window.
18032
18033 The smallest offset within that window at which INSN can start is the
18034 case where START ends at offset 0; INSN then starts at offset
18035 NBYTES - sizeof (INSN). We emit a p2align for a 16 byte window with
18036 maxskip 15 - NBYTES + sizeof (INSN). */
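  /* Illustrative numbers: if the interval already holds three jumps or
     calls, INSN itself is a 2 byte jump, and NBYTES is 12 (INSN included),
     we request 15 - 12 + 2 = 5 bytes of padding before INSN so that a
     fourth jump cannot share the 16 byte window. */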
18037 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18038 {
18039
18040 nbytes += min_insn_size (insn);
18041 if (dump_file)
18042 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18043 INSN_UID (insn), min_insn_size (insn));
18044 if ((GET_CODE (insn) == JUMP_INSN
18045 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18046 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18047 || GET_CODE (insn) == CALL_INSN)
18048 njumps++;
18049 else
18050 continue;
18051
18052 while (njumps > 3)
18053 {
18054 start = NEXT_INSN (start);
18055 if ((GET_CODE (start) == JUMP_INSN
18056 && GET_CODE (PATTERN (start)) != ADDR_VEC
18057 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18058 || GET_CODE (start) == CALL_INSN)
18059 njumps--, isjump = 1;
18060 else
18061 isjump = 0;
18062 nbytes -= min_insn_size (start);
18063 }
18064 gcc_assert (njumps >= 0);
18065 if (dump_file)
18066 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18067 INSN_UID (start), INSN_UID (insn), nbytes);
18068
18069 if (njumps == 3 && isjump && nbytes < 16)
18070 {
18071 int padsize = 15 - nbytes + min_insn_size (insn);
18072
18073 if (dump_file)
18074 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18075 INSN_UID (insn), padsize);
18076 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18077 }
18078 }
18079 }
18080
18081 /* AMD Athlon works faster
18082 when RET is not the destination of a conditional jump and is not directly
18083 preceded by another jump instruction. We avoid the penalty by inserting a
18084 NOP just before the RET instruction in such cases. */
18085 static void
18086 ix86_pad_returns (void)
18087 {
18088 edge e;
18089 edge_iterator ei;
18090
18091 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18092 {
18093 basic_block bb = e->src;
18094 rtx ret = BB_END (bb);
18095 rtx prev;
18096 bool replace = false;
18097
18098 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18099 || !maybe_hot_bb_p (bb))
18100 continue;
18101 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18102 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18103 break;
18104 if (prev && GET_CODE (prev) == CODE_LABEL)
18105 {
18106 edge e;
18107 edge_iterator ei;
18108
18109 FOR_EACH_EDGE (e, ei, bb->preds)
18110 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18111 && !(e->flags & EDGE_FALLTHRU))
18112 replace = true;
18113 }
18114 if (!replace)
18115 {
18116 prev = prev_active_insn (ret);
18117 if (prev
18118 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18119 || GET_CODE (prev) == CALL_INSN))
18120 replace = true;
18121 /* Empty functions get a branch mispredict even when the jump destination
18122 is not visible to us. */
18123 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18124 replace = true;
18125 }
18126 if (replace)
18127 {
18128 emit_insn_before (gen_return_internal_long (), ret);
18129 delete_insn (ret);
18130 }
18131 }
18132 }
18133
18134 /* Implement machine specific optimizations. We implement padding of returns
18135 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
18136 static void
18137 ix86_reorg (void)
18138 {
18139 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18140 ix86_pad_returns ();
18141 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18142 ix86_avoid_jump_misspredicts ();
18143 }
18144
18145 /* Return nonzero when a QImode register that must be represented via a REX
18146 prefix is used. */
18147 bool
18148 x86_extended_QIreg_mentioned_p (rtx insn)
18149 {
18150 int i;
18151 extract_insn_cached (insn);
18152 for (i = 0; i < recog_data.n_operands; i++)
18153 if (REG_P (recog_data.operand[i])
18154 && REGNO (recog_data.operand[i]) >= 4)
18155 return true;
18156 return false;
18157 }
18158
18159 /* Return nonzero when P points to a register encoded via a REX prefix.
18160 Called via for_each_rtx. */
18161 static int
18162 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18163 {
18164 unsigned int regno;
18165 if (!REG_P (*p))
18166 return 0;
18167 regno = REGNO (*p);
18168 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18169 }
18170
18171 /* Return true when INSN mentions a register that must be encoded using a
18172 REX prefix. */
18173 bool
18174 x86_extended_reg_mentioned_p (rtx insn)
18175 {
18176 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18177 }
18178
18179 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18180 optabs would emit if we didn't have TFmode patterns. */
18181
18182 void
18183 x86_emit_floatuns (rtx operands[2])
18184 {
18185 rtx neglab, donelab, i0, i1, f0, in, out;
18186 enum machine_mode mode, inmode;
18187
18188 inmode = GET_MODE (operands[1]);
18189 gcc_assert (inmode == SImode || inmode == DImode);
18190
18191 out = operands[0];
18192 in = force_reg (inmode, operands[1]);
18193 mode = GET_MODE (out);
18194 neglab = gen_label_rtx ();
18195 donelab = gen_label_rtx ();
18196 i1 = gen_reg_rtx (Pmode);
18197 f0 = gen_reg_rtx (mode);
18198
18199 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18200
18201 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18202 emit_jump_insn (gen_jump (donelab));
18203 emit_barrier ();
18204
18205 emit_label (neglab);
18206
18207 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18208 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18209 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18210 expand_float (f0, i0, 0);
18211 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18212
18213 emit_label (donelab);
18214 }
18215 \f
18216 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18217 with all elements equal to VAR. Return true if successful. */
18218
18219 static bool
18220 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18221 rtx target, rtx val)
18222 {
18223 enum machine_mode smode, wsmode, wvmode;
18224 rtx x;
18225
18226 switch (mode)
18227 {
18228 case V2SImode:
18229 case V2SFmode:
18230 if (!mmx_ok)
18231 return false;
18232 /* FALLTHRU */
18233
18234 case V2DFmode:
18235 case V2DImode:
18236 case V4SFmode:
18237 case V4SImode:
18238 val = force_reg (GET_MODE_INNER (mode), val);
18239 x = gen_rtx_VEC_DUPLICATE (mode, val);
18240 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18241 return true;
18242
18243 case V4HImode:
18244 if (!mmx_ok)
18245 return false;
18246 if (TARGET_SSE || TARGET_3DNOW_A)
18247 {
18248 val = gen_lowpart (SImode, val);
18249 x = gen_rtx_TRUNCATE (HImode, val);
18250 x = gen_rtx_VEC_DUPLICATE (mode, x);
18251 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18252 return true;
18253 }
18254 else
18255 {
18256 smode = HImode;
18257 wsmode = SImode;
18258 wvmode = V2SImode;
18259 goto widen;
18260 }
18261
18262 case V8QImode:
18263 if (!mmx_ok)
18264 return false;
18265 smode = QImode;
18266 wsmode = HImode;
18267 wvmode = V4HImode;
18268 goto widen;
18269 case V8HImode:
18270 if (TARGET_SSE2)
18271 {
18272 rtx tmp1, tmp2;
18273 /* Extend HImode to SImode using a paradoxical SUBREG. */
18274 tmp1 = gen_reg_rtx (SImode);
18275 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18276 /* Insert the SImode value as low element of V4SImode vector. */
18277 tmp2 = gen_reg_rtx (V4SImode);
18278 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18279 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18280 CONST0_RTX (V4SImode),
18281 const1_rtx);
18282 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18283 /* Cast the V4SImode vector back to a V8HImode vector. */
18284 tmp1 = gen_reg_rtx (V8HImode);
18285 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18286 /* Duplicate the low short through the whole low SImode word. */
18287 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18288 /* Cast the V8HImode vector back to a V4SImode vector. */
18289 tmp2 = gen_reg_rtx (V4SImode);
18290 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18291 /* Replicate the low element of the V4SImode vector. */
18292 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18293 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18294 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18295 return true;
18296 }
18297 smode = HImode;
18298 wsmode = SImode;
18299 wvmode = V4SImode;
18300 goto widen;
18301 case V16QImode:
18302 if (TARGET_SSE2)
18303 {
18304 rtx tmp1, tmp2;
18305 /* Extend QImode to SImode using a paradoxical SUBREG. */
18306 tmp1 = gen_reg_rtx (SImode);
18307 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18308 /* Insert the SImode value as low element of V4SImode vector. */
18309 tmp2 = gen_reg_rtx (V4SImode);
18310 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18311 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18312 CONST0_RTX (V4SImode),
18313 const1_rtx);
18314 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18315 /* Cast the V4SImode vector back to a V16QImode vector. */
18316 tmp1 = gen_reg_rtx (V16QImode);
18317 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18318 /* Duplicate the low byte through the whole low SImode word. */
18319 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18320 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18321 /* Cast the V16QImode vector back to a V4SImode vector. */
18322 tmp2 = gen_reg_rtx (V4SImode);
18323 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18324 /* Replicate the low element of the V4SImode vector. */
18325 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18326 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18327 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18328 return true;
18329 }
18330 smode = QImode;
18331 wsmode = HImode;
18332 wvmode = V8HImode;
18333 goto widen;
18334 widen:
18335 /* Replicate the value once into the next wider mode and recurse. */
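	 /* For illustration (values hypothetical): broadcasting the QImode
	    value 0xAB into V8QImode widens 0xAB to HImode, forms 0xABAB with
	    the shift and IOR below, recurses to duplicate 0xABAB across
	    V4HImode, and finally takes the V8QImode lowpart of the result. */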
18336 val = convert_modes (wsmode, smode, val, true);
18337 x = expand_simple_binop (wsmode, ASHIFT, val,
18338 GEN_INT (GET_MODE_BITSIZE (smode)),
18339 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18340 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18341
18342 x = gen_reg_rtx (wvmode);
18343 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18344 gcc_unreachable ();
18345 emit_move_insn (target, gen_lowpart (mode, x));
18346 return true;
18347
18348 default:
18349 return false;
18350 }
18351 }
18352
18353 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18354 whose ONE_VAR element is VAR, and other elements are zero. Return true
18355 if successful. */
18356
18357 static bool
18358 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18359 rtx target, rtx var, int one_var)
18360 {
18361 enum machine_mode vsimode;
18362 rtx new_target;
18363 rtx x, tmp;
18364
18365 switch (mode)
18366 {
18367 case V2SFmode:
18368 case V2SImode:
18369 if (!mmx_ok)
18370 return false;
18371 /* FALLTHRU */
18372
18373 case V2DFmode:
18374 case V2DImode:
18375 if (one_var != 0)
18376 return false;
18377 var = force_reg (GET_MODE_INNER (mode), var);
18378 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18379 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18380 return true;
18381
18382 case V4SFmode:
18383 case V4SImode:
18384 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18385 new_target = gen_reg_rtx (mode);
18386 else
18387 new_target = target;
18388 var = force_reg (GET_MODE_INNER (mode), var);
18389 x = gen_rtx_VEC_DUPLICATE (mode, var);
18390 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18391 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18392 if (one_var != 0)
18393 {
18394 /* We need to shuffle the value to the correct position, so
18395 create a new pseudo to store the intermediate result. */
18396
18397 /* With SSE2, we can use the integer shuffle insns. */
18398 if (mode != V4SFmode && TARGET_SSE2)
18399 {
18400 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18401 GEN_INT (1),
18402 GEN_INT (one_var == 1 ? 0 : 1),
18403 GEN_INT (one_var == 2 ? 0 : 1),
18404 GEN_INT (one_var == 3 ? 0 : 1)));
18405 if (target != new_target)
18406 emit_move_insn (target, new_target);
18407 return true;
18408 }
18409
18410 /* Otherwise convert the intermediate result to V4SFmode and
18411 use the SSE1 shuffle instructions. */
18412 if (mode != V4SFmode)
18413 {
18414 tmp = gen_reg_rtx (V4SFmode);
18415 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18416 }
18417 else
18418 tmp = new_target;
18419
18420 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18421 GEN_INT (1),
18422 GEN_INT (one_var == 1 ? 0 : 1),
18423 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18424 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18425
18426 if (mode != V4SFmode)
18427 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18428 else if (tmp != target)
18429 emit_move_insn (target, tmp);
18430 }
18431 else if (target != new_target)
18432 emit_move_insn (target, new_target);
18433 return true;
18434
18435 case V8HImode:
18436 case V16QImode:
18437 vsimode = V4SImode;
18438 goto widen;
18439 case V4HImode:
18440 case V8QImode:
18441 if (!mmx_ok)
18442 return false;
18443 vsimode = V2SImode;
18444 goto widen;
18445 widen:
18446 if (one_var != 0)
18447 return false;
18448
18449 /* Zero extend the variable element to SImode and recurse. */
18450 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18451
18452 x = gen_reg_rtx (vsimode);
18453 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18454 var, one_var))
18455 gcc_unreachable ();
18456
18457 emit_move_insn (target, gen_lowpart (mode, x));
18458 return true;
18459
18460 default:
18461 return false;
18462 }
18463 }
18464
18465 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18466 consisting of the values in VALS. It is known that all elements
18467 except ONE_VAR are constants. Return true if successful. */
18468
18469 static bool
18470 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18471 rtx target, rtx vals, int one_var)
18472 {
18473 rtx var = XVECEXP (vals, 0, one_var);
18474 enum machine_mode wmode;
18475 rtx const_vec, x;
18476
18477 const_vec = copy_rtx (vals);
18478 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18479 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18480
18481 switch (mode)
18482 {
18483 case V2DFmode:
18484 case V2DImode:
18485 case V2SFmode:
18486 case V2SImode:
18487 /* For the two element vectors, it's just as easy to use
18488 the general case. */
18489 return false;
18490
18491 case V4SFmode:
18492 case V4SImode:
18493 case V8HImode:
18494 case V4HImode:
18495 break;
18496
18497 case V16QImode:
18498 wmode = V8HImode;
18499 goto widen;
18500 case V8QImode:
18501 wmode = V4HImode;
18502 goto widen;
18503 widen:
18504 /* There's no way to set one QImode entry easily. Combine
18505 the variable value with its adjacent constant value, and
18506 promote to an HImode set. */
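	 /* Hypothetical example: to place VAR in byte 3 of a V16QImode vector
	    whose byte 2 is the constant 0x12, we build the HImode value
	    (VAR << 8) | 0x12 and store it as element 1 of the V8HImode view
	    of the constant vector. */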
18507 x = XVECEXP (vals, 0, one_var ^ 1);
18508 if (one_var & 1)
18509 {
18510 var = convert_modes (HImode, QImode, var, true);
18511 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18512 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18513 x = GEN_INT (INTVAL (x) & 0xff);
18514 }
18515 else
18516 {
18517 var = convert_modes (HImode, QImode, var, true);
18518 x = gen_int_mode (INTVAL (x) << 8, HImode);
18519 }
18520 if (x != const0_rtx)
18521 var = expand_simple_binop (HImode, IOR, var, x, var,
18522 1, OPTAB_LIB_WIDEN);
18523
18524 x = gen_reg_rtx (wmode);
18525 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18526 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18527
18528 emit_move_insn (target, gen_lowpart (mode, x));
18529 return true;
18530
18531 default:
18532 return false;
18533 }
18534
18535 emit_move_insn (target, const_vec);
18536 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18537 return true;
18538 }
18539
18540 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18541 all values variable, and none identical. */
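/* A hedged sketch of the word-assembly fallback used below: on a 32-bit
   target a V8QImode vector {a,b,c,d,e,f,g,h} is built as two SImode words,
   (d<<24)|(c<<16)|(b<<8)|a and (h<<24)|(g<<16)|(f<<8)|e, which are then
   moved into the low and high halves of the destination register. */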
18542
18543 static void
18544 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18545 rtx target, rtx vals)
18546 {
18547 enum machine_mode half_mode = GET_MODE_INNER (mode);
18548 rtx op0 = NULL, op1 = NULL;
18549 bool use_vec_concat = false;
18550
18551 switch (mode)
18552 {
18553 case V2SFmode:
18554 case V2SImode:
18555 if (!mmx_ok && !TARGET_SSE)
18556 break;
18557 /* FALLTHRU */
18558
18559 case V2DFmode:
18560 case V2DImode:
18561 /* For the two element vectors, we always implement VEC_CONCAT. */
18562 op0 = XVECEXP (vals, 0, 0);
18563 op1 = XVECEXP (vals, 0, 1);
18564 use_vec_concat = true;
18565 break;
18566
18567 case V4SFmode:
18568 half_mode = V2SFmode;
18569 goto half;
18570 case V4SImode:
18571 half_mode = V2SImode;
18572 goto half;
18573 half:
18574 {
18575 rtvec v;
18576
18577 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18578 Recurse to load the two halves. */
18579
18580 op0 = gen_reg_rtx (half_mode);
18581 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18582 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18583
18584 op1 = gen_reg_rtx (half_mode);
18585 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18586 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18587
18588 use_vec_concat = true;
18589 }
18590 break;
18591
18592 case V8HImode:
18593 case V16QImode:
18594 case V4HImode:
18595 case V8QImode:
18596 break;
18597
18598 default:
18599 gcc_unreachable ();
18600 }
18601
18602 if (use_vec_concat)
18603 {
18604 if (!register_operand (op0, half_mode))
18605 op0 = force_reg (half_mode, op0);
18606 if (!register_operand (op1, half_mode))
18607 op1 = force_reg (half_mode, op1);
18608
18609 emit_insn (gen_rtx_SET (VOIDmode, target,
18610 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18611 }
18612 else
18613 {
18614 int i, j, n_elts, n_words, n_elt_per_word;
18615 enum machine_mode inner_mode;
18616 rtx words[4], shift;
18617
18618 inner_mode = GET_MODE_INNER (mode);
18619 n_elts = GET_MODE_NUNITS (mode);
18620 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18621 n_elt_per_word = n_elts / n_words;
18622 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18623
18624 for (i = 0; i < n_words; ++i)
18625 {
18626 rtx word = NULL_RTX;
18627
18628 for (j = 0; j < n_elt_per_word; ++j)
18629 {
18630 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18631 elt = convert_modes (word_mode, inner_mode, elt, true);
18632
18633 if (j == 0)
18634 word = elt;
18635 else
18636 {
18637 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18638 word, 1, OPTAB_LIB_WIDEN);
18639 word = expand_simple_binop (word_mode, IOR, word, elt,
18640 word, 1, OPTAB_LIB_WIDEN);
18641 }
18642 }
18643
18644 words[i] = word;
18645 }
18646
18647 if (n_words == 1)
18648 emit_move_insn (target, gen_lowpart (mode, words[0]));
18649 else if (n_words == 2)
18650 {
18651 rtx tmp = gen_reg_rtx (mode);
18652 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18653 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18654 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18655 emit_move_insn (target, tmp);
18656 }
18657 else if (n_words == 4)
18658 {
18659 rtx tmp = gen_reg_rtx (V4SImode);
18660 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18661 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18662 emit_move_insn (target, gen_lowpart (mode, tmp));
18663 }
18664 else
18665 gcc_unreachable ();
18666 }
18667 }
18668
18669 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18670 instructions unless MMX_OK is true. */
18671
18672 void
18673 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18674 {
18675 enum machine_mode mode = GET_MODE (target);
18676 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18677 int n_elts = GET_MODE_NUNITS (mode);
18678 int n_var = 0, one_var = -1;
18679 bool all_same = true, all_const_zero = true;
18680 int i;
18681 rtx x;
18682
18683 for (i = 0; i < n_elts; ++i)
18684 {
18685 x = XVECEXP (vals, 0, i);
18686 if (!CONSTANT_P (x))
18687 n_var++, one_var = i;
18688 else if (x != CONST0_RTX (inner_mode))
18689 all_const_zero = false;
18690 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18691 all_same = false;
18692 }
18693
18694 /* Constants are best loaded from the constant pool. */
18695 if (n_var == 0)
18696 {
18697 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18698 return;
18699 }
18700
18701 /* If all values are identical, broadcast the value. */
18702 if (all_same
18703 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18704 XVECEXP (vals, 0, 0)))
18705 return;
18706
18707 /* Values where only one field is non-constant are best loaded from
18708 the pool and overwritten via move later. */
18709 if (n_var == 1)
18710 {
18711 if (all_const_zero
18712 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18713 XVECEXP (vals, 0, one_var),
18714 one_var))
18715 return;
18716
18717 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18718 return;
18719 }
18720
18721 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18722 }
18723
18724 void
18725 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18726 {
18727 enum machine_mode mode = GET_MODE (target);
18728 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18729 bool use_vec_merge = false;
18730 rtx tmp;
18731
18732 switch (mode)
18733 {
18734 case V2SFmode:
18735 case V2SImode:
18736 if (mmx_ok)
18737 {
18738 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18739 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18740 if (elt == 0)
18741 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18742 else
18743 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18744 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18745 return;
18746 }
18747 break;
18748
18749 case V2DFmode:
18750 case V2DImode:
18751 {
18752 rtx op0, op1;
18753
18754 /* For the two element vectors, we implement a VEC_CONCAT with
18755 the extraction of the other element. */
18756
18757 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18758 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18759
18760 if (elt == 0)
18761 op0 = val, op1 = tmp;
18762 else
18763 op0 = tmp, op1 = val;
18764
18765 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18766 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18767 }
18768 return;
18769
18770 case V4SFmode:
18771 switch (elt)
18772 {
18773 case 0:
18774 use_vec_merge = true;
18775 break;
18776
18777 case 1:
18778 /* tmp = target = A B C D */
18779 tmp = copy_to_reg (target);
18780 /* target = A A B B */
18781 emit_insn (gen_sse_unpcklps (target, target, target));
18782 /* target = X A B B */
18783 ix86_expand_vector_set (false, target, val, 0);
18784 /* target = A X C D */
18785 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18786 GEN_INT (1), GEN_INT (0),
18787 GEN_INT (2+4), GEN_INT (3+4)));
18788 return;
18789
18790 case 2:
18791 /* tmp = target = A B C D */
18792 tmp = copy_to_reg (target);
18793 /* tmp = X B C D */
18794 ix86_expand_vector_set (false, tmp, val, 0);
18795 /* target = A B X D */
18796 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18797 GEN_INT (0), GEN_INT (1),
18798 GEN_INT (0+4), GEN_INT (3+4)));
18799 return;
18800
18801 case 3:
18802 /* tmp = target = A B C D */
18803 tmp = copy_to_reg (target);
18804 /* tmp = X B C D */
18805 ix86_expand_vector_set (false, tmp, val, 0);
18806 /* target = A B C X */
18807 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18808 GEN_INT (0), GEN_INT (1),
18809 GEN_INT (2+4), GEN_INT (0+4)));
18810 return;
18811
18812 default:
18813 gcc_unreachable ();
18814 }
18815 break;
18816
18817 case V4SImode:
18818 /* Element 0 handled by vec_merge below. */
18819 if (elt == 0)
18820 {
18821 use_vec_merge = true;
18822 break;
18823 }
18824
18825 if (TARGET_SSE2)
18826 {
18827 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18828 store into element 0, then shuffle them back. */
18829
18830 rtx order[4];
18831
18832 order[0] = GEN_INT (elt);
18833 order[1] = const1_rtx;
18834 order[2] = const2_rtx;
18835 order[3] = GEN_INT (3);
18836 order[elt] = const0_rtx;
18837
18838 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18839 order[1], order[2], order[3]));
18840
18841 ix86_expand_vector_set (false, target, val, 0);
18842
18843 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18844 order[1], order[2], order[3]));
18845 }
18846 else
18847 {
18848 /* For SSE1, we have to reuse the V4SF code. */
18849 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18850 gen_lowpart (SFmode, val), elt);
18851 }
18852 return;
18853
18854 case V8HImode:
18855 use_vec_merge = TARGET_SSE2;
18856 break;
18857 case V4HImode:
18858 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18859 break;
18860
18861 case V16QImode:
18862 case V8QImode:
18863 default:
18864 break;
18865 }
18866
18867 if (use_vec_merge)
18868 {
18869 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18870 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18871 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18872 }
18873 else
18874 {
18875 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18876
18877 emit_move_insn (mem, target);
18878
18879 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18880 emit_move_insn (tmp, val);
18881
18882 emit_move_insn (target, mem);
18883 }
18884 }
18885
18886 void
18887 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18888 {
18889 enum machine_mode mode = GET_MODE (vec);
18890 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18891 bool use_vec_extr = false;
18892 rtx tmp;
18893
18894 switch (mode)
18895 {
18896 case V2SImode:
18897 case V2SFmode:
18898 if (!mmx_ok)
18899 break;
18900 /* FALLTHRU */
18901
18902 case V2DFmode:
18903 case V2DImode:
18904 use_vec_extr = true;
18905 break;
18906
18907 case V4SFmode:
18908 switch (elt)
18909 {
18910 case 0:
18911 tmp = vec;
18912 break;
18913
18914 case 1:
18915 case 3:
18916 tmp = gen_reg_rtx (mode);
18917 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18918 GEN_INT (elt), GEN_INT (elt),
18919 GEN_INT (elt+4), GEN_INT (elt+4)));
18920 break;
18921
18922 case 2:
18923 tmp = gen_reg_rtx (mode);
18924 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18925 break;
18926
18927 default:
18928 gcc_unreachable ();
18929 }
18930 vec = tmp;
18931 use_vec_extr = true;
18932 elt = 0;
18933 break;
18934
18935 case V4SImode:
18936 if (TARGET_SSE2)
18937 {
18938 switch (elt)
18939 {
18940 case 0:
18941 tmp = vec;
18942 break;
18943
18944 case 1:
18945 case 3:
18946 tmp = gen_reg_rtx (mode);
18947 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18948 GEN_INT (elt), GEN_INT (elt),
18949 GEN_INT (elt), GEN_INT (elt)));
18950 break;
18951
18952 case 2:
18953 tmp = gen_reg_rtx (mode);
18954 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18955 break;
18956
18957 default:
18958 gcc_unreachable ();
18959 }
18960 vec = tmp;
18961 use_vec_extr = true;
18962 elt = 0;
18963 }
18964 else
18965 {
18966 /* For SSE1, we have to reuse the V4SF code. */
18967 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18968 gen_lowpart (V4SFmode, vec), elt);
18969 return;
18970 }
18971 break;
18972
18973 case V8HImode:
18974 use_vec_extr = TARGET_SSE2;
18975 break;
18976 case V4HImode:
18977 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18978 break;
18979
18980 case V16QImode:
18981 case V8QImode:
18982 /* ??? Could extract the appropriate HImode element and shift. */
18983 default:
18984 break;
18985 }
18986
18987 if (use_vec_extr)
18988 {
18989 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18990 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18991
18992 /* Let the rtl optimizers know about the zero extension performed. */
18993 if (inner_mode == HImode)
18994 {
18995 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18996 target = gen_lowpart (SImode, target);
18997 }
18998
18999 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19000 }
19001 else
19002 {
19003 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19004
19005 emit_move_insn (mem, vec);
19006
19007 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19008 emit_move_insn (target, tmp);
19009 }
19010 }
19011
19012 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19013 pattern to reduce; DEST is the destination; IN is the input vector. */
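/* Illustrative data flow, writing "op" for the operation FN expands
   (element 0 listed first):
     in   = { a, b, c, d }
     tmp1 = movhlps (in, in)               -> { c, d, c, d }
     tmp2 = op (tmp1, in)                  -> { c op a, d op b, ... }
     tmp3 = shufps (tmp2, tmp2, element 1) -> { d op b, ... }
     dest = op (tmp2, tmp3), so element 0 holds (c op a) op (d op b). */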
19014
19015 void
19016 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19017 {
19018 rtx tmp1, tmp2, tmp3;
19019
19020 tmp1 = gen_reg_rtx (V4SFmode);
19021 tmp2 = gen_reg_rtx (V4SFmode);
19022 tmp3 = gen_reg_rtx (V4SFmode);
19023
19024 emit_insn (gen_sse_movhlps (tmp1, in, in));
19025 emit_insn (fn (tmp2, tmp1, in));
19026
19027 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19028 GEN_INT (1), GEN_INT (1),
19029 GEN_INT (1+4), GEN_INT (1+4)));
19030 emit_insn (fn (dest, tmp2, tmp3));
19031 }
19032 \f
19033 /* Target hook for scalar_mode_supported_p. */
19034 static bool
19035 ix86_scalar_mode_supported_p (enum machine_mode mode)
19036 {
19037 if (DECIMAL_FLOAT_MODE_P (mode))
19038 return true;
19039 else
19040 return default_scalar_mode_supported_p (mode);
19041 }
19042
19043 /* Implements target hook vector_mode_supported_p. */
19044 static bool
19045 ix86_vector_mode_supported_p (enum machine_mode mode)
19046 {
19047 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19048 return true;
19049 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19050 return true;
19051 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19052 return true;
19053 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19054 return true;
19055 return false;
19056 }
19057
19058 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19059
19060 We do this in the new i386 backend to maintain source compatibility
19061 with the old cc0-based compiler. */
19062
19063 static tree
19064 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19065 tree inputs ATTRIBUTE_UNUSED,
19066 tree clobbers)
19067 {
19068 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19069 clobbers);
19070 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19071 clobbers);
19072 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19073 clobbers);
19074 return clobbers;
19075 }
19076
19077 /* Return true if this goes in large data/bss. */
19078
19079 static bool
19080 ix86_in_large_data_p (tree exp)
19081 {
19082 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19083 return false;
19084
19085 /* Functions are never large data. */
19086 if (TREE_CODE (exp) == FUNCTION_DECL)
19087 return false;
19088
19089 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19090 {
19091 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19092 if (strcmp (section, ".ldata") == 0
19093 || strcmp (section, ".lbss") == 0)
19094 return true;
19095 return false;
19096 }
19097 else
19098 {
19099 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19100
19101 /* If this is an incomplete type with size 0, then we can't put it
19102 in data because it might be too big when completed. */
19103 if (!size || size > ix86_section_threshold)
19104 return true;
19105 }
19106
19107 return false;
19108 }
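
/* Encode section info for the x86-64 medium model (comment added for
   exposition): mark static or external variables placed in the large data
   sections with SYMBOL_FLAG_FAR_ADDR, so later references know that 64-bit
   addressing may be required.  */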
19109 static void
19110 ix86_encode_section_info (tree decl, rtx rtl, int first)
19111 {
19112 default_encode_section_info (decl, rtl, first);
19113
19114 if (TREE_CODE (decl) == VAR_DECL
19115 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19116 && ix86_in_large_data_p (decl))
19117 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19118 }
19119
19120 /* Worker function for REVERSE_CONDITION. */
19121
19122 enum rtx_code
19123 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19124 {
19125 return (mode != CCFPmode && mode != CCFPUmode
19126 ? reverse_condition (code)
19127 : reverse_condition_maybe_unordered (code));
19128 }
19129
19130 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19131 to OPERANDS[0]. */
19132
19133 const char *
19134 output_387_reg_move (rtx insn, rtx *operands)
19135 {
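  /* Case summary (comment added for exposition): when the source register
     dies here the move is done with a popping store (ffreep when the
     destination is %st(0), fstp otherwise); for a live source we either
     fld it when the destination is the stack top, or store with a plain
     fst.  */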
19136 if (REG_P (operands[1])
19137 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19138 {
19139 if (REGNO (operands[0]) == FIRST_STACK_REG)
19140 return output_387_ffreep (operands, 0);
19141 return "fstp\t%y0";
19142 }
19143 if (STACK_TOP_P (operands[0]))
19144 return "fld%z1\t%y1";
19145 return "fst\t%y0";
19146 }
19147
19148 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19149 FP status register is set. */
19150
19151 void
19152 ix86_emit_fp_unordered_jump (rtx label)
19153 {
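  /* Rough shape of the code emitted below (added for exposition; exact
     operands depend on register allocation):
         fnstsw  %ax
     then either
         sahf                    (TARGET_USE_SAHF: C2 lands in PF)
         jp      label
     or
         testb   $0x04, %ah      (test the C2 bit directly)
         jne     label  */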
19154 rtx reg = gen_reg_rtx (HImode);
19155 rtx temp;
19156
19157 emit_insn (gen_x86_fnstsw_1 (reg));
19158
19159 if (TARGET_USE_SAHF)
19160 {
19161 emit_insn (gen_x86_sahf_1 (reg));
19162
19163 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19164 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19165 }
19166 else
19167 {
19168 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19169
19170 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19171 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19172 }
19173
19174 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19175 gen_rtx_LABEL_REF (VOIDmode, label),
19176 pc_rtx);
19177 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19178 emit_jump_insn (temp);
19179 }
19180
19181 /* Output code to perform a log1p XFmode calculation. */
19182
19183 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19184 {
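  /* C-level sketch of the expansion below (added for exposition):
       if (fabs (op1) < 0.29289321881345247...)   [threshold is 1 - sqrt(2)/2]
         op0 = ln(2) * log2 (1.0 + op1) via fyl2xp1, which takes op1 directly
               and stays accurate when op1 is tiny;
       else
         op0 = ln(2) * log2 (1.0 + op1) via fyl2x on the explicit sum.
     Both arms compute log1p (op1); the split only preserves precision.  */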
19185 rtx label1 = gen_label_rtx ();
19186 rtx label2 = gen_label_rtx ();
19187
19188 rtx tmp = gen_reg_rtx (XFmode);
19189 rtx tmp2 = gen_reg_rtx (XFmode);
19190
19191 emit_insn (gen_absxf2 (tmp, op1));
19192 emit_insn (gen_cmpxf (tmp,
19193 CONST_DOUBLE_FROM_REAL_VALUE (
19194 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19195 XFmode)));
19196 emit_jump_insn (gen_bge (label1));
19197
19198 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19199 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19200 emit_jump (label2);
19201
19202 emit_label (label1);
19203 emit_move_insn (tmp, CONST1_RTX (XFmode));
19204 emit_insn (gen_addxf3 (tmp, op1, tmp));
19205 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19206 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19207
19208 emit_label (label2);
19209 }
19210
19211 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19212
19213 static void
19214 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19215 tree decl)
19216 {
19217 /* With Binutils 2.15, the "@unwind" marker must be specified on
19218 every occurrence of the ".eh_frame" section, not just the first
19219 one. */
19220 if (TARGET_64BIT
19221 && strcmp (name, ".eh_frame") == 0)
19222 {
19223 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19224 flags & SECTION_WRITE ? "aw" : "a");
19225 return;
19226 }
19227 default_elf_asm_named_section (name, flags, decl);
19228 }
19229
19230 /* Return the mangling of TYPE if it is an extended fundamental type. */
19231
19232 static const char *
19233 ix86_mangle_fundamental_type (tree type)
19234 {
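  /* For example (comment added for exposition), under the Itanium C++ ABI
     "void f (__float128)" mangles to "_Z1fg" while "void f (long double)"
     mangles to "_Z1fe".  */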
19235 switch (TYPE_MODE (type))
19236 {
19237 case TFmode:
19238 /* __float128 is "g". */
19239 return "g";
19240 case XFmode:
19241 /* "long double" or __float80 is "e". */
19242 return "e";
19243 default:
19244 return NULL;
19245 }
19246 }
19247
19248 /* For 32-bit code we can save PIC register setup by using
19249 __stack_chk_fail_local hidden function instead of calling
19250 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19251 register, so it is better to call __stack_chk_fail directly. */
19252
19253 static tree
19254 ix86_stack_protect_fail (void)
19255 {
19256 return TARGET_64BIT
19257 ? default_external_stack_protect_fail ()
19258 : default_hidden_stack_protect_fail ();
19259 }
19260
19261 /* Select a format to encode pointers in exception handling data. CODE
19262 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19263 true if the symbol may be affected by dynamic relocations.
19264
19265 ??? All x86 object file formats are capable of representing this.
19266 After all, the relocation needed is the same as for the call insn.
19267 Whether or not a particular assembler allows us to enter such, I
19268 guess we'll have to see. */
19269 int
19270 asm_preferred_eh_data_format (int code, int global)
19271 {
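  /* Decision summary (comment added for exposition): PIC code uses
     pc-relative encodings, sdata4 on 32-bit, for the small PIC model and for
     most medium PIC references, sdata8 otherwise, with DW_EH_PE_indirect
     added for symbols subject to dynamic relocation; without PIC the small
     model (and code references in the medium model) use udata4, and
     everything else falls back to an absolute pointer.  */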
19272 if (flag_pic)
19273 {
19274 int type = DW_EH_PE_sdata8;
19275 if (!TARGET_64BIT
19276 || ix86_cmodel == CM_SMALL_PIC
19277 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19278 type = DW_EH_PE_sdata4;
19279 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19280 }
19281 if (ix86_cmodel == CM_SMALL
19282 || (ix86_cmodel == CM_MEDIUM && code))
19283 return DW_EH_PE_udata4;
19284 return DW_EH_PE_absptr;
19285 }
19286 \f
19287 /* Expand copysign from SIGN to the positive value ABS_VALUE
19288 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
19289 the sign-bit. */
19290 static void
19291 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19292 {
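  /* C-level sketch of what is emitted below (added for exposition):
       result = abs_value | (sign & SIGN_BIT)
     where SIGN_BIT is a constant with only the sign bit set.  A
     caller-supplied MASK is the complement of the sign bit (as produced by
     ix86_expand_sse_fabs) and is inverted first to recover SIGN_BIT.  */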
19293 enum machine_mode mode = GET_MODE (sign);
19294 rtx sgn = gen_reg_rtx (mode);
19295 if (mask == NULL_RTX)
19296 {
19297 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19298 if (!VECTOR_MODE_P (mode))
19299 {
19300 /* We need to generate a scalar mode mask in this case. */
19301 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19302 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19303 mask = gen_reg_rtx (mode);
19304 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19305 }
19306 }
19307 else
19308 mask = gen_rtx_NOT (mode, mask);
19309 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19310 gen_rtx_AND (mode, mask, sign)));
19311 emit_insn (gen_rtx_SET (VOIDmode, result,
19312 gen_rtx_IOR (mode, abs_value, sgn)));
19313 }
19314
19315 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19316 mask for masking out the sign-bit is stored in *SMASK, if that is
19317 non-null. */
19318 static rtx
19319 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19320 {
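  /* C-level sketch (added for exposition): compute
       xa = op0 & ~SIGN_BIT
     i.e. clear the sign bit; *SMASK, when requested, receives the ~SIGN_BIT
     constant so callers can reuse it for copysign.  */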
19321 enum machine_mode mode = GET_MODE (op0);
19322 rtx xa, mask;
19323
19324 xa = gen_reg_rtx (mode);
19325 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19326 if (!VECTOR_MODE_P (mode))
19327 {
19328 /* We need to generate a scalar mode mask in this case. */
19329 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19330 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19331 mask = gen_reg_rtx (mode);
19332 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19333 }
19334 emit_insn (gen_rtx_SET (VOIDmode, xa,
19335 gen_rtx_AND (mode, op0, mask)));
19336
19337 if (smask)
19338 *smask = mask;
19339
19340 return xa;
19341 }
19342
19343 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19344 swapping the operands if SWAP_OPERANDS is true. The expanded
19345 code is a forward jump to a newly created label in case the
19346 comparison is true. The generated label rtx is returned. */
19347 static rtx
19348 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19349 bool swap_operands)
19350 {
19351 rtx label, tmp;
19352
19353 if (swap_operands)
19354 {
19355 tmp = op0;
19356 op0 = op1;
19357 op1 = tmp;
19358 }
19359
19360 label = gen_label_rtx ();
19361 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19362 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19363 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19364 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19365 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19366 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19367 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19368 JUMP_LABEL (tmp) = label;
19369
19370 return label;
19371 }
19372
19373 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19374 using comparison code CODE. Operands are swapped for the comparison if
19375 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19376 static rtx
19377 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19378 bool swap_operands)
19379 {
19380 enum machine_mode mode = GET_MODE (op0);
19381 rtx mask = gen_reg_rtx (mode);
19382
19383 if (swap_operands)
19384 {
19385 rtx tmp = op0;
19386 op0 = op1;
19387 op1 = tmp;
19388 }
19389
19390 if (mode == DFmode)
19391 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19392 gen_rtx_fmt_ee (code, mode, op0, op1)));
19393 else
19394 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19395 gen_rtx_fmt_ee (code, mode, op0, op1)));
19396
19397 return mask;
19398 }
19399
19400 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19401 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19402 static rtx
19403 ix86_gen_TWO52 (enum machine_mode mode)
19404 {
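  /* For DFmode the value is 2**52 == 4503599627370496.0, for SFmode it is
     2**23 == 8388608.0 (comment added for exposition): at or above this
     magnitude no fractional bits remain, which is what the x + TWO52 - TWO52
     rounding idiom used by the expanders below relies on.  */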
19405 REAL_VALUE_TYPE TWO52r;
19406 rtx TWO52;
19407
19408 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19409 TWO52 = const_double_from_real_value (TWO52r, mode);
19410 TWO52 = force_reg (mode, TWO52);
19411
19412 return TWO52;
19413 }
19414
19415 /* Expand SSE sequence for computing lround from OP1 storing
19416 into OP0. */
19417 void
19418 ix86_expand_lround (rtx op0, rtx op1)
19419 {
19420 /* C code for the stuff we're doing below:
19421 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19422 return (long)tmp;
19423 */
19424 enum machine_mode mode = GET_MODE (op1);
19425 const struct real_format *fmt;
19426 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19427 rtx adj;
19428
19429 /* load nextafter (0.5, 0.0) */
19430 fmt = REAL_MODE_FORMAT (mode);
19431 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19432 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
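  /* pred_half is now 0.5 - 2**(-p-1), i.e. the largest representable value
     strictly below 0.5 (comment added for exposition).  */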
19433
19434 /* adj = copysign (0.5, op1) */
19435 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19436 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19437
19438 /* adj = op1 + adj */
19439 expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
19440
19441 /* op0 = (imode)adj */
19442 expand_fix (op0, adj, 0);
19443 }
19444
19445 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
19446    storing into OPERAND0. */
19447 void
19448 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19449 {
19450 /* C code for the stuff we're doing below (for do_floor):
19451 xi = (long)op1;
19452 xi -= (double)xi > op1 ? 1 : 0;
19453 return xi;
19454 */
19455 enum machine_mode fmode = GET_MODE (op1);
19456 enum machine_mode imode = GET_MODE (op0);
19457 rtx ireg, freg, label;
19458
19459 /* reg = (long)op1 */
19460 ireg = gen_reg_rtx (imode);
19461 expand_fix (ireg, op1, 0);
19462
19463 /* freg = (double)reg */
19464 freg = gen_reg_rtx (fmode);
19465 expand_float (freg, ireg, 0);
19466
19467 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19468 label = ix86_expand_sse_compare_and_jump (UNLE,
19469 freg, op1, !do_floor);
19470 expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19471 ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
19472 emit_label (label);
19473 LABEL_NUSES (label) = 1;
19474
19475 emit_move_insn (op0, ireg);
19476 }
19477
19478 /* Expand rint (rounding OPERAND1 to integer in the current rounding
19479    mode) and storing the result in OPERAND0. */
19480 void
19481 ix86_expand_rint (rtx operand0, rtx operand1)
19482 {
19483 /* C code for the stuff we're doing below:
19484 xa = fabs (operand1);
19485 if (!isless (xa, 2**52))
19486 return operand1;
19487 xa = xa + 2**52 - 2**52;
19488 return copysign (xa, operand1);
19489 */
19490 enum machine_mode mode = GET_MODE (operand0);
19491 rtx res, xa, label, TWO52, mask;
19492
19493 res = gen_reg_rtx (mode);
19494 emit_move_insn (res, operand1);
19495
19496 /* xa = abs (operand1) */
19497 xa = ix86_expand_sse_fabs (res, &mask);
19498
19499 /* if (!isless (xa, TWO52)) goto label; */
19500 TWO52 = ix86_gen_TWO52 (mode);
19501 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19502
19503 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19504 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19505
19506 ix86_sse_copysign_to_positive (res, xa, res, mask);
19507
19508 emit_label (label);
19509 LABEL_NUSES (label) = 1;
19510
19511 emit_move_insn (operand0, res);
19512 }
19513
19514 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into
19515    OPERAND0, without relying on DImode truncation via cvttsd2siq that is only
       available on 64bit targets. */
19516 void
19517 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19518 {
19519 /* C code for the stuff we expand below.
19520 double xa = fabs (x), x2;
19521 if (!isless (xa, TWO52))
19522 return x;
19523 xa = xa + TWO52 - TWO52;
19524 x2 = copysign (xa, x);
19525 Compensate. Floor:
19526 if (x2 > x)
19527 x2 -= 1;
19528 Compensate. Ceil:
19529 if (x2 < x)
19530 x2 -= -1;
19531 return x2;
19532 */
19533 enum machine_mode mode = GET_MODE (operand0);
19534 rtx xa, TWO52, tmp, label, one, res, mask;
19535
19536 TWO52 = ix86_gen_TWO52 (mode);
19537
19538 /* Temporary for holding the result, initialized to the input
19539 operand to ease control flow. */
19540 res = gen_reg_rtx (mode);
19541 emit_move_insn (res, operand1);
19542
19543 /* xa = abs (operand1) */
19544 xa = ix86_expand_sse_fabs (res, &mask);
19545
19546 /* if (!isless (xa, TWO52)) goto label; */
19547 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19548
19549 /* xa = xa + TWO52 - TWO52; */
19550 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19551 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19552
19553 /* xa = copysign (xa, operand1) */
19554 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19555
19556 /* generate 1.0 or -1.0 */
19557 one = force_reg (mode,
19558 const_double_from_real_value (do_floor
19559 ? dconst1 : dconstm1, mode));
19560
19561 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19562 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19563 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19564 gen_rtx_AND (mode, one, tmp)));
19565 /* We always need to subtract here to preserve signed zero. */
19566 expand_simple_binop (mode, MINUS,
19567 xa, tmp, res, 0, OPTAB_DIRECT);
19568
19569 emit_label (label);
19570 LABEL_NUSES (label) = 1;
19571
19572 emit_move_insn (operand0, res);
19573 }
19574
19575 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19576 into OPERAND0. */
19577 void
19578 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19579 {
19580 /* C code for the stuff we expand below.
19581 double xa = fabs (x), x2;
19582 if (!isless (xa, TWO52))
19583 return x;
19584 x2 = (double)(long)x;
19585 Compensate. Floor:
19586 if (x2 > x)
19587 x2 -= 1;
19588 Compensate. Ceil:
19589 if (x2 < x)
19590 x2 += 1;
19591 if (HONOR_SIGNED_ZEROS (mode))
19592 return copysign (x2, x);
19593 return x2;
19594 */
19595 enum machine_mode mode = GET_MODE (operand0);
19596 rtx xa, xi, TWO52, tmp, label, one, res, mask;
19597
19598 TWO52 = ix86_gen_TWO52 (mode);
19599
19600 /* Temporary for holding the result, initialized to the input
19601 operand to ease control flow. */
19602 res = gen_reg_rtx (mode);
19603 emit_move_insn (res, operand1);
19604
19605 /* xa = abs (operand1) */
19606 xa = ix86_expand_sse_fabs (res, &mask);
19607
19608 /* if (!isless (xa, TWO52)) goto label; */
19609 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19610
19611 /* xa = (double)(long)x */
19612 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19613 expand_fix (xi, res, 0);
19614 expand_float (xa, xi, 0);
19615
19616 /* generate 1.0 */
19617 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19618
19619 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19620 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19621 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19622 gen_rtx_AND (mode, one, tmp)));
19623 expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19624 xa, tmp, res, 0, OPTAB_DIRECT);
19625
19626 if (HONOR_SIGNED_ZEROS (mode))
19627 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19628
19629 emit_label (label);
19630 LABEL_NUSES (label) = 1;
19631
19632 emit_move_insn (operand0, res);
19633 }
19634
19635 /* Expand SSE sequence for computing round from OPERAND1 storing
19636 into OPERAND0. Sequence that works without relying on DImode truncation
19637 via cvttsd2siq that is only available on 64bit targets. */
19638 void
19639 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
19640 {
19641 /* C code for the stuff we expand below.
19642 double xa = fabs (x), xa2, x2;
19643 if (!isless (xa, TWO52))
19644 return x;
19645 Using the absolute value and copying back sign makes
19646 -0.0 -> -0.0 correct.
19647 xa2 = xa + TWO52 - TWO52;
19648 Compensate.
19649 dxa = xa2 - xa;
19650 if (dxa <= -0.5)
19651 xa2 += 1;
19652 else if (dxa > 0.5)
19653 xa2 -= 1;
19654 x2 = copysign (xa2, x);
19655 return x2;
19656 */
19657 enum machine_mode mode = GET_MODE (operand0);
19658 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
19659
19660 TWO52 = ix86_gen_TWO52 (mode);
19661
19662 /* Temporary for holding the result, initialized to the input
19663 operand to ease control flow. */
19664 res = gen_reg_rtx (mode);
19665 emit_move_insn (res, operand1);
19666
19667 /* xa = abs (operand1) */
19668 xa = ix86_expand_sse_fabs (res, &mask);
19669
19670 /* if (!isless (xa, TWO52)) goto label; */
19671 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19672
19673 /* xa2 = xa + TWO52 - TWO52; */
19674 xa2 = gen_reg_rtx (mode);
19675 expand_simple_binop (mode, PLUS, xa, TWO52, xa2, 0, OPTAB_DIRECT);
19676 expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
19677
19678 /* dxa = xa2 - xa; */
19679 dxa = gen_reg_rtx (mode);
19680 expand_simple_binop (mode, MINUS, xa2, xa, dxa, 0, OPTAB_DIRECT);
19681
19682 /* generate 0.5, 1.0 and -0.5 */
19683 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
19684 one = gen_reg_rtx (mode);
19685 expand_simple_binop (mode, PLUS, half, half, one, 0, OPTAB_DIRECT);
19686 mhalf = gen_reg_rtx (mode);
19687 expand_simple_binop (mode, MINUS, half, one, mhalf, 0, OPTAB_DIRECT);
19688
19689 /* Compensate. */
19690 tmp = gen_reg_rtx (mode);
19691 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19692 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
19693 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19694 gen_rtx_AND (mode, one, tmp)));
19695 expand_simple_binop (mode, MINUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19696 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19697 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
19698 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19699 gen_rtx_AND (mode, one, tmp)));
19700 expand_simple_binop (mode, PLUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19701
19702 /* res = copysign (xa2, operand1) */
19703 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
19704
19705 emit_label (label);
19706 LABEL_NUSES (label) = 1;
19707
19708 emit_move_insn (operand0, res);
19709 }
19710
19711 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19712 into OPERAND0. */
19713 void
19714 ix86_expand_trunc (rtx operand0, rtx operand1)
19715 {
19716 /* C code for SSE variant we expand below.
19717 double xa = fabs (x), x2;
19718 if (!isless (xa, TWO52))
19719 return x;
19720 x2 = (double)(long)x;
19721 if (HONOR_SIGNED_ZEROS (mode))
19722 return copysign (x2, x);
19723 return x2;
19724 */
19725 enum machine_mode mode = GET_MODE (operand0);
19726 rtx xa, xi, TWO52, label, res, mask;
19727
19728 TWO52 = ix86_gen_TWO52 (mode);
19729
19730 /* Temporary for holding the result, initialized to the input
19731 operand to ease control flow. */
19732 res = gen_reg_rtx (mode);
19733 emit_move_insn (res, operand1);
19734
19735 /* xa = abs (operand1) */
19736 xa = ix86_expand_sse_fabs (res, &mask);
19737
19738 /* if (!isless (xa, TWO52)) goto label; */
19739 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19740
19741 /* x = (double)(long)x */
19742 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19743 expand_fix (xi, res, 0);
19744 expand_float (res, xi, 0);
19745
19746 if (HONOR_SIGNED_ZEROS (mode))
19747 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19748
19749 emit_label (label);
19750 LABEL_NUSES (label) = 1;
19751
19752 emit_move_insn (operand0, res);
19753 }
19754
19755 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
19756    OPERAND0, without relying on DImode truncation via cvttsd2siq that is only
       available on 64bit targets. */
19757 void
19758 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
19759 {
19760 enum machine_mode mode = GET_MODE (operand0);
19761 rtx xa, mask, TWO52, label, one, res, smask;
19762
19763 /* C code for SSE variant we expand below.
19764 double xa = fabs (x), x2;
19765 if (!isless (xa, TWO52))
19766 return x;
19767 xa2 = xa + TWO52 - TWO52;
19768 Compensate:
19769 if (xa2 > xa)
19770 xa2 -= 1.0;
19771 x2 = copysign (xa2, x);
19772 return x2;
19773 */
19774
19775 TWO52 = ix86_gen_TWO52 (mode);
19776
19777 /* Temporary for holding the result, initialized to the input
19778 operand to ease control flow. */
19779 res = gen_reg_rtx (mode);
19780 emit_move_insn (res, operand1);
19781
19782 /* xa = abs (operand1) */
19783 xa = ix86_expand_sse_fabs (res, &smask);
19784
19785 /* if (!isless (xa, TWO52)) goto label; */
19786 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19787
19788 /* res = xa + TWO52 - TWO52; */
19789 expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
19790 expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
19791
19792 /* generate 1.0 */
19793 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19794
19795 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19796 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
19797 emit_insn (gen_rtx_SET (VOIDmode, mask,
19798 gen_rtx_AND (mode, mask, one)));
19799 expand_simple_binop (mode, MINUS,
19800 res, mask, res, 0, OPTAB_DIRECT);
19801
19802 /* res = copysign (res, operand1) */
19803 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
19804
19805 emit_label (label);
19806 LABEL_NUSES (label) = 1;
19807
19808 emit_move_insn (operand0, res);
19809 }
19810
19811 /* Expand SSE sequence for computing round from OPERAND1 storing
19812 into OPERAND0. */
19813 void
19814 ix86_expand_round (rtx operand0, rtx operand1)
19815 {
19816 /* C code for the stuff we're doing below:
19817 double xa = fabs (x);
19818 if (!isless (xa, TWO52))
19819 return x;
19820 xa = (double)(long)(xa + nextafter (0.5, 0.0));
19821 return copysign (xa, x);
19822 */
19823 enum machine_mode mode = GET_MODE (operand0);
19824 rtx res, TWO52, xa, label, xi, half, mask;
19825 const struct real_format *fmt;
19826 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19827
19828 /* Temporary for holding the result, initialized to the input
19829 operand to ease control flow. */
19830 res = gen_reg_rtx (mode);
19831 emit_move_insn (res, operand1);
19832
19833 TWO52 = ix86_gen_TWO52 (mode);
19834 xa = ix86_expand_sse_fabs (res, &mask);
19835 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19836
19837 /* load nextafter (0.5, 0.0) */
19838 fmt = REAL_MODE_FORMAT (mode);
19839 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19840 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19841
19842 /* xa = xa + 0.5 */
19843 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
19844 expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);
19845
19846 /* xa = (double)(int64_t)xa */
19847 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19848 expand_fix (xi, xa, 0);
19849 expand_float (xa, xi, 0);
19850
19851 /* res = copysign (xa, operand1) */
19852 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
19853
19854 emit_label (label);
19855 LABEL_NUSES (label) = 1;
19856
19857 emit_move_insn (operand0, res);
19858 }
19859
19860 #include "gt-i386.h"