gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
56 #endif
57
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
64 : 4)
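/* As an illustration (assuming the struct processor_costs field names
   mult_init and divide used elsewhere in the port): the cost tables below
   are indexed with MODE_INDEX, so a query such as
     ix86_cost->mult_init[MODE_INDEX (SImode)]
   selects the SImode entry, while any mode other than QI/HI/SI/DImode
   falls through to the "other" slot at index 4.  */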
65
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
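/* Worked example of the assumption above: COSTS_N_INSNS (1) == 4 and an add
   is taken to be 2 bytes, so COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1);
   a 2-byte instruction in the size_cost table below therefore weighs the
   same as one average insn does in the speed-oriented tables.  */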
69
70 static const
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
89 0, /* "large" insn */
90 2, /* MOVE_RATIO */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
114 2, /* Branch cost */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 };
122
123 /* Processor costs (relative to an add) */
124 static const
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
144 3, /* MOVE_RATIO */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
168 1, /* Branch cost */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
175 };
176
177 static const
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
197 3, /* MOVE_RATIO */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
221 1, /* Branch cost */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
228 };
229
230 static const
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
250 6, /* MOVE_RATIO */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
274 2, /* Branch cost */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
281 };
282
283 static const
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
303 6, /* MOVE_RATIO */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
327 2, /* Branch cost */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
334 };
335
336 static const
337 struct processor_costs geode_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (1), /* cost of a lea instruction */
340 COSTS_N_INSNS (2), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (4), /* HI */
344 COSTS_N_INSNS (7), /* SI */
345 COSTS_N_INSNS (7), /* DI */
346 COSTS_N_INSNS (7)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (23), /* HI */
350 COSTS_N_INSNS (39), /* SI */
351 COSTS_N_INSNS (39), /* DI */
352 COSTS_N_INSNS (39)}, /* other */
353 COSTS_N_INSNS (1), /* cost of movsx */
354 COSTS_N_INSNS (1), /* cost of movzx */
355 8, /* "large" insn */
356 4, /* MOVE_RATIO */
357 1, /* cost for loading QImode using movzbl */
358 {1, 1, 1}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {1, 1, 1}, /* cost of storing integer registers */
362 1, /* cost of reg,reg fld/fst */
363 {1, 1, 1}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 6, 6}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367
368 1, /* cost of moving MMX register */
369 {1, 1}, /* cost of loading MMX registers
370 in SImode and DImode */
371 {1, 1}, /* cost of storing MMX registers
372 in SImode and DImode */
373 1, /* cost of moving SSE register */
374 {1, 1, 1}, /* cost of loading SSE registers
375 in SImode, DImode and TImode */
376 {1, 1, 1}, /* cost of storing SSE registers
377 in SImode, DImode and TImode */
378 1, /* MMX or SSE register to integer */
379 32, /* size of prefetch block */
380 1, /* number of parallel prefetches */
381 1, /* Branch cost */
382 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
383 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
384 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
385 COSTS_N_INSNS (1), /* cost of FABS instruction. */
386 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
387 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
388 };
389
390 static const
391 struct processor_costs k6_cost = {
392 COSTS_N_INSNS (1), /* cost of an add instruction */
393 COSTS_N_INSNS (2), /* cost of a lea instruction */
394 COSTS_N_INSNS (1), /* variable shift costs */
395 COSTS_N_INSNS (1), /* constant shift costs */
396 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
397 COSTS_N_INSNS (3), /* HI */
398 COSTS_N_INSNS (3), /* SI */
399 COSTS_N_INSNS (3), /* DI */
400 COSTS_N_INSNS (3)}, /* other */
401 0, /* cost of multiply per each bit set */
402 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
403 COSTS_N_INSNS (18), /* HI */
404 COSTS_N_INSNS (18), /* SI */
405 COSTS_N_INSNS (18), /* DI */
406 COSTS_N_INSNS (18)}, /* other */
407 COSTS_N_INSNS (2), /* cost of movsx */
408 COSTS_N_INSNS (2), /* cost of movzx */
409 8, /* "large" insn */
410 4, /* MOVE_RATIO */
411 3, /* cost for loading QImode using movzbl */
412 {4, 5, 4}, /* cost of loading integer registers
413 in QImode, HImode and SImode.
414 Relative to reg-reg move (2). */
415 {2, 3, 2}, /* cost of storing integer registers */
416 4, /* cost of reg,reg fld/fst */
417 {6, 6, 6}, /* cost of loading fp registers
418 in SFmode, DFmode and XFmode */
419 {4, 4, 4}, /* cost of storing fp registers
420 in SFmode, DFmode and XFmode */
421 2, /* cost of moving MMX register */
422 {2, 2}, /* cost of loading MMX registers
423 in SImode and DImode */
424 {2, 2}, /* cost of storing MMX registers
425 in SImode and DImode */
426 2, /* cost of moving SSE register */
427 {2, 2, 8}, /* cost of loading SSE registers
428 in SImode, DImode and TImode */
429 {2, 2, 8}, /* cost of storing SSE registers
430 in SImode, DImode and TImode */
431 6, /* MMX or SSE register to integer */
432 32, /* size of prefetch block */
433 1, /* number of parallel prefetches */
434 1, /* Branch cost */
435 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
436 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
437 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
438 COSTS_N_INSNS (2), /* cost of FABS instruction. */
439 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
440 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
441 };
442
443 static const
444 struct processor_costs athlon_cost = {
445 COSTS_N_INSNS (1), /* cost of an add instruction */
446 COSTS_N_INSNS (2), /* cost of a lea instruction */
447 COSTS_N_INSNS (1), /* variable shift costs */
448 COSTS_N_INSNS (1), /* constant shift costs */
449 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
450 COSTS_N_INSNS (5), /* HI */
451 COSTS_N_INSNS (5), /* SI */
452 COSTS_N_INSNS (5), /* DI */
453 COSTS_N_INSNS (5)}, /* other */
454 0, /* cost of multiply per each bit set */
455 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
456 COSTS_N_INSNS (26), /* HI */
457 COSTS_N_INSNS (42), /* SI */
458 COSTS_N_INSNS (74), /* DI */
459 COSTS_N_INSNS (74)}, /* other */
460 COSTS_N_INSNS (1), /* cost of movsx */
461 COSTS_N_INSNS (1), /* cost of movzx */
462 8, /* "large" insn */
463 9, /* MOVE_RATIO */
464 4, /* cost for loading QImode using movzbl */
465 {3, 4, 3}, /* cost of loading integer registers
466 in QImode, HImode and SImode.
467 Relative to reg-reg move (2). */
468 {3, 4, 3}, /* cost of storing integer registers */
469 4, /* cost of reg,reg fld/fst */
470 {4, 4, 12}, /* cost of loading fp registers
471 in SFmode, DFmode and XFmode */
472 {6, 6, 8}, /* cost of storing fp registers
473 in SFmode, DFmode and XFmode */
474 2, /* cost of moving MMX register */
475 {4, 4}, /* cost of loading MMX registers
476 in SImode and DImode */
477 {4, 4}, /* cost of storing MMX registers
478 in SImode and DImode */
479 2, /* cost of moving SSE register */
480 {4, 4, 6}, /* cost of loading SSE registers
481 in SImode, DImode and TImode */
482 {4, 4, 5}, /* cost of storing SSE registers
483 in SImode, DImode and TImode */
484 5, /* MMX or SSE register to integer */
485 64, /* size of prefetch block */
486 6, /* number of parallel prefetches */
487 5, /* Branch cost */
488 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
494 };
495
496 static const
497 struct processor_costs k8_cost = {
498 COSTS_N_INSNS (1), /* cost of an add instruction */
499 COSTS_N_INSNS (2), /* cost of a lea instruction */
500 COSTS_N_INSNS (1), /* variable shift costs */
501 COSTS_N_INSNS (1), /* constant shift costs */
502 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
503 COSTS_N_INSNS (4), /* HI */
504 COSTS_N_INSNS (3), /* SI */
505 COSTS_N_INSNS (4), /* DI */
506 COSTS_N_INSNS (5)}, /* other */
507 0, /* cost of multiply per each bit set */
508 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
509 COSTS_N_INSNS (26), /* HI */
510 COSTS_N_INSNS (42), /* SI */
511 COSTS_N_INSNS (74), /* DI */
512 COSTS_N_INSNS (74)}, /* other */
513 COSTS_N_INSNS (1), /* cost of movsx */
514 COSTS_N_INSNS (1), /* cost of movzx */
515 8, /* "large" insn */
516 9, /* MOVE_RATIO */
517 4, /* cost for loading QImode using movzbl */
518 {3, 4, 3}, /* cost of loading integer registers
519 in QImode, HImode and SImode.
520 Relative to reg-reg move (2). */
521 {3, 4, 3}, /* cost of storing integer registers */
522 4, /* cost of reg,reg fld/fst */
523 {4, 4, 12}, /* cost of loading fp registers
524 in SFmode, DFmode and XFmode */
525 {6, 6, 8}, /* cost of storing fp registers
526 in SFmode, DFmode and XFmode */
527 2, /* cost of moving MMX register */
528 {3, 3}, /* cost of loading MMX registers
529 in SImode and DImode */
530 {4, 4}, /* cost of storing MMX registers
531 in SImode and DImode */
532 2, /* cost of moving SSE register */
533 {4, 3, 6}, /* cost of loading SSE registers
534 in SImode, DImode and TImode */
535 {4, 4, 5}, /* cost of storing SSE registers
536 in SImode, DImode and TImode */
537 5, /* MMX or SSE register to integer */
538 64, /* size of prefetch block */
539 6, /* number of parallel prefetches */
540 5, /* Branch cost */
541 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
542 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
543 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
544 COSTS_N_INSNS (2), /* cost of FABS instruction. */
545 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
546 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
547 };
548
549 static const
550 struct processor_costs pentium4_cost = {
551 COSTS_N_INSNS (1), /* cost of an add instruction */
552 COSTS_N_INSNS (3), /* cost of a lea instruction */
553 COSTS_N_INSNS (4), /* variable shift costs */
554 COSTS_N_INSNS (4), /* constant shift costs */
555 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
556 COSTS_N_INSNS (15), /* HI */
557 COSTS_N_INSNS (15), /* SI */
558 COSTS_N_INSNS (15), /* DI */
559 COSTS_N_INSNS (15)}, /* other */
560 0, /* cost of multiply per each bit set */
561 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
562 COSTS_N_INSNS (56), /* HI */
563 COSTS_N_INSNS (56), /* SI */
564 COSTS_N_INSNS (56), /* DI */
565 COSTS_N_INSNS (56)}, /* other */
566 COSTS_N_INSNS (1), /* cost of movsx */
567 COSTS_N_INSNS (1), /* cost of movzx */
568 16, /* "large" insn */
569 6, /* MOVE_RATIO */
570 2, /* cost for loading QImode using movzbl */
571 {4, 5, 4}, /* cost of loading integer registers
572 in QImode, HImode and SImode.
573 Relative to reg-reg move (2). */
574 {2, 3, 2}, /* cost of storing integer registers */
575 2, /* cost of reg,reg fld/fst */
576 {2, 2, 6}, /* cost of loading fp registers
577 in SFmode, DFmode and XFmode */
578 {4, 4, 6}, /* cost of storing fp registers
579 in SFmode, DFmode and XFmode */
580 2, /* cost of moving MMX register */
581 {2, 2}, /* cost of loading MMX registers
582 in SImode and DImode */
583 {2, 2}, /* cost of storing MMX registers
584 in SImode and DImode */
585 12, /* cost of moving SSE register */
586 {12, 12, 12}, /* cost of loading SSE registers
587 in SImode, DImode and TImode */
588 {2, 2, 8}, /* cost of storing SSE registers
589 in SImode, DImode and TImode */
590 10, /* MMX or SSE register to integer */
591 64, /* size of prefetch block */
592 6, /* number of parallel prefetches */
593 2, /* Branch cost */
594 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
595 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
596 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
597 COSTS_N_INSNS (2), /* cost of FABS instruction. */
598 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
599 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
600 };
601
602 static const
603 struct processor_costs nocona_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 COSTS_N_INSNS (1), /* cost of a lea instruction */
606 COSTS_N_INSNS (1), /* variable shift costs */
607 COSTS_N_INSNS (1), /* constant shift costs */
608 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
609 COSTS_N_INSNS (10), /* HI */
610 COSTS_N_INSNS (10), /* SI */
611 COSTS_N_INSNS (10), /* DI */
612 COSTS_N_INSNS (10)}, /* other */
613 0, /* cost of multiply per each bit set */
614 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
615 COSTS_N_INSNS (66), /* HI */
616 COSTS_N_INSNS (66), /* SI */
617 COSTS_N_INSNS (66), /* DI */
618 COSTS_N_INSNS (66)}, /* other */
619 COSTS_N_INSNS (1), /* cost of movsx */
620 COSTS_N_INSNS (1), /* cost of movzx */
621 16, /* "large" insn */
622 17, /* MOVE_RATIO */
623 4, /* cost for loading QImode using movzbl */
624 {4, 4, 4}, /* cost of loading integer registers
625 in QImode, HImode and SImode.
626 Relative to reg-reg move (2). */
627 {4, 4, 4}, /* cost of storing integer registers */
628 3, /* cost of reg,reg fld/fst */
629 {12, 12, 12}, /* cost of loading fp registers
630 in SFmode, DFmode and XFmode */
631 {4, 4, 4}, /* cost of storing fp registers
632 in SFmode, DFmode and XFmode */
633 6, /* cost of moving MMX register */
634 {12, 12}, /* cost of loading MMX registers
635 in SImode and DImode */
636 {12, 12}, /* cost of storing MMX registers
637 in SImode and DImode */
638 6, /* cost of moving SSE register */
639 {12, 12, 12}, /* cost of loading SSE registers
640 in SImode, DImode and TImode */
641 {12, 12, 12}, /* cost of storing SSE registers
642 in SImode, DImode and TImode */
643 8, /* MMX or SSE register to integer */
644 128, /* size of prefetch block */
645 8, /* number of parallel prefetches */
646 1, /* Branch cost */
647 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
648 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
649 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
650 COSTS_N_INSNS (3), /* cost of FABS instruction. */
651 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
652 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
653 };
654
655 /* Generic64 should produce code tuned for Nocona and K8. */
656 static const
657 struct processor_costs generic64_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 /* On all chips taken into consideration, lea is 2 cycles or more. With
660 this cost, however, our current implementation of synth_mult results in
661 the use of unnecessary temporary registers, causing regressions on several
662 SPECfp benchmarks. */
663 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
664 COSTS_N_INSNS (1), /* variable shift costs */
665 COSTS_N_INSNS (1), /* constant shift costs */
666 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
667 COSTS_N_INSNS (4), /* HI */
668 COSTS_N_INSNS (3), /* SI */
669 COSTS_N_INSNS (4), /* DI */
670 COSTS_N_INSNS (2)}, /* other */
671 0, /* cost of multiply per each bit set */
672 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
673 COSTS_N_INSNS (26), /* HI */
674 COSTS_N_INSNS (42), /* SI */
675 COSTS_N_INSNS (74), /* DI */
676 COSTS_N_INSNS (74)}, /* other */
677 COSTS_N_INSNS (1), /* cost of movsx */
678 COSTS_N_INSNS (1), /* cost of movzx */
679 8, /* "large" insn */
680 17, /* MOVE_RATIO */
681 4, /* cost for loading QImode using movzbl */
682 {4, 4, 4}, /* cost of loading integer registers
683 in QImode, HImode and SImode.
684 Relative to reg-reg move (2). */
685 {4, 4, 4}, /* cost of storing integer registers */
686 4, /* cost of reg,reg fld/fst */
687 {12, 12, 12}, /* cost of loading fp registers
688 in SFmode, DFmode and XFmode */
689 {6, 6, 8}, /* cost of storing fp registers
690 in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {8, 8}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {8, 8}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {8, 8, 8}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {8, 8, 8}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 5, /* MMX or SSE register to integer */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
705 is increased to the perhaps more appropriate value of 5. */
706 3, /* Branch cost */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
713 };
714
715 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
716 static const
717 struct processor_costs generic32_cost = {
718 COSTS_N_INSNS (1), /* cost of an add instruction */
719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
720 COSTS_N_INSNS (1), /* variable shift costs */
721 COSTS_N_INSNS (1), /* constant shift costs */
722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
723 COSTS_N_INSNS (4), /* HI */
724 COSTS_N_INSNS (3), /* SI */
725 COSTS_N_INSNS (4), /* DI */
726 COSTS_N_INSNS (2)}, /* other */
727 0, /* cost of multiply per each bit set */
728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
729 COSTS_N_INSNS (26), /* HI */
730 COSTS_N_INSNS (42), /* SI */
731 COSTS_N_INSNS (74), /* DI */
732 COSTS_N_INSNS (74)}, /* other */
733 COSTS_N_INSNS (1), /* cost of movsx */
734 COSTS_N_INSNS (1), /* cost of movzx */
735 8, /* "large" insn */
736 17, /* MOVE_RATIO */
737 4, /* cost for loading QImode using movzbl */
738 {4, 4, 4}, /* cost of loading integer registers
739 in QImode, HImode and SImode.
740 Relative to reg-reg move (2). */
741 {4, 4, 4}, /* cost of storing integer registers */
742 4, /* cost of reg,reg fld/fst */
743 {12, 12, 12}, /* cost of loading fp registers
744 in SFmode, DFmode and XFmode */
745 {6, 6, 8}, /* cost of storing fp registers
746 in SFmode, DFmode and XFmode */
747 2, /* cost of moving MMX register */
748 {8, 8}, /* cost of loading MMX registers
749 in SImode and DImode */
750 {8, 8}, /* cost of storing MMX registers
751 in SImode and DImode */
752 2, /* cost of moving SSE register */
753 {8, 8, 8}, /* cost of loading SSE registers
754 in SImode, DImode and TImode */
755 {8, 8, 8}, /* cost of storing SSE registers
756 in SImode, DImode and TImode */
757 5, /* MMX or SSE register to integer */
758 64, /* size of prefetch block */
759 6, /* number of parallel prefetches */
760 3, /* Branch cost */
761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
764 COSTS_N_INSNS (8), /* cost of FABS instruction. */
765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
767 };
768
769 const struct processor_costs *ix86_cost = &pentium_cost;
770
771 /* Processor feature/optimization bitmasks. */
772 #define m_386 (1<<PROCESSOR_I386)
773 #define m_486 (1<<PROCESSOR_I486)
774 #define m_PENT (1<<PROCESSOR_PENTIUM)
775 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
776 #define m_GEODE (1<<PROCESSOR_GEODE)
777 #define m_K6_GEODE (m_K6 | m_GEODE)
778 #define m_K6 (1<<PROCESSOR_K6)
779 #define m_ATHLON (1<<PROCESSOR_ATHLON)
780 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
781 #define m_K8 (1<<PROCESSOR_K8)
782 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
783 #define m_NOCONA (1<<PROCESSOR_NOCONA)
784 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
785 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
786 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
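/* Sketch of how these masks are consumed (assumed macro names, following the
   i386.h conventions of this era): each x86_* tunable below is tested against
   the bit of the current tuning target, roughly
     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
   so setting or clearing a processor's m_* bit in a mask is what enables or
   disables the corresponding optimization for that -mtune target.  */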
787
788 /* Generic instruction choice should be a common subset of supported CPUs
789 (PPro/PENT4/NOCONA/Athlon/K8). */
790
791 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
792 Generic64 seems like a good code-size tradeoff. We can't enable it for 32bit
793 generic because it does not work well with PPro-based chips. */
794 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
795 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
796 const int x86_zero_extend_with_and = m_486 | m_PENT;
797 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
798 const int x86_double_with_add = ~m_386;
799 const int x86_use_bit_test = m_386;
800 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
801 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
802 const int x86_3dnow_a = m_ATHLON_K8;
803 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
804 /* Branch hints were put in the P4 based on simulation results. But
805 after the P4 was made, no performance benefit was observed from
806 branch hints; they also increase code size. As a result,
807 icc never generates branch hints. */
808 const int x86_branch_hints = 0;
809 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
810 /* We probably ought to watch for partial register stalls on the Generic32
811 compilation setting as well. However, in the current implementation the
812 partial register stalls are not eliminated very well - they can
813 be introduced via subregs synthesized by combine and can happen
814 in caller/callee saving sequences.
815 Because this option pays back little on PPro-based chips and conflicts
816 with the partial register dependencies used by Athlon/P4-based chips, it is better
817 to leave it off for generic32 for now. */
818 const int x86_partial_reg_stall = m_PPRO;
819 const int x86_partial_flag_reg_stall = m_GENERIC;
820 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
821 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
822 const int x86_use_mov0 = m_K6;
823 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
824 const int x86_read_modify_write = ~m_PENT;
825 const int x86_read_modify = ~(m_PENT | m_PPRO);
826 const int x86_split_long_moves = m_PPRO;
827 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
828 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
829 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
830 const int x86_qimode_math = ~(0);
831 const int x86_promote_qi_regs = 0;
832 /* On PPro this flag is meant to avoid partial register stalls. Just like
833 x86_partial_reg_stall, this option might be considered for Generic32
834 if our scheme for avoiding partial stalls were more effective. */
835 const int x86_himode_math = ~(m_PPRO);
836 const int x86_promote_hi_regs = m_PPRO;
837 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
838 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
839 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC;
840 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
841 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_GEODE);
842 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
843 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
844 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
845 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
846 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
847 const int x86_shift1 = ~m_486;
848 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
849 /* In the Generic model we have a conflict here between PPro/Pentium4-based chips
850 that treat 128bit SSE registers as single units and K8-based chips that
851 divide SSE registers into two 64bit halves.
852 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
853 to allow register renaming on 128bit SSE units, but this usually results in one
854 extra microop on 64bit SSE units. Experimental results show that disabling
855 this option on P4 brings over 20% SPECfp regression, while enabling it on
856 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
857 of moves. */
858 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
859 /* Set for machines where the type and dependencies are resolved on SSE
860 register parts instead of whole registers, so we may maintain just the
861 lower part of scalar values in the proper format, leaving the upper part
862 undefined. */
863 const int x86_sse_split_regs = m_ATHLON_K8;
864 const int x86_sse_typeless_stores = m_ATHLON_K8;
865 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
866 const int x86_use_ffreep = m_ATHLON_K8;
867 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
868 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
869
870 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
871 integer data in xmm registers, which results in pretty abysmal code. */
872 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
873
874 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
875 /* Some CPU cores are not able to predict more than 4 branch instructions in
876 the 16 byte window. */
877 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
878 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_GENERIC;
879 const int x86_use_bt = m_ATHLON_K8;
880 /* Compare and exchange was added for 80486. */
881 const int x86_cmpxchg = ~m_386;
882 /* Compare and exchange 8 bytes was added for the Pentium. */
883 const int x86_cmpxchg8b = ~(m_386 | m_486);
884 /* Compare and exchange 16 bytes was added for Nocona. */
885 const int x86_cmpxchg16b = m_NOCONA;
886 /* Exchange and add was added for 80486. */
887 const int x86_xadd = ~m_386;
888 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
889
890 /* If the average insn count for a single function invocation is
891 lower than this constant, emit fast (but longer) prologue and
892 epilogue code. */
893 #define FAST_PROLOGUE_INSN_COUNT 20
894
895 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
896 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
897 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
898 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
899
900 /* Array of the smallest class containing reg number REGNO, indexed by
901 REGNO. Used by REGNO_REG_CLASS in i386.h. */
902
903 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
904 {
905 /* ax, dx, cx, bx */
906 AREG, DREG, CREG, BREG,
907 /* si, di, bp, sp */
908 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
909 /* FP registers */
910 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
911 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
912 /* arg pointer */
913 NON_Q_REGS,
914 /* flags, fpsr, fpcr, dirflag, frame */
915 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
916 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
917 SSE_REGS, SSE_REGS,
918 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
919 MMX_REGS, MMX_REGS,
920 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
921 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
922 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
923 SSE_REGS, SSE_REGS,
924 };
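/* For illustration: REGNO_REG_CLASS maps a hard register number to the
   smallest class in this table, e.g. REGNO_REG_CLASS (0) == AREG for %eax
   and REGNO_REG_CLASS (7) == NON_Q_REGS for %esp, which has no 8-bit
   (QImode) subregister in 32-bit mode.  */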
925
926 /* The "default" register map used in 32bit mode. */
927
928 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
929 {
930 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
931 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
932 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
933 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
934 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
935 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
936 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
937 };
938
939 static int const x86_64_int_parameter_registers[6] =
940 {
941 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
942 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
943 };
944
945 static int const x86_64_int_return_registers[4] =
946 {
947 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
948 };
949
950 /* The "default" register map used in 64bit mode. */
951 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
952 {
953 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
954 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
955 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
956 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
957 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
958 8,9,10,11,12,13,14,15, /* extended integer registers */
959 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
960 };
961
962 /* Define the register numbers to be used in Dwarf debugging information.
963 The SVR4 reference port C compiler uses the following register numbers
964 in its Dwarf output code:
965 0 for %eax (gcc regno = 0)
966 1 for %ecx (gcc regno = 2)
967 2 for %edx (gcc regno = 1)
968 3 for %ebx (gcc regno = 3)
969 4 for %esp (gcc regno = 7)
970 5 for %ebp (gcc regno = 6)
971 6 for %esi (gcc regno = 4)
972 7 for %edi (gcc regno = 5)
973 The following three DWARF register numbers are never generated by
974 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
975 believes these numbers have these meanings.
976 8 for %eip (no gcc equivalent)
977 9 for %eflags (gcc regno = 17)
978 10 for %trapno (no gcc equivalent)
979 It is not at all clear how we should number the FP stack registers
980 for the x86 architecture. If the version of SDB on x86/svr4 were
981 a bit less brain dead with respect to floating-point then we would
982 have a precedent to follow with respect to DWARF register numbers
983 for x86 FP registers, but the SDB on x86/svr4 is so completely
984 broken with respect to FP registers that it is hardly worth thinking
985 of it as something to strive for compatibility with.
986 The version of x86/svr4 SDB I have at the moment does (partially)
987 seem to believe that DWARF register number 11 is associated with
988 the x86 register %st(0), but that's about all. Higher DWARF
989 register numbers don't seem to be associated with anything in
990 particular, and even for DWARF regno 11, SDB only seems to under-
991 stand that it should say that a variable lives in %st(0) (when
992 asked via an `=' command) if we said it was in DWARF regno 11,
993 but SDB still prints garbage when asked for the value of the
994 variable in question (via a `/' command).
995 (Also note that the labels SDB prints for various FP stack regs
996 when doing an `x' command are all wrong.)
997 Note that these problems generally don't affect the native SVR4
998 C compiler because it doesn't allow the use of -O with -g and
999 because when it is *not* optimizing, it allocates a memory
1000 location for each floating-point variable, and the memory
1001 location is what gets described in the DWARF AT_location
1002 attribute for the variable in question.
1003 Regardless of the severe mental illness of the x86/svr4 SDB, we
1004 do something sensible here and we use the following DWARF
1005 register numbers. Note that these are all stack-top-relative
1006 numbers.
1007 11 for %st(0) (gcc regno = 8)
1008 12 for %st(1) (gcc regno = 9)
1009 13 for %st(2) (gcc regno = 10)
1010 14 for %st(3) (gcc regno = 11)
1011 15 for %st(4) (gcc regno = 12)
1012 16 for %st(5) (gcc regno = 13)
1013 17 for %st(6) (gcc regno = 14)
1014 18 for %st(7) (gcc regno = 15)
1015 */
1016 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1017 {
1018 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1019 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1020 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1021 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1022 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1023 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1024 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1025 };
1026
1027 /* Test and compare insns in i386.md store the information needed to
1028 generate branch and scc insns here. */
1029
1030 rtx ix86_compare_op0 = NULL_RTX;
1031 rtx ix86_compare_op1 = NULL_RTX;
1032 rtx ix86_compare_emitted = NULL_RTX;
1033
1034 /* Size of the register save area. */
1035 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
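/* Worked example (assuming the 64-bit ABI values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8): the register save area is
   6*8 + 8*16 = 176 bytes - 48 bytes for the integer argument registers
   followed by 128 bytes for the SSE argument registers.  */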
1036
1037 /* Define the structure for the machine field in struct function. */
1038
1039 struct stack_local_entry GTY(())
1040 {
1041 unsigned short mode;
1042 unsigned short n;
1043 rtx rtl;
1044 struct stack_local_entry *next;
1045 };
1046
1047 /* Structure describing stack frame layout.
1048 Stack grows downward:
1049
1050 [arguments]
1051 <- ARG_POINTER
1052 saved pc
1053
1054 saved frame pointer if frame_pointer_needed
1055 <- HARD_FRAME_POINTER
1056 [saved regs]
1057
1058 [padding1] \
1059 )
1060 [va_arg registers] (
1061 > to_allocate <- FRAME_POINTER
1062 [frame] (
1063 )
1064 [padding2] /
1065 */
1066 struct ix86_frame
1067 {
1068 int nregs;
1069 int padding1;
1070 int va_arg_size;
1071 HOST_WIDE_INT frame;
1072 int padding2;
1073 int outgoing_arguments_size;
1074 int red_zone_size;
1075
1076 HOST_WIDE_INT to_allocate;
1077 /* The offsets relative to ARG_POINTER. */
1078 HOST_WIDE_INT frame_pointer_offset;
1079 HOST_WIDE_INT hard_frame_pointer_offset;
1080 HOST_WIDE_INT stack_pointer_offset;
1081
1082 /* When save_regs_using_mov is set, emit prologue using
1083 move instead of push instructions. */
1084 bool save_regs_using_mov;
1085 };
1086
1087 /* Code model option. */
1088 enum cmodel ix86_cmodel;
1089 /* Asm dialect. */
1090 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1091 /* TLS dialects. */
1092 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1093
1094 /* Which unit we are generating floating point math for. */
1095 enum fpmath_unit ix86_fpmath;
1096
1097 /* Which CPU we are scheduling for. */
1098 enum processor_type ix86_tune;
1099 /* Which instruction set architecture to use. */
1100 enum processor_type ix86_arch;
1101
1102 /* True if the SSE prefetch instruction is not a NOP. */
1103 int x86_prefetch_sse;
1104
1105 /* ix86_regparm_string as a number */
1106 static int ix86_regparm;
1107
1108 /* -mstackrealign option */
1109 extern int ix86_force_align_arg_pointer;
1110 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1111
1112 /* Preferred alignment for stack boundary in bits. */
1113 unsigned int ix86_preferred_stack_boundary;
1114
1115 /* Values 1-5: see jump.c */
1116 int ix86_branch_cost;
1117
1118 /* Variables which are this size or smaller are put in the data/bss
1119 or ldata/lbss sections. */
1120
1121 int ix86_section_threshold = 65536;
1122
1123 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1124 char internal_label_prefix[16];
1125 int internal_label_prefix_len;
1126 \f
1127 static bool ix86_handle_option (size_t, const char *, int);
1128 static void output_pic_addr_const (FILE *, rtx, int);
1129 static void put_condition_code (enum rtx_code, enum machine_mode,
1130 int, int, FILE *);
1131 static const char *get_some_local_dynamic_name (void);
1132 static int get_some_local_dynamic_name_1 (rtx *, void *);
1133 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1134 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1135 rtx *);
1136 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1137 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1138 enum machine_mode);
1139 static rtx get_thread_pointer (int);
1140 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1141 static void get_pc_thunk_name (char [32], unsigned int);
1142 static rtx gen_push (rtx);
1143 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1144 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1145 static struct machine_function * ix86_init_machine_status (void);
1146 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1147 static int ix86_nsaved_regs (void);
1148 static void ix86_emit_save_regs (void);
1149 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1150 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1151 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1152 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1153 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1154 static rtx ix86_expand_aligntest (rtx, int);
1155 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1156 static int ix86_issue_rate (void);
1157 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1158 static int ia32_multipass_dfa_lookahead (void);
1159 static void ix86_init_mmx_sse_builtins (void);
1160 static rtx x86_this_parameter (tree);
1161 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1162 HOST_WIDE_INT, tree);
1163 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1164 static void x86_file_start (void);
1165 static void ix86_reorg (void);
1166 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1167 static tree ix86_build_builtin_va_list (void);
1168 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1169 tree, int *, int);
1170 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1171 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1172 static bool ix86_vector_mode_supported_p (enum machine_mode);
1173
1174 static int ix86_address_cost (rtx);
1175 static bool ix86_cannot_force_const_mem (rtx);
1176 static rtx ix86_delegitimize_address (rtx);
1177
1178 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1179
1180 struct builtin_description;
1181 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1182 tree, rtx);
1183 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1184 tree, rtx);
1185 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1186 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1187 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1188 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1189 static rtx safe_vector_operand (rtx, enum machine_mode);
1190 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1191 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1192 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1193 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1194 static int ix86_fp_comparison_cost (enum rtx_code code);
1195 static unsigned int ix86_select_alt_pic_regnum (void);
1196 static int ix86_save_reg (unsigned int, int);
1197 static void ix86_compute_frame_layout (struct ix86_frame *);
1198 static int ix86_comp_type_attributes (tree, tree);
1199 static int ix86_function_regparm (tree, tree);
1200 const struct attribute_spec ix86_attribute_table[];
1201 static bool ix86_function_ok_for_sibcall (tree, tree);
1202 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1203 static int ix86_value_regno (enum machine_mode, tree, tree);
1204 static bool contains_128bit_aligned_vector_p (tree);
1205 static rtx ix86_struct_value_rtx (tree, int);
1206 static bool ix86_ms_bitfield_layout_p (tree);
1207 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1208 static int extended_reg_mentioned_1 (rtx *, void *);
1209 static bool ix86_rtx_costs (rtx, int, int, int *);
1210 static int min_insn_size (rtx);
1211 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1212 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1213 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1214 tree, bool);
1215 static void ix86_init_builtins (void);
1216 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1217 static const char *ix86_mangle_fundamental_type (tree);
1218 static tree ix86_stack_protect_fail (void);
1219 static rtx ix86_internal_arg_pointer (void);
1220 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1221
1222 /* This function is only used on Solaris. */
1223 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1224 ATTRIBUTE_UNUSED;
1225
1226 /* Register class used for passing a given 64bit part of the argument.
1227 These represent classes as documented by the psABI, with the exception
1228 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1229 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1230 
1231 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1232 whenever possible (the upper half then contains padding).
1233 */
1234 enum x86_64_reg_class
1235 {
1236 X86_64_NO_CLASS,
1237 X86_64_INTEGER_CLASS,
1238 X86_64_INTEGERSI_CLASS,
1239 X86_64_SSE_CLASS,
1240 X86_64_SSESF_CLASS,
1241 X86_64_SSEDF_CLASS,
1242 X86_64_SSEUP_CLASS,
1243 X86_64_X87_CLASS,
1244 X86_64_X87UP_CLASS,
1245 X86_64_COMPLEX_X87_CLASS,
1246 X86_64_MEMORY_CLASS
1247 };
1248 static const char * const x86_64_reg_class_name[] = {
1249 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1250 "sseup", "x87", "x87up", "cplx87", "no"
1251 };
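/* Rough illustration of this classification (see the psABI for the
   authoritative rules): a 16-byte struct { double d; long l; } is split into
   two eightbytes, the first classified X86_64_SSEDF_CLASS and the second
   X86_64_INTEGER_CLASS, so it is passed in one SSE register and one
   general-purpose register.  */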
1252
1253 #define MAX_CLASSES 4
1254
1255 /* Table of constants used by fldpi, fldln2, etc.... */
1256 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1257 static bool ext_80387_constants_init = 0;
1258 static void init_ext_80387_constants (void);
1259 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1260 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1261 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1262 static section *x86_64_elf_select_section (tree decl, int reloc,
1263 unsigned HOST_WIDE_INT align)
1264 ATTRIBUTE_UNUSED;
1265 \f
1266 /* Initialize the GCC target structure. */
1267 #undef TARGET_ATTRIBUTE_TABLE
1268 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1269 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1270 # undef TARGET_MERGE_DECL_ATTRIBUTES
1271 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1272 #endif
1273
1274 #undef TARGET_COMP_TYPE_ATTRIBUTES
1275 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1276
1277 #undef TARGET_INIT_BUILTINS
1278 #define TARGET_INIT_BUILTINS ix86_init_builtins
1279 #undef TARGET_EXPAND_BUILTIN
1280 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1281
1282 #undef TARGET_ASM_FUNCTION_EPILOGUE
1283 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1284
1285 #undef TARGET_ENCODE_SECTION_INFO
1286 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1287 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1288 #else
1289 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1290 #endif
1291
1292 #undef TARGET_ASM_OPEN_PAREN
1293 #define TARGET_ASM_OPEN_PAREN ""
1294 #undef TARGET_ASM_CLOSE_PAREN
1295 #define TARGET_ASM_CLOSE_PAREN ""
1296
1297 #undef TARGET_ASM_ALIGNED_HI_OP
1298 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1299 #undef TARGET_ASM_ALIGNED_SI_OP
1300 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1301 #ifdef ASM_QUAD
1302 #undef TARGET_ASM_ALIGNED_DI_OP
1303 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1304 #endif
1305
1306 #undef TARGET_ASM_UNALIGNED_HI_OP
1307 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1308 #undef TARGET_ASM_UNALIGNED_SI_OP
1309 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1310 #undef TARGET_ASM_UNALIGNED_DI_OP
1311 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1312
1313 #undef TARGET_SCHED_ADJUST_COST
1314 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1315 #undef TARGET_SCHED_ISSUE_RATE
1316 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1317 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1318 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1319 ia32_multipass_dfa_lookahead
1320
1321 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1322 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1323
1324 #ifdef HAVE_AS_TLS
1325 #undef TARGET_HAVE_TLS
1326 #define TARGET_HAVE_TLS true
1327 #endif
1328 #undef TARGET_CANNOT_FORCE_CONST_MEM
1329 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1330 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1331 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1332
1333 #undef TARGET_DELEGITIMIZE_ADDRESS
1334 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1335
1336 #undef TARGET_MS_BITFIELD_LAYOUT_P
1337 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1338
1339 #if TARGET_MACHO
1340 #undef TARGET_BINDS_LOCAL_P
1341 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1342 #endif
1343
1344 #undef TARGET_ASM_OUTPUT_MI_THUNK
1345 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1346 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1347 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1348
1349 #undef TARGET_ASM_FILE_START
1350 #define TARGET_ASM_FILE_START x86_file_start
1351
1352 #undef TARGET_DEFAULT_TARGET_FLAGS
1353 #define TARGET_DEFAULT_TARGET_FLAGS \
1354 (TARGET_DEFAULT \
1355 | TARGET_64BIT_DEFAULT \
1356 | TARGET_SUBTARGET_DEFAULT \
1357 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1358
1359 #undef TARGET_HANDLE_OPTION
1360 #define TARGET_HANDLE_OPTION ix86_handle_option
1361
1362 #undef TARGET_RTX_COSTS
1363 #define TARGET_RTX_COSTS ix86_rtx_costs
1364 #undef TARGET_ADDRESS_COST
1365 #define TARGET_ADDRESS_COST ix86_address_cost
1366
1367 #undef TARGET_FIXED_CONDITION_CODE_REGS
1368 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1369 #undef TARGET_CC_MODES_COMPATIBLE
1370 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1371
1372 #undef TARGET_MACHINE_DEPENDENT_REORG
1373 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1374
1375 #undef TARGET_BUILD_BUILTIN_VA_LIST
1376 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1377
1378 #undef TARGET_MD_ASM_CLOBBERS
1379 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1380
1381 #undef TARGET_PROMOTE_PROTOTYPES
1382 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1383 #undef TARGET_STRUCT_VALUE_RTX
1384 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1385 #undef TARGET_SETUP_INCOMING_VARARGS
1386 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1387 #undef TARGET_MUST_PASS_IN_STACK
1388 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1389 #undef TARGET_PASS_BY_REFERENCE
1390 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1391 #undef TARGET_INTERNAL_ARG_POINTER
1392 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1393 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1394 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1395
1396 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1397 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1398
1399 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1400 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1401
1402 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1403 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1404
1405 #ifdef HAVE_AS_TLS
1406 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1407 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1408 #endif
1409
1410 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1411 #undef TARGET_INSERT_ATTRIBUTES
1412 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1413 #endif
1414
1415 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1416 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1417
1418 #undef TARGET_STACK_PROTECT_FAIL
1419 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1420
1421 #undef TARGET_FUNCTION_VALUE
1422 #define TARGET_FUNCTION_VALUE ix86_function_value
1423
1424 struct gcc_target targetm = TARGET_INITIALIZER;
1425
1426 \f
1427 /* The svr4 ABI for the i386 says that records and unions are returned
1428 in memory. */
1429 #ifndef DEFAULT_PCC_STRUCT_RETURN
1430 #define DEFAULT_PCC_STRUCT_RETURN 1
1431 #endif
1432
1433 /* Implement TARGET_HANDLE_OPTION. */
1434
1435 static bool
1436 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1437 {
1438 switch (code)
1439 {
1440 case OPT_m3dnow:
1441 if (!value)
1442 {
1443 target_flags &= ~MASK_3DNOW_A;
1444 target_flags_explicit |= MASK_3DNOW_A;
1445 }
1446 return true;
1447
1448 case OPT_mmmx:
1449 if (!value)
1450 {
1451 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1452 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1453 }
1454 return true;
1455
1456 case OPT_msse:
1457 if (!value)
1458 {
1459 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1460 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1461 }
1462 return true;
1463
1464 case OPT_msse2:
1465 if (!value)
1466 {
1467 target_flags &= ~MASK_SSE3;
1468 target_flags_explicit |= MASK_SSE3;
1469 }
1470 return true;
1471
1472 default:
1473 return true;
1474 }
1475 }
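/* An example of the cascade above: -mno-sse2 additionally clears MASK_SSE3
   and records it in target_flags_explicit, so a later -march default will
   not silently re-enable SSE3 (clearing MASK_SSE2 itself is done by the
   generic option machinery). */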
1476
1477 /* Sometimes certain combinations of command options do not make
1478 sense on a particular target machine. You can define a macro
1479 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1480 defined, is executed once just after all the command options have
1481 been parsed.
1482
1483 Don't use this macro to turn on various extra optimizations for
1484 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1485
1486 void
1487 override_options (void)
1488 {
1489 int i;
1490 int ix86_tune_defaulted = 0;
1491
1492 /* Comes from final.c -- no real reason to change it. */
1493 #define MAX_CODE_ALIGN 16
1494
1495 static struct ptt
1496 {
1497 const struct processor_costs *cost; /* Processor costs */
1498 const int target_enable; /* Target flags to enable. */
1499 const int target_disable; /* Target flags to disable. */
1500 const int align_loop; /* Default alignments. */
1501 const int align_loop_max_skip;
1502 const int align_jump;
1503 const int align_jump_max_skip;
1504 const int align_func;
1505 }
1506 const processor_target_table[PROCESSOR_max] =
1507 {
1508 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1509 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1510 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1511 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1512 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1513 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1514 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1515 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1516 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1517 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1518 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1519 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1520 };
1521
1522 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1523 static struct pta
1524 {
1525 const char *const name; /* processor name or nickname. */
1526 const enum processor_type processor;
1527 const enum pta_flags
1528 {
1529 PTA_SSE = 1,
1530 PTA_SSE2 = 2,
1531 PTA_SSE3 = 4,
1532 PTA_MMX = 8,
1533 PTA_PREFETCH_SSE = 16,
1534 PTA_3DNOW = 32,
1535 PTA_3DNOW_A = 64,
1536 PTA_64BIT = 128,
1537 PTA_SSSE3 = 256
1538 } flags;
1539 }
1540 const processor_alias_table[] =
1541 {
1542 {"i386", PROCESSOR_I386, 0},
1543 {"i486", PROCESSOR_I486, 0},
1544 {"i586", PROCESSOR_PENTIUM, 0},
1545 {"pentium", PROCESSOR_PENTIUM, 0},
1546 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1547 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1548 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1549 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1550 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1551 {"i686", PROCESSOR_PENTIUMPRO, 0},
1552 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1553 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1554 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1555 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1556 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1557 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1558 | PTA_MMX | PTA_PREFETCH_SSE},
1559 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1560 | PTA_MMX | PTA_PREFETCH_SSE},
1561 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1562 | PTA_MMX | PTA_PREFETCH_SSE},
1563 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1564 | PTA_MMX | PTA_PREFETCH_SSE},
1565 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1566 | PTA_3DNOW_A},
1567 {"k6", PROCESSOR_K6, PTA_MMX},
1568 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1569 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1570 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1571 | PTA_3DNOW_A},
1572 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1573 | PTA_3DNOW | PTA_3DNOW_A},
1574 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1575 | PTA_3DNOW_A | PTA_SSE},
1576 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1577 | PTA_3DNOW_A | PTA_SSE},
1578 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1579 | PTA_3DNOW_A | PTA_SSE},
1580 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1581 | PTA_SSE | PTA_SSE2 },
1582 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1583 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1584 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1585 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1586 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1587 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1588 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1589 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1590 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1591 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1592 };
1593
1594 int const pta_size = ARRAY_SIZE (processor_alias_table);
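/* For example, -march=k8 selects PROCESSOR_K8 and, via the table above and
   the -march loop below, enables MMX, 3DNow!, 3DNow!A, SSE and SSE2 unless
   the user explicitly disabled them on the command line. */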
1595
1596 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1597 SUBTARGET_OVERRIDE_OPTIONS;
1598 #endif
1599
1600 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1601 SUBSUBTARGET_OVERRIDE_OPTIONS;
1602 #endif
1603
1604 /* -fPIC is the default for x86_64 Darwin (Mach-O). */
1605 if (TARGET_MACHO && TARGET_64BIT)
1606 flag_pic = 2;
1607
1608 /* Set the default values for switches whose default depends on TARGET_64BIT
1609 in case they weren't overwritten by command line options. */
1610 if (TARGET_64BIT)
1611 {
1612 /* Mach-O doesn't support omitting the frame pointer for now. */
1613 if (flag_omit_frame_pointer == 2)
1614 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1615 if (flag_asynchronous_unwind_tables == 2)
1616 flag_asynchronous_unwind_tables = 1;
1617 if (flag_pcc_struct_return == 2)
1618 flag_pcc_struct_return = 0;
1619 }
1620 else
1621 {
1622 if (flag_omit_frame_pointer == 2)
1623 flag_omit_frame_pointer = 0;
1624 if (flag_asynchronous_unwind_tables == 2)
1625 flag_asynchronous_unwind_tables = 0;
1626 if (flag_pcc_struct_return == 2)
1627 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1628 }
1629
1630 /* Need to check -mtune=generic first. */
1631 if (ix86_tune_string)
1632 {
1633 if (!strcmp (ix86_tune_string, "generic")
1634 || !strcmp (ix86_tune_string, "i686")
1635 /* As special support for cross compilers we read -mtune=native
1636 as -mtune=generic. With native compilers we won't see
1637 -mtune=native, as it was changed by the driver. */
1638 || !strcmp (ix86_tune_string, "native"))
1639 {
1640 if (TARGET_64BIT)
1641 ix86_tune_string = "generic64";
1642 else
1643 ix86_tune_string = "generic32";
1644 }
1645 else if (!strncmp (ix86_tune_string, "generic", 7))
1646 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1647 }
1648 else
1649 {
1650 if (ix86_arch_string)
1651 ix86_tune_string = ix86_arch_string;
1652 if (!ix86_tune_string)
1653 {
1654 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1655 ix86_tune_defaulted = 1;
1656 }
1657
1658 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1659 need to use a sensible tune option. */
1660 if (!strcmp (ix86_tune_string, "generic")
1661 || !strcmp (ix86_tune_string, "x86-64")
1662 || !strcmp (ix86_tune_string, "i686"))
1663 {
1664 if (TARGET_64BIT)
1665 ix86_tune_string = "generic64";
1666 else
1667 ix86_tune_string = "generic32";
1668 }
1669 }
1670 if (!strcmp (ix86_tune_string, "x86-64"))
1671 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1672 "-mtune=generic instead as appropriate.");
1673
1674 if (!ix86_arch_string)
1675 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1676 if (!strcmp (ix86_arch_string, "generic"))
1677 error ("generic CPU can be used only for -mtune= switch");
1678 if (!strncmp (ix86_arch_string, "generic", 7))
1679 error ("bad value (%s) for -march= switch", ix86_arch_string);
1680
1681 if (ix86_cmodel_string != 0)
1682 {
1683 if (!strcmp (ix86_cmodel_string, "small"))
1684 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1685 else if (!strcmp (ix86_cmodel_string, "medium"))
1686 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1687 else if (flag_pic)
1688 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1689 else if (!strcmp (ix86_cmodel_string, "32"))
1690 ix86_cmodel = CM_32;
1691 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1692 ix86_cmodel = CM_KERNEL;
1693 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1694 ix86_cmodel = CM_LARGE;
1695 else
1696 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1697 }
1698 else
1699 {
1700 ix86_cmodel = CM_32;
1701 if (TARGET_64BIT)
1702 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1703 }
1704 if (ix86_asm_string != 0)
1705 {
1706 if (! TARGET_MACHO
1707 && !strcmp (ix86_asm_string, "intel"))
1708 ix86_asm_dialect = ASM_INTEL;
1709 else if (!strcmp (ix86_asm_string, "att"))
1710 ix86_asm_dialect = ASM_ATT;
1711 else
1712 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1713 }
1714 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1715 error ("code model %qs not supported in the %s bit mode",
1716 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1717 if (ix86_cmodel == CM_LARGE)
1718 sorry ("code model %<large%> not supported yet");
1719 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1720 sorry ("%i-bit mode not compiled in",
1721 (target_flags & MASK_64BIT) ? 64 : 32);
1722
1723 for (i = 0; i < pta_size; i++)
1724 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1725 {
1726 ix86_arch = processor_alias_table[i].processor;
1727 /* Default cpu tuning to the architecture. */
1728 ix86_tune = ix86_arch;
1729 if (processor_alias_table[i].flags & PTA_MMX
1730 && !(target_flags_explicit & MASK_MMX))
1731 target_flags |= MASK_MMX;
1732 if (processor_alias_table[i].flags & PTA_3DNOW
1733 && !(target_flags_explicit & MASK_3DNOW))
1734 target_flags |= MASK_3DNOW;
1735 if (processor_alias_table[i].flags & PTA_3DNOW_A
1736 && !(target_flags_explicit & MASK_3DNOW_A))
1737 target_flags |= MASK_3DNOW_A;
1738 if (processor_alias_table[i].flags & PTA_SSE
1739 && !(target_flags_explicit & MASK_SSE))
1740 target_flags |= MASK_SSE;
1741 if (processor_alias_table[i].flags & PTA_SSE2
1742 && !(target_flags_explicit & MASK_SSE2))
1743 target_flags |= MASK_SSE2;
1744 if (processor_alias_table[i].flags & PTA_SSE3
1745 && !(target_flags_explicit & MASK_SSE3))
1746 target_flags |= MASK_SSE3;
1747 if (processor_alias_table[i].flags & PTA_SSSE3
1748 && !(target_flags_explicit & MASK_SSSE3))
1749 target_flags |= MASK_SSSE3;
1750 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1751 x86_prefetch_sse = true;
1752 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1753 error ("CPU you selected does not support x86-64 "
1754 "instruction set");
1755 break;
1756 }
1757
1758 if (i == pta_size)
1759 error ("bad value (%s) for -march= switch", ix86_arch_string);
1760
1761 for (i = 0; i < pta_size; i++)
1762 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1763 {
1764 ix86_tune = processor_alias_table[i].processor;
1765 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1766 {
1767 if (ix86_tune_defaulted)
1768 {
1769 ix86_tune_string = "x86-64";
1770 for (i = 0; i < pta_size; i++)
1771 if (! strcmp (ix86_tune_string,
1772 processor_alias_table[i].name))
1773 break;
1774 ix86_tune = processor_alias_table[i].processor;
1775 }
1776 else
1777 error ("CPU you selected does not support x86-64 "
1778 "instruction set");
1779 }
1780 /* Intel CPUs have always interpreted SSE prefetch instructions as
1781 NOPs; so, we can enable SSE prefetch instructions even when
1782 -mtune (rather than -march) points us to a processor that has them.
1783 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1784 higher processors. */
1785 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1786 x86_prefetch_sse = true;
1787 break;
1788 }
1789 if (i == pta_size)
1790 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1791
1792 if (optimize_size)
1793 ix86_cost = &size_cost;
1794 else
1795 ix86_cost = processor_target_table[ix86_tune].cost;
1796 target_flags |= processor_target_table[ix86_tune].target_enable;
1797 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1798
1799 /* Arrange to set up i386_stack_locals for all functions. */
1800 init_machine_status = ix86_init_machine_status;
1801
1802 /* Validate -mregparm= value. */
1803 if (ix86_regparm_string)
1804 {
1805 i = atoi (ix86_regparm_string);
1806 if (i < 0 || i > REGPARM_MAX)
1807 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1808 else
1809 ix86_regparm = i;
1810 }
1811 else
1812 if (TARGET_64BIT)
1813 ix86_regparm = REGPARM_MAX;
1814
1815 /* If the user has provided any of the -malign-* options,
1816 warn and use that value only if -falign-* is not set.
1817 Remove this code in GCC 3.2 or later. */
1818 if (ix86_align_loops_string)
1819 {
1820 warning (0, "-malign-loops is obsolete, use -falign-loops");
1821 if (align_loops == 0)
1822 {
1823 i = atoi (ix86_align_loops_string);
1824 if (i < 0 || i > MAX_CODE_ALIGN)
1825 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1826 else
1827 align_loops = 1 << i;
1828 }
1829 }
1830
1831 if (ix86_align_jumps_string)
1832 {
1833 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1834 if (align_jumps == 0)
1835 {
1836 i = atoi (ix86_align_jumps_string);
1837 if (i < 0 || i > MAX_CODE_ALIGN)
1838 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1839 else
1840 align_jumps = 1 << i;
1841 }
1842 }
1843
1844 if (ix86_align_funcs_string)
1845 {
1846 warning (0, "-malign-functions is obsolete, use -falign-functions");
1847 if (align_functions == 0)
1848 {
1849 i = atoi (ix86_align_funcs_string);
1850 if (i < 0 || i > MAX_CODE_ALIGN)
1851 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1852 else
1853 align_functions = 1 << i;
1854 }
1855 }
1856
1857 /* Default align_* from the processor table. */
1858 if (align_loops == 0)
1859 {
1860 align_loops = processor_target_table[ix86_tune].align_loop;
1861 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1862 }
1863 if (align_jumps == 0)
1864 {
1865 align_jumps = processor_target_table[ix86_tune].align_jump;
1866 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1867 }
1868 if (align_functions == 0)
1869 {
1870 align_functions = processor_target_table[ix86_tune].align_func;
1871 }
1872
1873 /* Validate -mbranch-cost= value, or provide default. */
1874 ix86_branch_cost = ix86_cost->branch_cost;
1875 if (ix86_branch_cost_string)
1876 {
1877 i = atoi (ix86_branch_cost_string);
1878 if (i < 0 || i > 5)
1879 error ("-mbranch-cost=%d is not between 0 and 5", i);
1880 else
1881 ix86_branch_cost = i;
1882 }
1883 if (ix86_section_threshold_string)
1884 {
1885 i = atoi (ix86_section_threshold_string);
1886 if (i < 0)
1887 error ("-mlarge-data-threshold=%d is negative", i);
1888 else
1889 ix86_section_threshold = i;
1890 }
1891
1892 if (ix86_tls_dialect_string)
1893 {
1894 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1895 ix86_tls_dialect = TLS_DIALECT_GNU;
1896 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1897 ix86_tls_dialect = TLS_DIALECT_GNU2;
1898 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1899 ix86_tls_dialect = TLS_DIALECT_SUN;
1900 else
1901 error ("bad value (%s) for -mtls-dialect= switch",
1902 ix86_tls_dialect_string);
1903 }
1904
1905 /* Keep nonleaf frame pointers. */
1906 if (flag_omit_frame_pointer)
1907 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1908 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1909 flag_omit_frame_pointer = 1;
1910
1911 /* If we're doing fast math, we don't care about comparison order
1912 wrt NaNs. This lets us use a shorter comparison sequence. */
1913 if (flag_finite_math_only)
1914 target_flags &= ~MASK_IEEE_FP;
1915
1916 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1917 since the insns won't need emulation. */
1918 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1919 target_flags &= ~MASK_NO_FANCY_MATH_387;
1920
1921 /* Likewise, if the target doesn't have a 387, or we've specified
1922 software floating point, don't use 387 inline intrinsics. */
1923 if (!TARGET_80387)
1924 target_flags |= MASK_NO_FANCY_MATH_387;
1925
1926 /* Turn on SSE3 builtins for -mssse3. */
1927 if (TARGET_SSSE3)
1928 target_flags |= MASK_SSE3;
1929
1930 /* Turn on SSE2 builtins for -msse3. */
1931 if (TARGET_SSE3)
1932 target_flags |= MASK_SSE2;
1933
1934 /* Turn on SSE builtins for -msse2. */
1935 if (TARGET_SSE2)
1936 target_flags |= MASK_SSE;
1937
1938 /* Turn on MMX builtins for -msse. */
1939 if (TARGET_SSE)
1940 {
1941 target_flags |= MASK_MMX & ~target_flags_explicit;
1942 x86_prefetch_sse = true;
1943 }
1944
1945 /* Turn on MMX builtins for 3Dnow. */
1946 if (TARGET_3DNOW)
1947 target_flags |= MASK_MMX;
1948
1949 if (TARGET_64BIT)
1950 {
1951 if (TARGET_ALIGN_DOUBLE)
1952 error ("-malign-double makes no sense in the 64bit mode");
1953 if (TARGET_RTD)
1954 error ("-mrtd calling convention not supported in the 64bit mode");
1955
1956 /* Enable by default the SSE and MMX builtins. Do allow the user to
1957 explicitly disable any of these. In particular, disabling SSE and
1958 MMX for kernel code is extremely useful. */
1959 target_flags
1960 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1961 & ~target_flags_explicit);
1962 }
1963 else
1964 {
1965 /* i386 ABI does not specify red zone. It still makes sense to use it
1966 when the programmer takes care to keep the stack from being destroyed. */
1967 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1968 target_flags |= MASK_NO_RED_ZONE;
1969 }
1970
1971 /* Validate -mpreferred-stack-boundary= value, or provide default.
1972 The default of 128 bits is for Pentium III's SSE __m128. We can't
1973 change it because of optimize_size. Otherwise, we can't mix object
1974 files compiled with -Os and -On. */
1975 ix86_preferred_stack_boundary = 128;
1976 if (ix86_preferred_stack_boundary_string)
1977 {
1978 i = atoi (ix86_preferred_stack_boundary_string);
1979 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1980 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1981 TARGET_64BIT ? 4 : 2);
1982 else
1983 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1984 }
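/* For instance, -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment. */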
1985
1986 /* Accept -msseregparm only if at least SSE support is enabled. */
1987 if (TARGET_SSEREGPARM
1988 && ! TARGET_SSE)
1989 error ("-msseregparm used without SSE enabled");
1990
1991 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1992
1993 if (ix86_fpmath_string != 0)
1994 {
1995 if (! strcmp (ix86_fpmath_string, "387"))
1996 ix86_fpmath = FPMATH_387;
1997 else if (! strcmp (ix86_fpmath_string, "sse"))
1998 {
1999 if (!TARGET_SSE)
2000 {
2001 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2002 ix86_fpmath = FPMATH_387;
2003 }
2004 else
2005 ix86_fpmath = FPMATH_SSE;
2006 }
2007 else if (! strcmp (ix86_fpmath_string, "387,sse")
2008 || ! strcmp (ix86_fpmath_string, "sse,387"))
2009 {
2010 if (!TARGET_SSE)
2011 {
2012 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2013 ix86_fpmath = FPMATH_387;
2014 }
2015 else if (!TARGET_80387)
2016 {
2017 warning (0, "387 instruction set disabled, using SSE arithmetics");
2018 ix86_fpmath = FPMATH_SSE;
2019 }
2020 else
2021 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2022 }
2023 else
2024 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2025 }
2026
2027 /* If the i387 is disabled, then do not return values in it. */
2028 if (!TARGET_80387)
2029 target_flags &= ~MASK_FLOAT_RETURNS;
2030
2031 if ((x86_accumulate_outgoing_args & TUNEMASK)
2032 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2033 && !optimize_size)
2034 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2035
2036 /* ??? Unwind info is not correct around the CFG unless either a frame
2037 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2038 unwind info generation to be aware of the CFG and propagating states
2039 around edges. */
2040 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2041 || flag_exceptions || flag_non_call_exceptions)
2042 && flag_omit_frame_pointer
2043 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2044 {
2045 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2046 warning (0, "unwind tables currently require either a frame pointer "
2047 "or -maccumulate-outgoing-args for correctness");
2048 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2049 }
2050
2051 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2052 {
2053 char *p;
2054 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2055 p = strchr (internal_label_prefix, 'X');
2056 internal_label_prefix_len = p - internal_label_prefix;
2057 *p = '\0';
2058 }
2059
2060 /* When scheduling description is not available, disable scheduler pass
2061 so it won't slow down the compilation and make x87 code slower. */
2062 if (!TARGET_SCHEDULE)
2063 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2064 }
2065 \f
2066 /* Switch to the appropriate section for output of DECL.
2067 DECL is either a `VAR_DECL' node or a constant of some sort.
2068 RELOC indicates whether forming the initial value of DECL requires
2069 link-time relocations. */
2070
2071 static section *
2072 x86_64_elf_select_section (tree decl, int reloc,
2073 unsigned HOST_WIDE_INT align)
2074 {
2075 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2076 && ix86_in_large_data_p (decl))
2077 {
2078 const char *sname = NULL;
2079 unsigned int flags = SECTION_WRITE;
2080 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2081 {
2082 case SECCAT_DATA:
2083 sname = ".ldata";
2084 break;
2085 case SECCAT_DATA_REL:
2086 sname = ".ldata.rel";
2087 break;
2088 case SECCAT_DATA_REL_LOCAL:
2089 sname = ".ldata.rel.local";
2090 break;
2091 case SECCAT_DATA_REL_RO:
2092 sname = ".ldata.rel.ro";
2093 break;
2094 case SECCAT_DATA_REL_RO_LOCAL:
2095 sname = ".ldata.rel.ro.local";
2096 break;
2097 case SECCAT_BSS:
2098 sname = ".lbss";
2099 flags |= SECTION_BSS;
2100 break;
2101 case SECCAT_RODATA:
2102 case SECCAT_RODATA_MERGE_STR:
2103 case SECCAT_RODATA_MERGE_STR_INIT:
2104 case SECCAT_RODATA_MERGE_CONST:
2105 sname = ".lrodata";
2106 flags = 0;
2107 break;
2108 case SECCAT_SRODATA:
2109 case SECCAT_SDATA:
2110 case SECCAT_SBSS:
2111 gcc_unreachable ();
2112 case SECCAT_TEXT:
2113 case SECCAT_TDATA:
2114 case SECCAT_TBSS:
2115 /* We don't split these for the medium model. Place them into
2116 default sections and hope for the best. */
2117 break;
2118 }
2119 if (sname)
2120 {
2121 /* We might get called with string constants, but get_named_section
2122 doesn't like them as they are not DECLs. Also, we need to set
2123 flags in that case. */
2124 if (!DECL_P (decl))
2125 return get_section (sname, flags, NULL);
2126 return get_named_section (decl, sname, reloc);
2127 }
2128 }
2129 return default_elf_select_section (decl, reloc, align);
2130 }
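/* So, for example, with -mcmodel=medium a large writable initialized
   variable is placed in .ldata (or an .ldata.rel* variant when relocations
   are involved) instead of .data, and large read-only data goes to
   .lrodata. */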
2131
2132 /* Build up a unique section name, expressed as a
2133 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2134 RELOC indicates whether the initial value of EXP requires
2135 link-time relocations. */
2136
2137 static void
2138 x86_64_elf_unique_section (tree decl, int reloc)
2139 {
2140 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2141 && ix86_in_large_data_p (decl))
2142 {
2143 const char *prefix = NULL;
2144 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2145 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2146
2147 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2148 {
2149 case SECCAT_DATA:
2150 case SECCAT_DATA_REL:
2151 case SECCAT_DATA_REL_LOCAL:
2152 case SECCAT_DATA_REL_RO:
2153 case SECCAT_DATA_REL_RO_LOCAL:
2154 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2155 break;
2156 case SECCAT_BSS:
2157 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2158 break;
2159 case SECCAT_RODATA:
2160 case SECCAT_RODATA_MERGE_STR:
2161 case SECCAT_RODATA_MERGE_STR_INIT:
2162 case SECCAT_RODATA_MERGE_CONST:
2163 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2164 break;
2165 case SECCAT_SRODATA:
2166 case SECCAT_SDATA:
2167 case SECCAT_SBSS:
2168 gcc_unreachable ();
2169 case SECCAT_TEXT:
2170 case SECCAT_TDATA:
2171 case SECCAT_TBSS:
2172 /* We don't split these for the medium model. Place them into
2173 default sections and hope for the best. */
2174 break;
2175 }
2176 if (prefix)
2177 {
2178 const char *name;
2179 size_t nlen, plen;
2180 char *string;
2181 plen = strlen (prefix);
2182
2183 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2184 name = targetm.strip_name_encoding (name);
2185 nlen = strlen (name);
2186
2187 string = alloca (nlen + plen + 1);
2188 memcpy (string, prefix, plen);
2189 memcpy (string + plen, name, nlen + 1);
2190
2191 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2192 return;
2193 }
2194 }
2195 default_unique_section (decl, reloc);
2196 }
2197
2198 #ifdef COMMON_ASM_OP
2199 /* This says how to output assembler code to declare an
2200 uninitialized external linkage data object.
2201
2202 For the x86-64 medium model we need to use the .largecomm directive
2203 for large objects. */
2204 void
2205 x86_elf_aligned_common (FILE *file,
2206 const char *name, unsigned HOST_WIDE_INT size,
2207 int align)
2208 {
2209 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2210 && size > (unsigned int)ix86_section_threshold)
2211 fprintf (file, ".largecomm\t");
2212 else
2213 fprintf (file, "%s", COMMON_ASM_OP);
2214 assemble_name (file, name);
2215 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2216 size, align / BITS_PER_UNIT);
2217 }
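/* As a rough example, under -mcmodel=medium an uninitialized object larger
   than ix86_section_threshold and aligned to 256 bits is emitted as
   ".largecomm name,<size>,32", while smaller objects use the ordinary
   COMMON_ASM_OP. */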
2218
2219 /* Utility function for targets to use in implementing
2220 ASM_OUTPUT_ALIGNED_BSS. */
2221
2222 void
2223 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2224 const char *name, unsigned HOST_WIDE_INT size,
2225 int align)
2226 {
2227 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2228 && size > (unsigned int)ix86_section_threshold)
2229 switch_to_section (get_named_section (decl, ".lbss", 0));
2230 else
2231 switch_to_section (bss_section);
2232 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2233 #ifdef ASM_DECLARE_OBJECT_NAME
2234 last_assemble_variable_decl = decl;
2235 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2236 #else
2237 /* The standard thing is just to output a label for the object. */
2238 ASM_OUTPUT_LABEL (file, name);
2239 #endif /* ASM_DECLARE_OBJECT_NAME */
2240 ASM_OUTPUT_SKIP (file, size ? size : 1);
2241 }
2242 #endif
2243 \f
2244 void
2245 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2246 {
2247 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2248 make the register shortage problem even worse. */
2249 #ifdef INSN_SCHEDULING
2250 if (level > 1)
2251 flag_schedule_insns = 0;
2252 #endif
2253
2254 if (TARGET_MACHO)
2255 /* The Darwin libraries never set errno, so we might as well
2256 avoid calling them when that's the only reason we would. */
2257 flag_errno_math = 0;
2258
2259 /* The default values of these switches depend on TARGET_64BIT,
2260 which is not known at this moment. Mark these values with 2 and
2261 let the user override them. If there is no command line option
2262 specifying them, we will set the defaults in override_options. */
2263 if (optimize >= 1)
2264 flag_omit_frame_pointer = 2;
2265 flag_pcc_struct_return = 2;
2266 flag_asynchronous_unwind_tables = 2;
2267 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2268 SUBTARGET_OPTIMIZATION_OPTIONS;
2269 #endif
2270 }
2271 \f
2272 /* Table of valid machine attributes. */
2273 const struct attribute_spec ix86_attribute_table[] =
2274 {
2275 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2276 /* Stdcall attribute says callee is responsible for popping arguments
2277 if they are not variable. */
2278 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2279 /* Fastcall attribute says callee is responsible for popping arguments
2280 if they are not variable. */
2281 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2282 /* Cdecl attribute says the callee is a normal C declaration. */
2283 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2284 /* Regparm attribute specifies how many integer arguments are to be
2285 passed in registers. */
2286 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2287 /* Sseregparm attribute says we are using x86_64 calling conventions
2288 for FP arguments. */
2289 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2290 /* force_align_arg_pointer says this function realigns the stack at entry. */
2291 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2292 false, true, true, ix86_handle_cconv_attribute },
2293 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2294 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2295 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2296 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2297 #endif
2298 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2299 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2300 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2301 SUBTARGET_ATTRIBUTE_TABLE,
2302 #endif
2303 { NULL, 0, 0, false, false, false, NULL }
2304 };
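/* For illustration, typical uses of the attributes above in user code:

     int f (int, int, int) __attribute__ ((regparm (3)));
     int __attribute__ ((fastcall)) g (int, int);
     struct s { char c; int i; } __attribute__ ((ms_struct));  */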
2305
2306 /* Decide whether we can make a sibling call to a function. DECL is the
2307 declaration of the function being targeted by the call and EXP is the
2308 CALL_EXPR representing the call. */
2309
2310 static bool
2311 ix86_function_ok_for_sibcall (tree decl, tree exp)
2312 {
2313 tree func;
2314 rtx a, b;
2315
2316 /* If we are generating position-independent code, we cannot sibcall
2317 optimize any indirect call, or a direct call to a global function,
2318 as the PLT requires %ebx be live. */
2319 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2320 return false;
2321
2322 if (decl)
2323 func = decl;
2324 else
2325 {
2326 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2327 if (POINTER_TYPE_P (func))
2328 func = TREE_TYPE (func);
2329 }
2330
2331 /* Check that the return value locations are the same. For example,
2332 if we are returning floats on the 80387 register stack, we cannot
2333 make a sibcall from a function that doesn't return a float to a
2334 function that does or, conversely, from a function that does return
2335 a float to a function that doesn't; the necessary stack adjustment
2336 would not be executed. This is also the place we notice
2337 differences in the return value ABI. Note that it is ok for one
2338 of the functions to have void return type as long as the return
2339 value of the other is passed in a register. */
2340 a = ix86_function_value (TREE_TYPE (exp), func, false);
2341 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2342 cfun->decl, false);
2343 if (STACK_REG_P (a) || STACK_REG_P (b))
2344 {
2345 if (!rtx_equal_p (a, b))
2346 return false;
2347 }
2348 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2349 ;
2350 else if (!rtx_equal_p (a, b))
2351 return false;
2352
2353 /* If this call is indirect, we'll need to be able to use a call-clobbered
2354 register for the address of the target function. Make sure that all
2355 such registers are not used for passing parameters. */
2356 if (!decl && !TARGET_64BIT)
2357 {
2358 tree type;
2359
2360 /* We're looking at the CALL_EXPR, we need the type of the function. */
2361 type = TREE_OPERAND (exp, 0); /* pointer expression */
2362 type = TREE_TYPE (type); /* pointer type */
2363 type = TREE_TYPE (type); /* function type */
2364
2365 if (ix86_function_regparm (type, NULL) >= 3)
2366 {
2367 /* ??? Need to count the actual number of registers to be used,
2368 not the possible number of registers. Fix later. */
2369 return false;
2370 }
2371 }
2372
2373 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2374 /* Dllimport'd functions are also called indirectly. */
2375 if (decl && DECL_DLLIMPORT_P (decl)
2376 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2377 return false;
2378 #endif
2379
2380 /* If we force-aligned the stack, then sibcalling would unalign the
2381 stack, which may break the called function. */
2382 if (cfun->machine->force_align_arg_pointer)
2383 return false;
2384
2385 /* Otherwise okay. That also includes certain types of indirect calls. */
2386 return true;
2387 }
2388
2389 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2390 calling convention attributes;
2391 arguments as in struct attribute_spec.handler. */
2392
2393 static tree
2394 ix86_handle_cconv_attribute (tree *node, tree name,
2395 tree args,
2396 int flags ATTRIBUTE_UNUSED,
2397 bool *no_add_attrs)
2398 {
2399 if (TREE_CODE (*node) != FUNCTION_TYPE
2400 && TREE_CODE (*node) != METHOD_TYPE
2401 && TREE_CODE (*node) != FIELD_DECL
2402 && TREE_CODE (*node) != TYPE_DECL)
2403 {
2404 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2405 IDENTIFIER_POINTER (name));
2406 *no_add_attrs = true;
2407 return NULL_TREE;
2408 }
2409
2410 /* Can combine regparm with all attributes but fastcall. */
2411 if (is_attribute_p ("regparm", name))
2412 {
2413 tree cst;
2414
2415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2416 {
2417 error ("fastcall and regparm attributes are not compatible");
2418 }
2419
2420 cst = TREE_VALUE (args);
2421 if (TREE_CODE (cst) != INTEGER_CST)
2422 {
2423 warning (OPT_Wattributes,
2424 "%qs attribute requires an integer constant argument",
2425 IDENTIFIER_POINTER (name));
2426 *no_add_attrs = true;
2427 }
2428 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2429 {
2430 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2431 IDENTIFIER_POINTER (name), REGPARM_MAX);
2432 *no_add_attrs = true;
2433 }
2434
2435 if (!TARGET_64BIT
2436 && lookup_attribute (ix86_force_align_arg_pointer_string,
2437 TYPE_ATTRIBUTES (*node))
2438 && compare_tree_int (cst, REGPARM_MAX-1))
2439 {
2440 error ("%s functions limited to %d register parameters",
2441 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2442 }
2443
2444 return NULL_TREE;
2445 }
2446
2447 if (TARGET_64BIT)
2448 {
2449 warning (OPT_Wattributes, "%qs attribute ignored",
2450 IDENTIFIER_POINTER (name));
2451 *no_add_attrs = true;
2452 return NULL_TREE;
2453 }
2454
2455 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2456 if (is_attribute_p ("fastcall", name))
2457 {
2458 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2459 {
2460 error ("fastcall and cdecl attributes are not compatible");
2461 }
2462 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2463 {
2464 error ("fastcall and stdcall attributes are not compatible");
2465 }
2466 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2467 {
2468 error ("fastcall and regparm attributes are not compatible");
2469 }
2470 }
2471
2472 /* Can combine stdcall with fastcall (redundant), regparm and
2473 sseregparm. */
2474 else if (is_attribute_p ("stdcall", name))
2475 {
2476 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2477 {
2478 error ("stdcall and cdecl attributes are not compatible");
2479 }
2480 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2481 {
2482 error ("stdcall and fastcall attributes are not compatible");
2483 }
2484 }
2485
2486 /* Can combine cdecl with regparm and sseregparm. */
2487 else if (is_attribute_p ("cdecl", name))
2488 {
2489 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2490 {
2491 error ("stdcall and cdecl attributes are not compatible");
2492 }
2493 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2494 {
2495 error ("fastcall and cdecl attributes are not compatible");
2496 }
2497 }
2498
2499 /* Can combine sseregparm with all attributes. */
2500
2501 return NULL_TREE;
2502 }
2503
2504 /* Return 0 if the attributes for two types are incompatible, 1 if they
2505 are compatible, and 2 if they are nearly compatible (which causes a
2506 warning to be generated). */
2507
2508 static int
2509 ix86_comp_type_attributes (tree type1, tree type2)
2510 {
2511 /* Check for mismatch of non-default calling convention. */
2512 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2513
2514 if (TREE_CODE (type1) != FUNCTION_TYPE)
2515 return 1;
2516
2517 /* Check for mismatched fastcall/regparm types. */
2518 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2519 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2520 || (ix86_function_regparm (type1, NULL)
2521 != ix86_function_regparm (type2, NULL)))
2522 return 0;
2523
2524 /* Check for mismatched sseregparm types. */
2525 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2526 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2527 return 0;
2528
2529 /* Check for mismatched return types (cdecl vs stdcall). */
2530 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2531 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2532 return 0;
2533
2534 return 1;
2535 }
2536 \f
2537 /* Return the regparm value for a function with the indicated TYPE and DECL.
2538 DECL may be NULL when calling function indirectly
2539 or considering a libcall. */
2540
2541 static int
2542 ix86_function_regparm (tree type, tree decl)
2543 {
2544 tree attr;
2545 int regparm = ix86_regparm;
2546 bool user_convention = false;
2547
2548 if (!TARGET_64BIT)
2549 {
2550 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2551 if (attr)
2552 {
2553 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2554 user_convention = true;
2555 }
2556
2557 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2558 {
2559 regparm = 2;
2560 user_convention = true;
2561 }
2562
2563 /* Use register calling convention for local functions when possible. */
2564 if (!TARGET_64BIT && !user_convention && decl
2565 && flag_unit_at_a_time && !profile_flag)
2566 {
2567 struct cgraph_local_info *i = cgraph_local_info (decl);
2568 if (i && i->local)
2569 {
2570 int local_regparm, globals = 0, regno;
2571
2572 /* Make sure no regparm register is taken by a global register
2573 variable. */
2574 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2575 if (global_regs[local_regparm])
2576 break;
2577 /* We can't use regparm(3) for nested functions as these use
2578 the static chain pointer in the third argument register. */
2579 if (local_regparm == 3
2580 && decl_function_context (decl)
2581 && !DECL_NO_STATIC_CHAIN (decl))
2582 local_regparm = 2;
2583 /* If the function realigns its stack pointer, the
2584 prologue will clobber %ecx. If we've already
2585 generated code for the callee, the callee
2586 DECL_STRUCT_FUNCTION is gone, so we fall back to
2587 scanning the attributes for the self-realigning
2588 property. */
2589 if ((DECL_STRUCT_FUNCTION (decl)
2590 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2591 || (!DECL_STRUCT_FUNCTION (decl)
2592 && lookup_attribute (ix86_force_align_arg_pointer_string,
2593 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2594 local_regparm = 2;
2595 /* Each global register variable increases register pressure,
2596 so the more global reg vars there are, the less useful the
2597 regparm optimization is, unless the user requests it explicitly. */
2598 for (regno = 0; regno < 6; regno++)
2599 if (global_regs[regno])
2600 globals++;
2601 local_regparm
2602 = globals < local_regparm ? local_regparm - globals : 0;
2603
2604 if (local_regparm > regparm)
2605 regparm = local_regparm;
2606 }
2607 }
2608 }
2609 return regparm;
2610 }
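/* Concretely: in 32-bit mode a local (static, non-nested) function with no
   regparm or fastcall attribute, no global register variables and no stack
   realignment is bumped to regparm 3 by the heuristic above when
   -funit-at-a-time is in effect and profiling is off. */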
2611
2612 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2613 in SSE registers for a function with the indicated TYPE and DECL.
2614 DECL may be NULL when calling function indirectly
2615 or considering a libcall. Otherwise return 0. */
2616
2617 static int
2618 ix86_function_sseregparm (tree type, tree decl)
2619 {
2620 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2621 by the sseregparm attribute. */
2622 if (TARGET_SSEREGPARM
2623 || (type
2624 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2625 {
2626 if (!TARGET_SSE)
2627 {
2628 if (decl)
2629 error ("Calling %qD with attribute sseregparm without "
2630 "SSE/SSE2 enabled", decl);
2631 else
2632 error ("Calling %qT with attribute sseregparm without "
2633 "SSE/SSE2 enabled", type);
2634 return 0;
2635 }
2636
2637 return 2;
2638 }
2639
2640 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2641 in SSE registers even for 32-bit mode and not just 3, but up to
2642 8 SSE arguments in registers. */
2643 if (!TARGET_64BIT && decl
2644 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2645 {
2646 struct cgraph_local_info *i = cgraph_local_info (decl);
2647 if (i && i->local)
2648 return TARGET_SSE2 ? 2 : 1;
2649 }
2650
2651 return 0;
2652 }
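/* For instance, a local (static) function compiled in 32-bit mode with
   -msse2 -mfpmath=sse -funit-at-a-time and no profiling gets 2 here, so
   both SFmode and DFmode scalar arguments may be passed in SSE registers. */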
2653
2654 /* Return true if EAX is live at the start of the function. Used by
2655 ix86_expand_prologue to determine if we need special help before
2656 calling allocate_stack_worker. */
2657
2658 static bool
2659 ix86_eax_live_at_start_p (void)
2660 {
2661 /* Cheat. Don't bother working forward from ix86_function_regparm
2662 to the function type to whether an actual argument is located in
2663 eax. Instead just look at cfg info, which is still close enough
2664 to correct at this point. This gives false positives for broken
2665 functions that might use uninitialized data that happens to be
2666 allocated in eax, but who cares? */
2667 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2668 }
2669
2670 /* Value is the number of bytes of arguments automatically
2671 popped when returning from a subroutine call.
2672 FUNDECL is the declaration node of the function (as a tree),
2673 FUNTYPE is the data type of the function (as a tree),
2674 or for a library call it is an identifier node for the subroutine name.
2675 SIZE is the number of bytes of arguments passed on the stack.
2676
2677 On the 80386, the RTD insn may be used to pop them if the number
2678 of args is fixed, but if the number is variable then the caller
2679 must pop them all. RTD can't be used for library calls now
2680 because the library is compiled with the Unix compiler.
2681 Use of RTD is a selectable option, since it is incompatible with
2682 standard Unix calling sequences. If the option is not selected,
2683 the caller must always pop the args.
2684
2685 The attribute stdcall is equivalent to RTD on a per module basis. */
2686
2687 int
2688 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2689 {
2690 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2691
2692 /* Cdecl functions override -mrtd, and never pop the stack. */
2693 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2694
2695 /* Stdcall and fastcall functions will pop the stack if not
2696 variable args. */
2697 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2698 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2699 rtd = 1;
2700
2701 if (rtd
2702 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2703 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2704 == void_type_node)))
2705 return size;
2706 }
2707
2708 /* Lose any fake structure return argument if it is passed on the stack. */
2709 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2710 && !TARGET_64BIT
2711 && !KEEP_AGGREGATE_RETURN_POINTER)
2712 {
2713 int nregs = ix86_function_regparm (funtype, fundecl);
2714
2715 if (!nregs)
2716 return GET_MODE_SIZE (Pmode);
2717 }
2718
2719 return 0;
2720 }
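/* For example, with the logic above a prototyped stdcall function whose two
   int arguments are passed on the stack returns SIZE (here 8, popped by the
   callee), while a cdecl function or any function with a variable argument
   list returns 0 and leaves popping to the caller. */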
2721 \f
2722 /* Argument support functions. */
2723
2724 /* Return true when register may be used to pass function parameters. */
2725 bool
2726 ix86_function_arg_regno_p (int regno)
2727 {
2728 int i;
2729 if (!TARGET_64BIT)
2730 return (regno < REGPARM_MAX
2731 || (TARGET_MMX && MMX_REGNO_P (regno)
2732 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2733 || (TARGET_SSE && SSE_REGNO_P (regno)
2734 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2735
2736 if (TARGET_SSE && SSE_REGNO_P (regno)
2737 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2738 return true;
2739 /* RAX is used as a hidden argument to varargs functions. */
2740 if (!regno)
2741 return true;
2742 for (i = 0; i < REGPARM_MAX; i++)
2743 if (regno == x86_64_int_parameter_registers[i])
2744 return true;
2745 return false;
2746 }
2747
2748 /* Return true if we do not know how to pass TYPE solely in registers. */
2749
2750 static bool
2751 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2752 {
2753 if (must_pass_in_stack_var_size_or_pad (mode, type))
2754 return true;
2755
2756 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2757 The layout_type routine is crafty and tries to trick us into passing
2758 currently unsupported vector types on the stack by using TImode. */
2759 return (!TARGET_64BIT && mode == TImode
2760 && type && TREE_CODE (type) != VECTOR_TYPE);
2761 }
2762
2763 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2764 for a call to a function whose data type is FNTYPE.
2765 For a library call, FNTYPE is 0. */
2766
2767 void
2768 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2769 tree fntype, /* tree ptr for function decl */
2770 rtx libname, /* SYMBOL_REF of library name or 0 */
2771 tree fndecl)
2772 {
2773 static CUMULATIVE_ARGS zero_cum;
2774 tree param, next_param;
2775
2776 if (TARGET_DEBUG_ARG)
2777 {
2778 fprintf (stderr, "\ninit_cumulative_args (");
2779 if (fntype)
2780 fprintf (stderr, "fntype code = %s, ret code = %s",
2781 tree_code_name[(int) TREE_CODE (fntype)],
2782 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2783 else
2784 fprintf (stderr, "no fntype");
2785
2786 if (libname)
2787 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2788 }
2789
2790 *cum = zero_cum;
2791
2792 /* Set up the number of registers to use for passing arguments. */
2793 cum->nregs = ix86_regparm;
2794 if (TARGET_SSE)
2795 cum->sse_nregs = SSE_REGPARM_MAX;
2796 if (TARGET_MMX)
2797 cum->mmx_nregs = MMX_REGPARM_MAX;
2798 cum->warn_sse = true;
2799 cum->warn_mmx = true;
2800 cum->maybe_vaarg = false;
2801
2802 /* Use ecx and edx registers if function has fastcall attribute,
2803 else look for regparm information. */
2804 if (fntype && !TARGET_64BIT)
2805 {
2806 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2807 {
2808 cum->nregs = 2;
2809 cum->fastcall = 1;
2810 }
2811 else
2812 cum->nregs = ix86_function_regparm (fntype, fndecl);
2813 }
2814
2815 /* Set up the number of SSE registers used for passing SFmode
2816 and DFmode arguments. Warn for mismatching ABI. */
2817 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2818
2819 /* Determine if this function has variable arguments. The absence of
2820 variable arguments is indicated by the last parameter type being
2821 'void_type_node'. If there are variable arguments, then we won't
2822 pass anything in registers in 32-bit mode. */
2823
2824 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2825 {
2826 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2827 param != 0; param = next_param)
2828 {
2829 next_param = TREE_CHAIN (param);
2830 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2831 {
2832 if (!TARGET_64BIT)
2833 {
2834 cum->nregs = 0;
2835 cum->sse_nregs = 0;
2836 cum->mmx_nregs = 0;
2837 cum->warn_sse = 0;
2838 cum->warn_mmx = 0;
2839 cum->fastcall = 0;
2840 cum->float_in_sse = 0;
2841 }
2842 cum->maybe_vaarg = true;
2843 }
2844 }
2845 }
2846 if ((!fntype && !libname)
2847 || (fntype && !TYPE_ARG_TYPES (fntype)))
2848 cum->maybe_vaarg = true;
2849
2850 if (TARGET_DEBUG_ARG)
2851 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2852
2853 return;
2854 }
2855
2856 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2857 But in the case of vector types, it is some vector mode.
2858
2859 When we have only some of our vector isa extensions enabled, then there
2860 are some modes for which vector_mode_supported_p is false. For these
2861 modes, the generic vector support in gcc will choose some non-vector mode
2862 in order to implement the type. By computing the natural mode, we'll
2863 select the proper ABI location for the operand and not depend on whatever
2864 the middle-end decides to do with these vector types. */
2865
2866 static enum machine_mode
2867 type_natural_mode (tree type)
2868 {
2869 enum machine_mode mode = TYPE_MODE (type);
2870
2871 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2872 {
2873 HOST_WIDE_INT size = int_size_in_bytes (type);
2874 if ((size == 8 || size == 16)
2875 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2876 && TYPE_VECTOR_SUBPARTS (type) > 1)
2877 {
2878 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2879
2880 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2881 mode = MIN_MODE_VECTOR_FLOAT;
2882 else
2883 mode = MIN_MODE_VECTOR_INT;
2884
2885 /* Get the mode which has this inner mode and number of units. */
2886 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2887 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2888 && GET_MODE_INNER (mode) == innermode)
2889 return mode;
2890
2891 gcc_unreachable ();
2892 }
2893 }
2894
2895 return mode;
2896 }
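/* For example, a 16-byte GCC vector type of four floats maps to V4SFmode
   here even when SSE is disabled and the middle end would otherwise lay the
   type out in a non-vector mode, keeping the ABI location of such arguments
   stable. */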
2897
2898 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2899 this may not agree with the mode that the type system has chosen for the
2900 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2901 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2902
2903 static rtx
2904 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2905 unsigned int regno)
2906 {
2907 rtx tmp;
2908
2909 if (orig_mode != BLKmode)
2910 tmp = gen_rtx_REG (orig_mode, regno);
2911 else
2912 {
2913 tmp = gen_rtx_REG (mode, regno);
2914 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2915 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2916 }
2917
2918 return tmp;
2919 }
2920
2921 /* x86-64 register passing implementation. See the x86-64 psABI for details.
2922 The goal of this code is to classify each eightbyte of an incoming argument
2923 by register class and assign registers accordingly. */
2924
2925 /* Return the union class of CLASS1 and CLASS2.
2926 See the x86-64 PS ABI for details. */
2927
2928 static enum x86_64_reg_class
2929 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2930 {
2931 /* Rule #1: If both classes are equal, this is the resulting class. */
2932 if (class1 == class2)
2933 return class1;
2934
2935 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2936 the other class. */
2937 if (class1 == X86_64_NO_CLASS)
2938 return class2;
2939 if (class2 == X86_64_NO_CLASS)
2940 return class1;
2941
2942 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2943 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2944 return X86_64_MEMORY_CLASS;
2945
2946 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2947 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2948 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2949 return X86_64_INTEGERSI_CLASS;
2950 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2951 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2952 return X86_64_INTEGER_CLASS;
2953
2954 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2955 MEMORY is used. */
2956 if (class1 == X86_64_X87_CLASS
2957 || class1 == X86_64_X87UP_CLASS
2958 || class1 == X86_64_COMPLEX_X87_CLASS
2959 || class2 == X86_64_X87_CLASS
2960 || class2 == X86_64_X87UP_CLASS
2961 || class2 == X86_64_COMPLEX_X87_CLASS)
2962 return X86_64_MEMORY_CLASS;
2963
2964 /* Rule #6: Otherwise class SSE is used. */
2965 return X86_64_SSE_CLASS;
2966 }
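/* Two small worked examples of the rules above: merging INTEGERSI with SSESF
   yields INTEGERSI (rule #4), while merging SSEDF with SSEUP matches none of
   rules #1-#5 and falls through to rule #6, yielding SSE. */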
2967
2968 /* Classify the argument of type TYPE and mode MODE.
2969 CLASSES will be filled by the register class used to pass each word
2970 of the operand. The number of words is returned. In case the parameter
2971 should be passed in memory, 0 is returned. As a special case for zero
2972 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2973
2974 BIT_OFFSET is used internally for handling records; it specifies the
2975 offset in bits, modulo 256, to avoid overflow cases.
2976
2977 See the x86-64 PS ABI for details.
2978 */
2979
2980 static int
2981 classify_argument (enum machine_mode mode, tree type,
2982 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2983 {
2984 HOST_WIDE_INT bytes =
2985 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2986 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2987
2988 /* Variable sized entities are always passed/returned in memory. */
2989 if (bytes < 0)
2990 return 0;
2991
2992 if (mode != VOIDmode
2993 && targetm.calls.must_pass_in_stack (mode, type))
2994 return 0;
2995
2996 if (type && AGGREGATE_TYPE_P (type))
2997 {
2998 int i;
2999 tree field;
3000 enum x86_64_reg_class subclasses[MAX_CLASSES];
3001
3002 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3003 if (bytes > 16)
3004 return 0;
3005
3006 for (i = 0; i < words; i++)
3007 classes[i] = X86_64_NO_CLASS;
3008
3009 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3010 signal the memory class, so handle this as a special case. */
3011 if (!words)
3012 {
3013 classes[0] = X86_64_NO_CLASS;
3014 return 1;
3015 }
3016
3017 /* Classify each field of record and merge classes. */
3018 switch (TREE_CODE (type))
3019 {
3020 case RECORD_TYPE:
3021 /* And now merge the fields of structure. */
3022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3023 {
3024 if (TREE_CODE (field) == FIELD_DECL)
3025 {
3026 int num;
3027
3028 if (TREE_TYPE (field) == error_mark_node)
3029 continue;
3030
3031 /* Bitfields are always classified as integer. Handle them
3032 early, since later code would consider them to be
3033 misaligned integers. */
3034 if (DECL_BIT_FIELD (field))
3035 {
3036 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3037 i < ((int_bit_position (field) + (bit_offset % 64))
3038 + tree_low_cst (DECL_SIZE (field), 0)
3039 + 63) / 8 / 8; i++)
3040 classes[i] =
3041 merge_classes (X86_64_INTEGER_CLASS,
3042 classes[i]);
3043 }
3044 else
3045 {
3046 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3047 TREE_TYPE (field), subclasses,
3048 (int_bit_position (field)
3049 + bit_offset) % 256);
3050 if (!num)
3051 return 0;
3052 for (i = 0; i < num; i++)
3053 {
3054 int pos =
3055 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3056 classes[i + pos] =
3057 merge_classes (subclasses[i], classes[i + pos]);
3058 }
3059 }
3060 }
3061 }
3062 break;
3063
3064 case ARRAY_TYPE:
3065 /* Arrays are handled as small records. */
3066 {
3067 int num;
3068 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3069 TREE_TYPE (type), subclasses, bit_offset);
3070 if (!num)
3071 return 0;
3072
3073 /* The partial classes are now full classes. */
3074 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3075 subclasses[0] = X86_64_SSE_CLASS;
3076 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3077 subclasses[0] = X86_64_INTEGER_CLASS;
3078
3079 for (i = 0; i < words; i++)
3080 classes[i] = subclasses[i % num];
3081
3082 break;
3083 }
3084 case UNION_TYPE:
3085 case QUAL_UNION_TYPE:
3086 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3087
3088 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3089 {
3090 if (TREE_CODE (field) == FIELD_DECL)
3091 {
3092 int num;
3093
3094 if (TREE_TYPE (field) == error_mark_node)
3095 continue;
3096
3097 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3098 TREE_TYPE (field), subclasses,
3099 bit_offset);
3100 if (!num)
3101 return 0;
3102 for (i = 0; i < num; i++)
3103 classes[i] = merge_classes (subclasses[i], classes[i]);
3104 }
3105 }
3106 break;
3107
3108 default:
3109 gcc_unreachable ();
3110 }
3111
3112 /* Final merger cleanup. */
3113 for (i = 0; i < words; i++)
3114 {
3115 /* If one class is MEMORY, everything should be passed in
3116 memory. */
3117 if (classes[i] == X86_64_MEMORY_CLASS)
3118 return 0;
3119
3120 /* The X86_64_SSEUP_CLASS should always be preceded by
3121 X86_64_SSE_CLASS. */
3122 if (classes[i] == X86_64_SSEUP_CLASS
3123 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3124 classes[i] = X86_64_SSE_CLASS;
3125
3126 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3127 if (classes[i] == X86_64_X87UP_CLASS
3128 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3129 classes[i] = X86_64_SSE_CLASS;
3130 }
3131 return words;
3132 }
3133
3134 /* Compute the alignment needed. We align all types to their natural boundaries,
3135 with the exception of XFmode, which is aligned to 64 bits. */
3136 if (mode != VOIDmode && mode != BLKmode)
3137 {
3138 int mode_alignment = GET_MODE_BITSIZE (mode);
3139
3140 if (mode == XFmode)
3141 mode_alignment = 128;
3142 else if (mode == XCmode)
3143 mode_alignment = 256;
3144 if (COMPLEX_MODE_P (mode))
3145 mode_alignment /= 2;
3146 /* Misaligned fields are always returned in memory. */
3147 if (bit_offset % mode_alignment)
3148 return 0;
3149 }
3150
3151 /* For V1xx modes, just use the base mode. */
3152 if (VECTOR_MODE_P (mode)
3153 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3154 mode = GET_MODE_INNER (mode);
3155
3156 /* Classification of atomic types. */
3157 switch (mode)
3158 {
3159 case SDmode:
3160 case DDmode:
3161 classes[0] = X86_64_SSE_CLASS;
3162 return 1;
3163 case TDmode:
3164 classes[0] = X86_64_SSE_CLASS;
3165 classes[1] = X86_64_SSEUP_CLASS;
3166 return 2;
3167 case DImode:
3168 case SImode:
3169 case HImode:
3170 case QImode:
3171 case CSImode:
3172 case CHImode:
3173 case CQImode:
3174 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3175 classes[0] = X86_64_INTEGERSI_CLASS;
3176 else
3177 classes[0] = X86_64_INTEGER_CLASS;
3178 return 1;
3179 case CDImode:
3180 case TImode:
3181 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3182 return 2;
3183 case CTImode:
3184 return 0;
3185 case SFmode:
3186 if (!(bit_offset % 64))
3187 classes[0] = X86_64_SSESF_CLASS;
3188 else
3189 classes[0] = X86_64_SSE_CLASS;
3190 return 1;
3191 case DFmode:
3192 classes[0] = X86_64_SSEDF_CLASS;
3193 return 1;
3194 case XFmode:
3195 classes[0] = X86_64_X87_CLASS;
3196 classes[1] = X86_64_X87UP_CLASS;
3197 return 2;
3198 case TFmode:
3199 classes[0] = X86_64_SSE_CLASS;
3200 classes[1] = X86_64_SSEUP_CLASS;
3201 return 2;
3202 case SCmode:
3203 classes[0] = X86_64_SSE_CLASS;
3204 return 1;
3205 case DCmode:
3206 classes[0] = X86_64_SSEDF_CLASS;
3207 classes[1] = X86_64_SSEDF_CLASS;
3208 return 2;
3209 case XCmode:
3210 classes[0] = X86_64_COMPLEX_X87_CLASS;
3211 return 1;
3212 case TCmode:
3213 /* This mode is larger than 16 bytes. */
3214 return 0;
3215 case V4SFmode:
3216 case V4SImode:
3217 case V16QImode:
3218 case V8HImode:
3219 case V2DFmode:
3220 case V2DImode:
3221 classes[0] = X86_64_SSE_CLASS;
3222 classes[1] = X86_64_SSEUP_CLASS;
3223 return 2;
3224 case V2SFmode:
3225 case V2SImode:
3226 case V4HImode:
3227 case V8QImode:
3228 classes[0] = X86_64_SSE_CLASS;
3229 return 1;
3230 case BLKmode:
3231 case VOIDmode:
3232 return 0;
3233 default:
3234 gcc_assert (VECTOR_MODE_P (mode));
3235
3236 if (bytes > 16)
3237 return 0;
3238
3239 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3240
3241 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3242 classes[0] = X86_64_INTEGERSI_CLASS;
3243 else
3244 classes[0] = X86_64_INTEGER_CLASS;
3245 classes[1] = X86_64_INTEGER_CLASS;
3246 return 1 + (bytes > 8);
3247 }
3248 }
3249
3250 /* Examine the argument and return the number of registers required in each
3251 class. Return 0 iff the parameter should be passed in memory. */
3252 static int
3253 examine_argument (enum machine_mode mode, tree type, int in_return,
3254 int *int_nregs, int *sse_nregs)
3255 {
3256 enum x86_64_reg_class class[MAX_CLASSES];
3257 int n = classify_argument (mode, type, class, 0);
3258
3259 *int_nregs = 0;
3260 *sse_nregs = 0;
3261 if (!n)
3262 return 0;
3263 for (n--; n >= 0; n--)
3264 switch (class[n])
3265 {
3266 case X86_64_INTEGER_CLASS:
3267 case X86_64_INTEGERSI_CLASS:
3268 (*int_nregs)++;
3269 break;
3270 case X86_64_SSE_CLASS:
3271 case X86_64_SSESF_CLASS:
3272 case X86_64_SSEDF_CLASS:
3273 (*sse_nregs)++;
3274 break;
3275 case X86_64_NO_CLASS:
3276 case X86_64_SSEUP_CLASS:
3277 break;
3278 case X86_64_X87_CLASS:
3279 case X86_64_X87UP_CLASS:
3280 if (!in_return)
3281 return 0;
3282 break;
3283 case X86_64_COMPLEX_X87_CLASS:
3284 return in_return ? 2 : 0;
3285 case X86_64_MEMORY_CLASS:
3286 gcc_unreachable ();
3287 }
3288 return 1;
3289 }
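/* As an illustration of the two routines above: "struct { double a, b; }"
   classifies as two X86_64_SSEDF_CLASS chunks, so examine_argument reports
   int_nregs == 0 and sse_nregs == 2, and construct_container below builds a
   PARALLEL that places the two halves in consecutive SSE registers.  */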
3290
3291 /* Construct container for the argument used by GCC interface. See
3292 FUNCTION_ARG for the detailed description. */
3293
3294 static rtx
3295 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3296 tree type, int in_return, int nintregs, int nsseregs,
3297 const int *intreg, int sse_regno)
3298 {
3299 /* The following variables hold the static issued_error state. */
3300 static bool issued_sse_arg_error;
3301 static bool issued_sse_ret_error;
3302 static bool issued_x87_ret_error;
3303
3304 enum machine_mode tmpmode;
3305 int bytes =
3306 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3307 enum x86_64_reg_class class[MAX_CLASSES];
3308 int n;
3309 int i;
3310 int nexps = 0;
3311 int needed_sseregs, needed_intregs;
3312 rtx exp[MAX_CLASSES];
3313 rtx ret;
3314
3315 n = classify_argument (mode, type, class, 0);
3316 if (TARGET_DEBUG_ARG)
3317 {
3318 if (!n)
3319 fprintf (stderr, "Memory class\n");
3320 else
3321 {
3322 fprintf (stderr, "Classes:");
3323 for (i = 0; i < n; i++)
3324 {
3325 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3326 }
3327 fprintf (stderr, "\n");
3328 }
3329 }
3330 if (!n)
3331 return NULL;
3332 if (!examine_argument (mode, type, in_return, &needed_intregs,
3333 &needed_sseregs))
3334 return NULL;
3335 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3336 return NULL;
3337
3338 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3339 some less clueful developer tries to use floating-point anyway. */
3340 if (needed_sseregs && !TARGET_SSE)
3341 {
3342 if (in_return)
3343 {
3344 if (!issued_sse_ret_error)
3345 {
3346 error ("SSE register return with SSE disabled");
3347 issued_sse_ret_error = true;
3348 }
3349 }
3350 else if (!issued_sse_arg_error)
3351 {
3352 error ("SSE register argument with SSE disabled");
3353 issued_sse_arg_error = true;
3354 }
3355 return NULL;
3356 }
3357
3358 /* Likewise, error if the ABI requires us to return values in the
3359 x87 registers and the user specified -mno-80387. */
3360 if (!TARGET_80387 && in_return)
3361 for (i = 0; i < n; i++)
3362 if (class[i] == X86_64_X87_CLASS
3363 || class[i] == X86_64_X87UP_CLASS
3364 || class[i] == X86_64_COMPLEX_X87_CLASS)
3365 {
3366 if (!issued_x87_ret_error)
3367 {
3368 error ("x87 register return with x87 disabled");
3369 issued_x87_ret_error = true;
3370 }
3371 return NULL;
3372 }
3373
3374 /* First construct simple cases. Avoid SCmode, since we want to use
3375 a single register to pass this type. */
3376 if (n == 1 && mode != SCmode)
3377 switch (class[0])
3378 {
3379 case X86_64_INTEGER_CLASS:
3380 case X86_64_INTEGERSI_CLASS:
3381 return gen_rtx_REG (mode, intreg[0]);
3382 case X86_64_SSE_CLASS:
3383 case X86_64_SSESF_CLASS:
3384 case X86_64_SSEDF_CLASS:
3385 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3386 case X86_64_X87_CLASS:
3387 case X86_64_COMPLEX_X87_CLASS:
3388 return gen_rtx_REG (mode, FIRST_STACK_REG);
3389 case X86_64_NO_CLASS:
3390 /* Zero sized array, struct or class. */
3391 return NULL;
3392 default:
3393 gcc_unreachable ();
3394 }
3395 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3396 && mode != BLKmode)
3397 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3398 if (n == 2
3399 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3400 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3401 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3402 && class[1] == X86_64_INTEGER_CLASS
3403 && (mode == CDImode || mode == TImode || mode == TFmode)
3404 && intreg[0] + 1 == intreg[1])
3405 return gen_rtx_REG (mode, intreg[0]);
3406
3407 /* Otherwise figure out the entries of the PARALLEL. */
3408 for (i = 0; i < n; i++)
3409 {
3410 switch (class[i])
3411 {
3412 case X86_64_NO_CLASS:
3413 break;
3414 case X86_64_INTEGER_CLASS:
3415 case X86_64_INTEGERSI_CLASS:
3416 /* Merge TImodes on aligned occasions here too. */
3417 if (i * 8 + 8 > bytes)
3418 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3419 else if (class[i] == X86_64_INTEGERSI_CLASS)
3420 tmpmode = SImode;
3421 else
3422 tmpmode = DImode;
3423 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3424 if (tmpmode == BLKmode)
3425 tmpmode = DImode;
3426 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3427 gen_rtx_REG (tmpmode, *intreg),
3428 GEN_INT (i*8));
3429 intreg++;
3430 break;
3431 case X86_64_SSESF_CLASS:
3432 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3433 gen_rtx_REG (SFmode,
3434 SSE_REGNO (sse_regno)),
3435 GEN_INT (i*8));
3436 sse_regno++;
3437 break;
3438 case X86_64_SSEDF_CLASS:
3439 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3440 gen_rtx_REG (DFmode,
3441 SSE_REGNO (sse_regno)),
3442 GEN_INT (i*8));
3443 sse_regno++;
3444 break;
3445 case X86_64_SSE_CLASS:
3446 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3447 tmpmode = TImode;
3448 else
3449 tmpmode = DImode;
3450 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3451 gen_rtx_REG (tmpmode,
3452 SSE_REGNO (sse_regno)),
3453 GEN_INT (i*8));
3454 if (tmpmode == TImode)
3455 i++;
3456 sse_regno++;
3457 break;
3458 default:
3459 gcc_unreachable ();
3460 }
3461 }
3462
3463 /* Empty aligned struct, union or class. */
3464 if (nexps == 0)
3465 return NULL;
3466
3467 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3468 for (i = 0; i < nexps; i++)
3469 XVECEXP (ret, 0, i) = exp [i];
3470 return ret;
3471 }
3472
3473 /* Update the data in CUM to advance over an argument
3474 of mode MODE and data type TYPE.
3475 (TYPE is null for libcalls where that information may not be available.) */
3476
3477 void
3478 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3479 tree type, int named)
3480 {
3481 int bytes =
3482 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3483 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3484
3485 if (type)
3486 mode = type_natural_mode (type);
3487
3488 if (TARGET_DEBUG_ARG)
3489 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3490 "mode=%s, named=%d)\n\n",
3491 words, cum->words, cum->nregs, cum->sse_nregs,
3492 GET_MODE_NAME (mode), named);
3493
3494 if (TARGET_64BIT)
3495 {
3496 int int_nregs, sse_nregs;
3497 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3498 cum->words += words;
3499 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3500 {
3501 cum->nregs -= int_nregs;
3502 cum->sse_nregs -= sse_nregs;
3503 cum->regno += int_nregs;
3504 cum->sse_regno += sse_nregs;
3505 }
3506 else
3507 cum->words += words;
3508 }
3509 else
3510 {
3511 switch (mode)
3512 {
3513 default:
3514 break;
3515
3516 case BLKmode:
3517 if (bytes < 0)
3518 break;
3519 /* FALLTHRU */
3520
3521 case DImode:
3522 case SImode:
3523 case HImode:
3524 case QImode:
3525 cum->words += words;
3526 cum->nregs -= words;
3527 cum->regno += words;
3528
3529 if (cum->nregs <= 0)
3530 {
3531 cum->nregs = 0;
3532 cum->regno = 0;
3533 }
3534 break;
3535
3536 case DFmode:
3537 if (cum->float_in_sse < 2)
3538 break;
3539 case SFmode:
3540 if (cum->float_in_sse < 1)
3541 break;
3542 /* FALLTHRU */
3543
3544 case TImode:
3545 case V16QImode:
3546 case V8HImode:
3547 case V4SImode:
3548 case V2DImode:
3549 case V4SFmode:
3550 case V2DFmode:
3551 if (!type || !AGGREGATE_TYPE_P (type))
3552 {
3553 cum->sse_words += words;
3554 cum->sse_nregs -= 1;
3555 cum->sse_regno += 1;
3556 if (cum->sse_nregs <= 0)
3557 {
3558 cum->sse_nregs = 0;
3559 cum->sse_regno = 0;
3560 }
3561 }
3562 break;
3563
3564 case V8QImode:
3565 case V4HImode:
3566 case V2SImode:
3567 case V2SFmode:
3568 if (!type || !AGGREGATE_TYPE_P (type))
3569 {
3570 cum->mmx_words += words;
3571 cum->mmx_nregs -= 1;
3572 cum->mmx_regno += 1;
3573 if (cum->mmx_nregs <= 0)
3574 {
3575 cum->mmx_nregs = 0;
3576 cum->mmx_regno = 0;
3577 }
3578 }
3579 break;
3580 }
3581 }
3582 }
3583
3584 /* Define where to put the arguments to a function.
3585 Value is zero to push the argument on the stack,
3586 or a hard register in which to store the argument.
3587
3588 MODE is the argument's machine mode.
3589 TYPE is the data type of the argument (as a tree).
3590 This is null for libcalls where that information may
3591 not be available.
3592 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3593 the preceding args and about the function being called.
3594 NAMED is nonzero if this argument is a named parameter
3595 (otherwise it is an extra parameter matching an ellipsis). */
3596
3597 rtx
3598 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3599 tree type, int named)
3600 {
3601 enum machine_mode mode = orig_mode;
3602 rtx ret = NULL_RTX;
3603 int bytes =
3604 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3605 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3606 static bool warnedsse, warnedmmx;
3607
3608 /* To simplify the code below, represent vector types with a vector mode
3609 even if MMX/SSE are not active. */
3610 if (type && TREE_CODE (type) == VECTOR_TYPE)
3611 mode = type_natural_mode (type);
3612
3613 /* Handle a hidden AL argument containing the number of SSE registers for
3614 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
3615 avoid any AL settings. */
3616 if (mode == VOIDmode)
3617 {
3618 if (TARGET_64BIT)
3619 return GEN_INT (cum->maybe_vaarg
3620 ? (cum->sse_nregs < 0
3621 ? SSE_REGPARM_MAX
3622 : cum->sse_regno)
3623 : -1);
3624 else
3625 return constm1_rtx;
3626 }
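/* In other words, for a 64-bit varargs call the caller loads AL with the
   number of SSE registers used by the call (or SSE_REGPARM_MAX when that is
   not known), and the callee's prologue (see ix86_setup_incoming_varargs
   below) uses that value to decide how many SSE registers to save.  */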
3627 if (TARGET_64BIT)
3628 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3629 cum->sse_nregs,
3630 &x86_64_int_parameter_registers [cum->regno],
3631 cum->sse_regno);
3632 else
3633 switch (mode)
3634 {
3635 /* For now, pass fp/complex values on the stack. */
3636 default:
3637 break;
3638
3639 case BLKmode:
3640 if (bytes < 0)
3641 break;
3642 /* FALLTHRU */
3643 case DImode:
3644 case SImode:
3645 case HImode:
3646 case QImode:
3647 if (words <= cum->nregs)
3648 {
3649 int regno = cum->regno;
3650
3651 /* Fastcall allocates the first two DWORD (SImode) or
3652 smaller arguments to ECX and EDX. */
3653 if (cum->fastcall)
3654 {
3655 if (mode == BLKmode || mode == DImode)
3656 break;
3657
3658 /* ECX, not EAX, is the first allocated register. */
3659 if (regno == 0)
3660 regno = 2;
3661 }
3662 ret = gen_rtx_REG (mode, regno);
3663 }
3664 break;
3665 case DFmode:
3666 if (cum->float_in_sse < 2)
3667 break;
3668 case SFmode:
3669 if (cum->float_in_sse < 1)
3670 break;
3671 /* FALLTHRU */
3672 case TImode:
3673 case V16QImode:
3674 case V8HImode:
3675 case V4SImode:
3676 case V2DImode:
3677 case V4SFmode:
3678 case V2DFmode:
3679 if (!type || !AGGREGATE_TYPE_P (type))
3680 {
3681 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3682 {
3683 warnedsse = true;
3684 warning (0, "SSE vector argument without SSE enabled "
3685 "changes the ABI");
3686 }
3687 if (cum->sse_nregs)
3688 ret = gen_reg_or_parallel (mode, orig_mode,
3689 cum->sse_regno + FIRST_SSE_REG);
3690 }
3691 break;
3692 case V8QImode:
3693 case V4HImode:
3694 case V2SImode:
3695 case V2SFmode:
3696 if (!type || !AGGREGATE_TYPE_P (type))
3697 {
3698 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3699 {
3700 warnedmmx = true;
3701 warning (0, "MMX vector argument without MMX enabled "
3702 "changes the ABI");
3703 }
3704 if (cum->mmx_nregs)
3705 ret = gen_reg_or_parallel (mode, orig_mode,
3706 cum->mmx_regno + FIRST_MMX_REG);
3707 }
3708 break;
3709 }
3710
3711 if (TARGET_DEBUG_ARG)
3712 {
3713 fprintf (stderr,
3714 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3715 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3716
3717 if (ret)
3718 print_simple_rtl (stderr, ret);
3719 else
3720 fprintf (stderr, ", stack");
3721
3722 fprintf (stderr, " )\n");
3723 }
3724
3725 return ret;
3726 }
3727
3728 /* A C expression that indicates when an argument must be passed by
3729 reference. If nonzero for an argument, a copy of that argument is
3730 made in memory and a pointer to the argument is passed instead of
3731 the argument itself. The pointer is passed in whatever way is
3732 appropriate for passing a pointer to that type. */
3733
3734 static bool
3735 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3736 enum machine_mode mode ATTRIBUTE_UNUSED,
3737 tree type, bool named ATTRIBUTE_UNUSED)
3738 {
3739 if (!TARGET_64BIT)
3740 return 0;
3741
3742 if (type && int_size_in_bytes (type) == -1)
3743 {
3744 if (TARGET_DEBUG_ARG)
3745 fprintf (stderr, "function_arg_pass_by_reference\n");
3746 return 1;
3747 }
3748
3749 return 0;
3750 }
3751
3752 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3753 passing ABI. Only called if TARGET_SSE. */
3754 static bool
3755 contains_128bit_aligned_vector_p (tree type)
3756 {
3757 enum machine_mode mode = TYPE_MODE (type);
3758 if (SSE_REG_MODE_P (mode)
3759 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3760 return true;
3761 if (TYPE_ALIGN (type) < 128)
3762 return false;
3763
3764 if (AGGREGATE_TYPE_P (type))
3765 {
3766 /* Walk the aggregates recursively. */
3767 switch (TREE_CODE (type))
3768 {
3769 case RECORD_TYPE:
3770 case UNION_TYPE:
3771 case QUAL_UNION_TYPE:
3772 {
3773 tree field;
3774
3775 /* Walk all the structure fields. */
3776 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3777 {
3778 if (TREE_CODE (field) == FIELD_DECL
3779 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3780 return true;
3781 }
3782 break;
3783 }
3784
3785 case ARRAY_TYPE:
3786 /* Just for use if some language passes arrays by value. */
3787 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3788 return true;
3789 break;
3790
3791 default:
3792 gcc_unreachable ();
3793 }
3794 }
3795 return false;
3796 }
3797
3798 /* Gives the alignment boundary, in bits, of an argument with the
3799 specified mode and type. */
3800
3801 int
3802 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3803 {
3804 int align;
3805 if (type)
3806 align = TYPE_ALIGN (type);
3807 else
3808 align = GET_MODE_ALIGNMENT (mode);
3809 if (align < PARM_BOUNDARY)
3810 align = PARM_BOUNDARY;
3811 if (!TARGET_64BIT)
3812 {
3813 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
3814 make an exception for SSE modes since these require 128-bit
3815 alignment.
3816
3817 The handling here differs from field_alignment. ICC aligns MMX
3818 arguments to 4 byte boundaries, while structure fields are aligned
3819 to 8 byte boundaries. */
3820 if (!TARGET_SSE)
3821 align = PARM_BOUNDARY;
3822 else if (!type)
3823 {
3824 if (!SSE_REG_MODE_P (mode))
3825 align = PARM_BOUNDARY;
3826 }
3827 else
3828 {
3829 if (!contains_128bit_aligned_vector_p (type))
3830 align = PARM_BOUNDARY;
3831 }
3832 }
3833 if (align > 128)
3834 align = 128;
3835 return align;
3836 }
3837
3838 /* Return true if REGNO is a possible register number for a function value. */
3839 bool
3840 ix86_function_value_regno_p (int regno)
3841 {
3842 if (regno == 0
3843 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3844 || (regno == FIRST_SSE_REG && TARGET_SSE))
3845 return true;
3846
3847 if (!TARGET_64BIT
3848 && (regno == FIRST_MMX_REG && TARGET_MMX))
3849 return true;
3850
3851 return false;
3852 }
3853
3854 /* Define how to find the value returned by a function.
3855 VALTYPE is the data type of the value (as a tree).
3856 If the precise function being called is known, FNTYPE_OR_DECL is its
3857 FUNCTION_DECL; otherwise it is the function type, or 0. */
3858 rtx
3859 ix86_function_value (tree valtype, tree fntype_or_decl,
3860 bool outgoing ATTRIBUTE_UNUSED)
3861 {
3862 enum machine_mode natmode = type_natural_mode (valtype);
3863
3864 if (TARGET_64BIT)
3865 {
3866 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3867 1, REGPARM_MAX, SSE_REGPARM_MAX,
3868 x86_64_int_return_registers, 0);
3869 /* For zero sized structures, construct_container returns NULL, but we
3870 need to keep the rest of the compiler happy by returning a meaningful value. */
3871 if (!ret)
3872 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3873 return ret;
3874 }
3875 else
3876 {
3877 tree fn = NULL_TREE, fntype;
3878 if (fntype_or_decl
3879 && DECL_P (fntype_or_decl))
3880 fn = fntype_or_decl;
3881 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3882 return gen_rtx_REG (TYPE_MODE (valtype),
3883 ix86_value_regno (natmode, fn, fntype));
3884 }
3885 }
3886
3887 /* Return true iff type is returned in memory. */
3888 int
3889 ix86_return_in_memory (tree type)
3890 {
3891 int needed_intregs, needed_sseregs, size;
3892 enum machine_mode mode = type_natural_mode (type);
3893
3894 if (TARGET_64BIT)
3895 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3896
3897 if (mode == BLKmode)
3898 return 1;
3899
3900 size = int_size_in_bytes (type);
3901
3902 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3903 return 0;
3904
3905 if (VECTOR_MODE_P (mode) || mode == TImode)
3906 {
3907 /* User-created vectors small enough to fit in EAX. */
3908 if (size < 8)
3909 return 0;
3910
3911 /* MMX/3dNow values are returned in MM0,
3912 except when it doesn't exist. */
3913 if (size == 8)
3914 return (TARGET_MMX ? 0 : 1);
3915
3916 /* SSE values are returned in XMM0, except when it doesn't exist. */
3917 if (size == 16)
3918 return (TARGET_SSE ? 0 : 1);
3919 }
3920
3921 if (mode == XFmode)
3922 return 0;
3923
3924 if (mode == TDmode)
3925 return 1;
3926
3927 if (size > 12)
3928 return 1;
3929 return 0;
3930 }
3931
3932 /* When returning SSE vector types, we have a choice of either
3933 (1) being ABI incompatible with a -march switch, or
3934 (2) generating an error.
3935 Given no good solution, I think the safest thing is one warning.
3936 The user won't be able to use -Werror, but....
3937
3938 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3939 called in response to actually generating a caller or callee that
3940 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3941 via aggregate_value_p for general type probing from tree-ssa. */
3942
3943 static rtx
3944 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3945 {
3946 static bool warnedsse, warnedmmx;
3947
3948 if (type)
3949 {
3950 /* Look at the return type of the function, not the function type. */
3951 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3952
3953 if (!TARGET_SSE && !warnedsse)
3954 {
3955 if (mode == TImode
3956 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3957 {
3958 warnedsse = true;
3959 warning (0, "SSE vector return without SSE enabled "
3960 "changes the ABI");
3961 }
3962 }
3963
3964 if (!TARGET_MMX && !warnedmmx)
3965 {
3966 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3967 {
3968 warnedmmx = true;
3969 warning (0, "MMX vector return without MMX enabled "
3970 "changes the ABI");
3971 }
3972 }
3973 }
3974
3975 return NULL;
3976 }
3977
3978 /* Define how to find the value returned by a library function
3979 assuming the value has mode MODE. */
3980 rtx
3981 ix86_libcall_value (enum machine_mode mode)
3982 {
3983 if (TARGET_64BIT)
3984 {
3985 switch (mode)
3986 {
3987 case SFmode:
3988 case SCmode:
3989 case DFmode:
3990 case DCmode:
3991 case TFmode:
3992 case SDmode:
3993 case DDmode:
3994 case TDmode:
3995 return gen_rtx_REG (mode, FIRST_SSE_REG);
3996 case XFmode:
3997 case XCmode:
3998 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3999 case TCmode:
4000 return NULL;
4001 default:
4002 return gen_rtx_REG (mode, 0);
4003 }
4004 }
4005 else
4006 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4007 }
4008
4009 /* Given a mode, return the register to use for a return value. */
4010
4011 static int
4012 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4013 {
4014 gcc_assert (!TARGET_64BIT);
4015
4016 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4017 we normally prevent this case when mmx is not available. However
4018 some ABIs may require the result to be returned like DImode. */
4019 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4020 return TARGET_MMX ? FIRST_MMX_REG : 0;
4021
4022 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4023 we prevent this case when sse is not available. However some ABIs
4024 may require the result to be returned like integer TImode. */
4025 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4026 return TARGET_SSE ? FIRST_SSE_REG : 0;
4027
4028 /* Decimal floating point values can go in %eax, unlike other float modes. */
4029 if (DECIMAL_FLOAT_MODE_P (mode))
4030 return 0;
4031
4032 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4033 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4034 return 0;
4035
4036 /* Floating point return values in %st(0), except for local functions when
4037 SSE math is enabled or for functions with sseregparm attribute. */
4038 if ((func || fntype)
4039 && (mode == SFmode || mode == DFmode))
4040 {
4041 int sse_level = ix86_function_sseregparm (fntype, func);
4042 if ((sse_level >= 1 && mode == SFmode)
4043 || (sse_level == 2 && mode == DFmode))
4044 return FIRST_SSE_REG;
4045 }
4046
4047 return FIRST_FLOAT_REG;
4048 }
4049 \f
4050 /* Create the va_list data type. */
4051
4052 static tree
4053 ix86_build_builtin_va_list (void)
4054 {
4055 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4056
4057 /* For i386 we use a plain pointer to the argument area. */
4058 if (!TARGET_64BIT)
4059 return build_pointer_type (char_type_node);
4060
4061 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4062 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4063
4064 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4065 unsigned_type_node);
4066 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4067 unsigned_type_node);
4068 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4069 ptr_type_node);
4070 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4071 ptr_type_node);
4072
4073 va_list_gpr_counter_field = f_gpr;
4074 va_list_fpr_counter_field = f_fpr;
4075
4076 DECL_FIELD_CONTEXT (f_gpr) = record;
4077 DECL_FIELD_CONTEXT (f_fpr) = record;
4078 DECL_FIELD_CONTEXT (f_ovf) = record;
4079 DECL_FIELD_CONTEXT (f_sav) = record;
4080
4081 TREE_CHAIN (record) = type_decl;
4082 TYPE_NAME (record) = type_decl;
4083 TYPE_FIELDS (record) = f_gpr;
4084 TREE_CHAIN (f_gpr) = f_fpr;
4085 TREE_CHAIN (f_fpr) = f_ovf;
4086 TREE_CHAIN (f_ovf) = f_sav;
4087
4088 layout_type (record);
4089
4090 /* The correct type is an array type of one element. */
4091 return build_array_type (record, build_index_type (size_zero_node));
4092 }
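/* For reference, the record built above corresponds roughly to the following
   C declaration (an illustrative sketch only, it is not compiled anywhere):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];  */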
4093
4094 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4095
4096 static void
4097 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4098 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4099 int no_rtl)
4100 {
4101 CUMULATIVE_ARGS next_cum;
4102 rtx save_area = NULL_RTX, mem;
4103 rtx label;
4104 rtx label_ref;
4105 rtx tmp_reg;
4106 rtx nsse_reg;
4107 int set;
4108 tree fntype;
4109 int stdarg_p;
4110 int i;
4111
4112 if (!TARGET_64BIT)
4113 return;
4114
4115 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4116 return;
4117
4118 /* Indicate to allocate space on the stack for varargs save area. */
4119 ix86_save_varrargs_registers = 1;
4120
4121 cfun->stack_alignment_needed = 128;
4122
4123 fntype = TREE_TYPE (current_function_decl);
4124 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4125 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4126 != void_type_node));
4127
4128 /* For varargs, we do not want to skip the dummy va_dcl argument.
4129 For stdargs, we do want to skip the last named argument. */
4130 next_cum = *cum;
4131 if (stdarg_p)
4132 function_arg_advance (&next_cum, mode, type, 1);
4133
4134 if (!no_rtl)
4135 save_area = frame_pointer_rtx;
4136
4137 set = get_varargs_alias_set ();
4138
4139 for (i = next_cum.regno;
4140 i < ix86_regparm
4141 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4142 i++)
4143 {
4144 mem = gen_rtx_MEM (Pmode,
4145 plus_constant (save_area, i * UNITS_PER_WORD));
4146 MEM_NOTRAP_P (mem) = 1;
4147 set_mem_alias_set (mem, set);
4148 emit_move_insn (mem, gen_rtx_REG (Pmode,
4149 x86_64_int_parameter_registers[i]));
4150 }
4151
4152 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4153 {
4154 /* Now emit code to save SSE registers. The AX parameter contains the number
4155 of SSE parameter registers used to call this function. We use the
4156 sse_prologue_save insn template, which produces a computed jump across
4157 the SSE saves. Some preparation work is needed to get this working. */
4158
4159 label = gen_label_rtx ();
4160 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4161
4162 /* Compute the address to jump to:
4163 label - 4*eax + nnamed_sse_arguments*4 */
4164 tmp_reg = gen_reg_rtx (Pmode);
4165 nsse_reg = gen_reg_rtx (Pmode);
4166 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4167 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4168 gen_rtx_MULT (Pmode, nsse_reg,
4169 GEN_INT (4))));
4170 if (next_cum.sse_regno)
4171 emit_move_insn
4172 (nsse_reg,
4173 gen_rtx_CONST (DImode,
4174 gen_rtx_PLUS (DImode,
4175 label_ref,
4176 GEN_INT (next_cum.sse_regno * 4))));
4177 else
4178 emit_move_insn (nsse_reg, label_ref);
4179 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4180
4181 /* Compute the address of the memory block we save into. We always use a
4182 pointer pointing 127 bytes after the first byte to store; this is needed
4183 to keep the instruction size limited to 4 bytes. */
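/* (Presumably the 127-byte bias keeps every displacement emitted by
   sse_prologue_save within the signed 8-bit range, so each save instruction
   keeps the fixed 4-byte length that the computed jump above relies on.)  */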
4184 tmp_reg = gen_reg_rtx (Pmode);
4185 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4186 plus_constant (save_area,
4187 8 * REGPARM_MAX + 127)));
4188 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4189 MEM_NOTRAP_P (mem) = 1;
4190 set_mem_alias_set (mem, set);
4191 set_mem_align (mem, BITS_PER_WORD);
4192
4193 /* And finally do the dirty job! */
4194 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4195 GEN_INT (next_cum.sse_regno), label));
4196 }
4197
4198 }
4199
4200 /* Implement va_start. */
4201
4202 void
4203 ix86_va_start (tree valist, rtx nextarg)
4204 {
4205 HOST_WIDE_INT words, n_gpr, n_fpr;
4206 tree f_gpr, f_fpr, f_ovf, f_sav;
4207 tree gpr, fpr, ovf, sav, t;
4208 tree type;
4209
4210 /* Only the 64-bit target needs something special. */
4211 if (!TARGET_64BIT)
4212 {
4213 std_expand_builtin_va_start (valist, nextarg);
4214 return;
4215 }
4216
4217 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4218 f_fpr = TREE_CHAIN (f_gpr);
4219 f_ovf = TREE_CHAIN (f_fpr);
4220 f_sav = TREE_CHAIN (f_ovf);
4221
4222 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4223 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4224 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4225 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4226 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4227
4228 /* Count number of gp and fp argument registers used. */
4229 words = current_function_args_info.words;
4230 n_gpr = current_function_args_info.regno;
4231 n_fpr = current_function_args_info.sse_regno;
4232
4233 if (TARGET_DEBUG_ARG)
4234 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4235 (int) words, (int) n_gpr, (int) n_fpr);
4236
4237 if (cfun->va_list_gpr_size)
4238 {
4239 type = TREE_TYPE (gpr);
4240 t = build2 (MODIFY_EXPR, type, gpr,
4241 build_int_cst (type, n_gpr * 8));
4242 TREE_SIDE_EFFECTS (t) = 1;
4243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4244 }
4245
4246 if (cfun->va_list_fpr_size)
4247 {
4248 type = TREE_TYPE (fpr);
4249 t = build2 (MODIFY_EXPR, type, fpr,
4250 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4251 TREE_SIDE_EFFECTS (t) = 1;
4252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4253 }
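  /* The initial values above mirror the register save area laid out by
     ix86_setup_incoming_varargs: integer registers occupy 8-byte slots
     starting at offset 0, SSE registers occupy 16-byte slots starting at
     offset 8 * REGPARM_MAX.  */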
4254
4255 /* Find the overflow area. */
4256 type = TREE_TYPE (ovf);
4257 t = make_tree (type, virtual_incoming_args_rtx);
4258 if (words != 0)
4259 t = build2 (PLUS_EXPR, type, t,
4260 build_int_cst (type, words * UNITS_PER_WORD));
4261 t = build2 (MODIFY_EXPR, type, ovf, t);
4262 TREE_SIDE_EFFECTS (t) = 1;
4263 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4264
4265 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4266 {
4267 /* Find the register save area.
4268 The function prologue saves it right above the stack frame. */
4269 type = TREE_TYPE (sav);
4270 t = make_tree (type, frame_pointer_rtx);
4271 t = build2 (MODIFY_EXPR, type, sav, t);
4272 TREE_SIDE_EFFECTS (t) = 1;
4273 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4274 }
4275 }
4276
4277 /* Implement va_arg. */
4278
4279 tree
4280 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4281 {
4282 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4283 tree f_gpr, f_fpr, f_ovf, f_sav;
4284 tree gpr, fpr, ovf, sav, t;
4285 int size, rsize;
4286 tree lab_false, lab_over = NULL_TREE;
4287 tree addr, t2;
4288 rtx container;
4289 int indirect_p = 0;
4290 tree ptrtype;
4291 enum machine_mode nat_mode;
4292
4293 /* Only the 64-bit target needs something special. */
4294 if (!TARGET_64BIT)
4295 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4296
4297 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4298 f_fpr = TREE_CHAIN (f_gpr);
4299 f_ovf = TREE_CHAIN (f_fpr);
4300 f_sav = TREE_CHAIN (f_ovf);
4301
4302 valist = build_va_arg_indirect_ref (valist);
4303 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4304 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4305 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4306 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4307
4308 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4309 if (indirect_p)
4310 type = build_pointer_type (type);
4311 size = int_size_in_bytes (type);
4312 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4313
4314 nat_mode = type_natural_mode (type);
4315 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4316 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4317
4318 /* Pull the value out of the saved registers. */
4319
4320 addr = create_tmp_var (ptr_type_node, "addr");
4321 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4322
4323 if (container)
4324 {
4325 int needed_intregs, needed_sseregs;
4326 bool need_temp;
4327 tree int_addr, sse_addr;
4328
4329 lab_false = create_artificial_label ();
4330 lab_over = create_artificial_label ();
4331
4332 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4333
4334 need_temp = (!REG_P (container)
4335 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4336 || TYPE_ALIGN (type) > 128));
4337
4338 /* In case we are passing a structure, verify that it is a consecutive block
4339 in the register save area. If not, we need to do moves. */
4340 if (!need_temp && !REG_P (container))
4341 {
4342 /* Verify that all registers are strictly consecutive. */
4343 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4344 {
4345 int i;
4346
4347 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4348 {
4349 rtx slot = XVECEXP (container, 0, i);
4350 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4351 || INTVAL (XEXP (slot, 1)) != i * 16)
4352 need_temp = 1;
4353 }
4354 }
4355 else
4356 {
4357 int i;
4358
4359 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4360 {
4361 rtx slot = XVECEXP (container, 0, i);
4362 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4363 || INTVAL (XEXP (slot, 1)) != i * 8)
4364 need_temp = 1;
4365 }
4366 }
4367 }
4368 if (!need_temp)
4369 {
4370 int_addr = addr;
4371 sse_addr = addr;
4372 }
4373 else
4374 {
4375 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4376 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4377 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4378 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4379 }
4380
4381 /* First ensure that we fit completely in registers. */
4382 if (needed_intregs)
4383 {
4384 t = build_int_cst (TREE_TYPE (gpr),
4385 (REGPARM_MAX - needed_intregs + 1) * 8);
4386 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4387 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4388 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4389 gimplify_and_add (t, pre_p);
4390 }
4391 if (needed_sseregs)
4392 {
4393 t = build_int_cst (TREE_TYPE (fpr),
4394 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4395 + REGPARM_MAX * 8);
4396 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4397 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4398 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4399 gimplify_and_add (t, pre_p);
4400 }
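      /* GPR and FPR hold byte offsets into the register save area, so the
         comparisons above branch to the overflow path whenever fewer than
         NEEDED_INTREGS integer slots or NEEDED_SSEREGS SSE slots remain.  */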
4401
4402 /* Compute index to start of area used for integer regs. */
4403 if (needed_intregs)
4404 {
4405 /* int_addr = gpr + sav; */
4406 t = fold_convert (ptr_type_node, gpr);
4407 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4408 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4409 gimplify_and_add (t, pre_p);
4410 }
4411 if (needed_sseregs)
4412 {
4413 /* sse_addr = fpr + sav; */
4414 t = fold_convert (ptr_type_node, fpr);
4415 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4416 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4417 gimplify_and_add (t, pre_p);
4418 }
4419 if (need_temp)
4420 {
4421 int i;
4422 tree temp = create_tmp_var (type, "va_arg_tmp");
4423
4424 /* addr = &temp; */
4425 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4426 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4427 gimplify_and_add (t, pre_p);
4428
4429 for (i = 0; i < XVECLEN (container, 0); i++)
4430 {
4431 rtx slot = XVECEXP (container, 0, i);
4432 rtx reg = XEXP (slot, 0);
4433 enum machine_mode mode = GET_MODE (reg);
4434 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4435 tree addr_type = build_pointer_type (piece_type);
4436 tree src_addr, src;
4437 int src_offset;
4438 tree dest_addr, dest;
4439
4440 if (SSE_REGNO_P (REGNO (reg)))
4441 {
4442 src_addr = sse_addr;
4443 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4444 }
4445 else
4446 {
4447 src_addr = int_addr;
4448 src_offset = REGNO (reg) * 8;
4449 }
4450 src_addr = fold_convert (addr_type, src_addr);
4451 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4452 size_int (src_offset)));
4453 src = build_va_arg_indirect_ref (src_addr);
4454
4455 dest_addr = fold_convert (addr_type, addr);
4456 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4457 size_int (INTVAL (XEXP (slot, 1)))));
4458 dest = build_va_arg_indirect_ref (dest_addr);
4459
4460 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4461 gimplify_and_add (t, pre_p);
4462 }
4463 }
4464
4465 if (needed_intregs)
4466 {
4467 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4468 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4469 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4470 gimplify_and_add (t, pre_p);
4471 }
4472 if (needed_sseregs)
4473 {
4474 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4475 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4476 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4477 gimplify_and_add (t, pre_p);
4478 }
4479
4480 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4481 gimplify_and_add (t, pre_p);
4482
4483 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4484 append_to_statement_list (t, pre_p);
4485 }
4486
4487 /* ... otherwise out of the overflow area. */
4488
4489 /* Care for on-stack alignment if needed. */
4490 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4491 || integer_zerop (TYPE_SIZE (type)))
4492 t = ovf;
4493 else
4494 {
4495 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4496 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4497 build_int_cst (TREE_TYPE (ovf), align - 1));
4498 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4499 build_int_cst (TREE_TYPE (t), -align));
4500 }
4501 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4502
4503 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4504 gimplify_and_add (t2, pre_p);
4505
4506 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4507 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4508 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4509 gimplify_and_add (t, pre_p);
4510
4511 if (container)
4512 {
4513 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4514 append_to_statement_list (t, pre_p);
4515 }
4516
4517 ptrtype = build_pointer_type (type);
4518 addr = fold_convert (ptrtype, addr);
4519
4520 if (indirect_p)
4521 addr = build_va_arg_indirect_ref (addr);
4522 return build_va_arg_indirect_ref (addr);
4523 }
4524 \f
4525 /* Return nonzero if OPNUM's MEM should be matched
4526 in movabs* patterns. */
4527
4528 int
4529 ix86_check_movabs (rtx insn, int opnum)
4530 {
4531 rtx set, mem;
4532
4533 set = PATTERN (insn);
4534 if (GET_CODE (set) == PARALLEL)
4535 set = XVECEXP (set, 0, 0);
4536 gcc_assert (GET_CODE (set) == SET);
4537 mem = XEXP (set, opnum);
4538 while (GET_CODE (mem) == SUBREG)
4539 mem = SUBREG_REG (mem);
4540 gcc_assert (GET_CODE (mem) == MEM);
4541 return (volatile_ok || !MEM_VOLATILE_P (mem));
4542 }
4543 \f
4544 /* Initialize the table of extra 80387 mathematical constants. */
4545
4546 static void
4547 init_ext_80387_constants (void)
4548 {
4549 static const char * cst[5] =
4550 {
4551 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4552 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4553 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4554 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4555 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4556 };
4557 int i;
4558
4559 for (i = 0; i < 5; i++)
4560 {
4561 real_from_string (&ext_80387_constants_table[i], cst[i]);
4562 /* Ensure each constant is rounded to XFmode precision. */
4563 real_convert (&ext_80387_constants_table[i],
4564 XFmode, &ext_80387_constants_table[i]);
4565 }
4566
4567 ext_80387_constants_init = 1;
4568 }
4569
4570 /* Return the code of the special instruction that can load constant X,
4571 0 if there is none, or -1 if X is not a floating point CONST_DOUBLE. */
4572
4573 int
4574 standard_80387_constant_p (rtx x)
4575 {
4576 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4577 return -1;
4578
4579 if (x == CONST0_RTX (GET_MODE (x)))
4580 return 1;
4581 if (x == CONST1_RTX (GET_MODE (x)))
4582 return 2;
4583
4584 /* For XFmode constants, try to find a special 80387 instruction when
4585 optimizing for size or on those CPUs that benefit from them. */
4586 if (GET_MODE (x) == XFmode
4587 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4588 {
4589 REAL_VALUE_TYPE r;
4590 int i;
4591
4592 if (! ext_80387_constants_init)
4593 init_ext_80387_constants ();
4594
4595 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4596 for (i = 0; i < 5; i++)
4597 if (real_identical (&r, &ext_80387_constants_table[i]))
4598 return i + 3;
4599 }
4600
4601 return 0;
4602 }
4603
4604 /* Return the opcode of the special instruction to be used to load
4605 the constant X. */
4606
4607 const char *
4608 standard_80387_constant_opcode (rtx x)
4609 {
4610 switch (standard_80387_constant_p (x))
4611 {
4612 case 1:
4613 return "fldz";
4614 case 2:
4615 return "fld1";
4616 case 3:
4617 return "fldlg2";
4618 case 4:
4619 return "fldln2";
4620 case 5:
4621 return "fldl2e";
4622 case 6:
4623 return "fldl2t";
4624 case 7:
4625 return "fldpi";
4626 default:
4627 gcc_unreachable ();
4628 }
4629 }
4630
4631 /* Return the CONST_DOUBLE representing the 80387 constant that is
4632 loaded by the specified special instruction. The argument IDX
4633 matches the return value from standard_80387_constant_p. */
4634
4635 rtx
4636 standard_80387_constant_rtx (int idx)
4637 {
4638 int i;
4639
4640 if (! ext_80387_constants_init)
4641 init_ext_80387_constants ();
4642
4643 switch (idx)
4644 {
4645 case 3:
4646 case 4:
4647 case 5:
4648 case 6:
4649 case 7:
4650 i = idx - 3;
4651 break;
4652
4653 default:
4654 gcc_unreachable ();
4655 }
4656
4657 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4658 XFmode);
4659 }
4660
4661 /* Return 1 if MODE is a valid mode for SSE. */
4662 static int
4663 standard_sse_mode_p (enum machine_mode mode)
4664 {
4665 switch (mode)
4666 {
4667 case V16QImode:
4668 case V8HImode:
4669 case V4SImode:
4670 case V2DImode:
4671 case V4SFmode:
4672 case V2DFmode:
4673 return 1;
4674
4675 default:
4676 return 0;
4677 }
4678 }
4679
4680 /* Return nonzero if X is a constant that we can load into an SSE register
4681 without using memory. */
4682 int
4683 standard_sse_constant_p (rtx x)
4684 {
4685 enum machine_mode mode = GET_MODE (x);
4686
4687 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4688 return 1;
4689 if (vector_all_ones_operand (x, mode)
4690 && standard_sse_mode_p (mode))
4691 return TARGET_SSE2 ? 2 : -1;
4692
4693 return 0;
4694 }
4695
4696 /* Return the opcode of the special instruction to be used to load
4697 the constant X. */
4698
4699 const char *
4700 standard_sse_constant_opcode (rtx insn, rtx x)
4701 {
4702 switch (standard_sse_constant_p (x))
4703 {
4704 case 1:
4705 if (get_attr_mode (insn) == MODE_V4SF)
4706 return "xorps\t%0, %0";
4707 else if (get_attr_mode (insn) == MODE_V2DF)
4708 return "xorpd\t%0, %0";
4709 else
4710 return "pxor\t%0, %0";
4711 case 2:
4712 return "pcmpeqd\t%0, %0";
4713 }
4714 gcc_unreachable ();
4715 }
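/* Note that xorps/xorpd/pxor of a register with itself yields all zeros, and
   pcmpeqd of a register with itself yields all ones, which is exactly the
   pair of constants recognized by standard_sse_constant_p above.  */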
4716
4717 /* Return 1 if OP contains a symbol reference. */
4718
4719 int
4720 symbolic_reference_mentioned_p (rtx op)
4721 {
4722 const char *fmt;
4723 int i;
4724
4725 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4726 return 1;
4727
4728 fmt = GET_RTX_FORMAT (GET_CODE (op));
4729 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4730 {
4731 if (fmt[i] == 'E')
4732 {
4733 int j;
4734
4735 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4736 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4737 return 1;
4738 }
4739
4740 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4741 return 1;
4742 }
4743
4744 return 0;
4745 }
4746
4747 /* Return 1 if it is appropriate to emit `ret' instructions in the
4748 body of a function. Do this only if the epilogue is simple, needing a
4749 couple of insns. Prior to reloading, we can't tell how many registers
4750 must be saved, so return 0 then. Return 0 if there is no frame
4751 marker to de-allocate. */
4752
4753 int
4754 ix86_can_use_return_insn_p (void)
4755 {
4756 struct ix86_frame frame;
4757
4758 if (! reload_completed || frame_pointer_needed)
4759 return 0;
4760
4761 /* Don't allow more than 32768 bytes of popped arguments, since that's
4762 all we can do with one instruction. */
4763 if (current_function_pops_args
4764 && current_function_args_size >= 32768)
4765 return 0;
4766
4767 ix86_compute_frame_layout (&frame);
4768 return frame.to_allocate == 0 && frame.nregs == 0;
4769 }
4770 \f
4771 /* Value should be nonzero if functions must have frame pointers.
4772 Zero means the frame pointer need not be set up (and parms may
4773 be accessed via the stack pointer) in functions that seem suitable. */
4774
4775 int
4776 ix86_frame_pointer_required (void)
4777 {
4778 /* If we accessed previous frames, then the generated code expects
4779 to be able to access the saved ebp value in our frame. */
4780 if (cfun->machine->accesses_prev_frame)
4781 return 1;
4782
4783 /* Several x86 OSes need a frame pointer for other reasons,
4784 usually pertaining to setjmp. */
4785 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4786 return 1;
4787
4788 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4789 the frame pointer by default. Turn it back on now if we've not
4790 got a leaf function. */
4791 if (TARGET_OMIT_LEAF_FRAME_POINTER
4792 && (!current_function_is_leaf
4793 || ix86_current_function_calls_tls_descriptor))
4794 return 1;
4795
4796 if (current_function_profile)
4797 return 1;
4798
4799 return 0;
4800 }
4801
4802 /* Record that the current function accesses previous call frames. */
4803
4804 void
4805 ix86_setup_frame_addresses (void)
4806 {
4807 cfun->machine->accesses_prev_frame = 1;
4808 }
4809 \f
4810 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4811 # define USE_HIDDEN_LINKONCE 1
4812 #else
4813 # define USE_HIDDEN_LINKONCE 0
4814 #endif
4815
4816 static int pic_labels_used;
4817
4818 /* Fills in the label name that should be used for a pc thunk for
4819 the given register. */
4820
4821 static void
4822 get_pc_thunk_name (char name[32], unsigned int regno)
4823 {
4824 gcc_assert (!TARGET_64BIT);
4825
4826 if (USE_HIDDEN_LINKONCE)
4827 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4828 else
4829 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4830 }
4831
4832
4833 /* This function emits the pc thunks used by -fpic code: each thunk loads
4834 its register with the return address of the caller and then returns. */
4835
4836 void
4837 ix86_file_end (void)
4838 {
4839 rtx xops[2];
4840 int regno;
4841
4842 for (regno = 0; regno < 8; ++regno)
4843 {
4844 char name[32];
4845
4846 if (! ((pic_labels_used >> regno) & 1))
4847 continue;
4848
4849 get_pc_thunk_name (name, regno);
4850
4851 #if TARGET_MACHO
4852 if (TARGET_MACHO)
4853 {
4854 switch_to_section (darwin_sections[text_coal_section]);
4855 fputs ("\t.weak_definition\t", asm_out_file);
4856 assemble_name (asm_out_file, name);
4857 fputs ("\n\t.private_extern\t", asm_out_file);
4858 assemble_name (asm_out_file, name);
4859 fputs ("\n", asm_out_file);
4860 ASM_OUTPUT_LABEL (asm_out_file, name);
4861 }
4862 else
4863 #endif
4864 if (USE_HIDDEN_LINKONCE)
4865 {
4866 tree decl;
4867
4868 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4869 error_mark_node);
4870 TREE_PUBLIC (decl) = 1;
4871 TREE_STATIC (decl) = 1;
4872 DECL_ONE_ONLY (decl) = 1;
4873
4874 (*targetm.asm_out.unique_section) (decl, 0);
4875 switch_to_section (get_named_section (decl, NULL, 0));
4876
4877 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4878 fputs ("\t.hidden\t", asm_out_file);
4879 assemble_name (asm_out_file, name);
4880 fputc ('\n', asm_out_file);
4881 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4882 }
4883 else
4884 {
4885 switch_to_section (text_section);
4886 ASM_OUTPUT_LABEL (asm_out_file, name);
4887 }
4888
4889 xops[0] = gen_rtx_REG (SImode, regno);
4890 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4891 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4892 output_asm_insn ("ret", xops);
4893 }
4894
4895 if (NEED_INDICATE_EXEC_STACK)
4896 file_end_indicate_exec_stack ();
4897 }
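/* Each thunk emitted above is therefore just

       mov	(%esp), %REG
       ret

   i.e. it copies the caller's return address into the chosen register.  */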
4898
4899 /* Emit code for the SET_GOT patterns. */
4900
4901 const char *
4902 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4903 {
4904 rtx xops[3];
4905
4906 xops[0] = dest;
4907 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4908
4909 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4910 {
4911 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4912
4913 if (!flag_pic)
4914 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4915 else
4916 output_asm_insn ("call\t%a2", xops);
4917
4918 #if TARGET_MACHO
4919 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4920 is what will be referenced by the Mach-O PIC subsystem. */
4921 if (!label)
4922 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4923 #endif
4924
4925 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4926 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4927
4928 if (flag_pic)
4929 output_asm_insn ("pop{l}\t%0", xops);
4930 }
4931 else
4932 {
4933 char name[32];
4934 get_pc_thunk_name (name, REGNO (dest));
4935 pic_labels_used |= 1 << REGNO (dest);
4936
4937 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4938 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4939 output_asm_insn ("call\t%X2", xops);
4940 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4941 is what will be referenced by the Mach-O PIC subsystem. */
4942 #if TARGET_MACHO
4943 if (!label)
4944 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4945 else
4946 targetm.asm_out.internal_label (asm_out_file, "L",
4947 CODE_LABEL_NUMBER (label));
4948 #endif
4949 }
4950
4951 if (TARGET_MACHO)
4952 return "";
4953
4954 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4955 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4956 else
4957 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4958
4959 return "";
4960 }
4961
4962 /* Generate a "push" pattern for input ARG. */
4963
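/* Illustrative note: the RTL built below is
	(set (mem:SI (pre_dec:SI (reg:SI sp))) arg)
   in 32bit mode, i.e. an ordinary push instruction.  */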
4964 static rtx
4965 gen_push (rtx arg)
4966 {
4967 return gen_rtx_SET (VOIDmode,
4968 gen_rtx_MEM (Pmode,
4969 gen_rtx_PRE_DEC (Pmode,
4970 stack_pointer_rtx)),
4971 arg);
4972 }
4973
4974 /* Return the number of an unused call-clobbered register if one is
4975 available for the entire function, and INVALID_REGNUM otherwise. */
4976
4977 static unsigned int
4978 ix86_select_alt_pic_regnum (void)
4979 {
4980 if (current_function_is_leaf && !current_function_profile
4981 && !ix86_current_function_calls_tls_descriptor)
4982 {
4983 int i;
4984 for (i = 2; i >= 0; --i)
4985 if (!regs_ever_live[i])
4986 return i;
4987 }
4988
4989 return INVALID_REGNUM;
4990 }
4991
4992 /* Return 1 if we need to save REGNO. */
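/* Roughly (summarizing the checks below): the PIC register when it is
   needed and no alternate call-clobbered register can carry the PIC base,
   the EH return data registers on an eh_return path, the register holding
   the forced argument-pointer copy, and any live call-saved register other
   than a hard frame pointer that is already maintained.  */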
4993 static int
4994 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4995 {
4996 if (pic_offset_table_rtx
4997 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4998 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4999 || current_function_profile
5000 || current_function_calls_eh_return
5001 || current_function_uses_const_pool))
5002 {
5003 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5004 return 0;
5005 return 1;
5006 }
5007
5008 if (current_function_calls_eh_return && maybe_eh_return)
5009 {
5010 unsigned i;
5011 for (i = 0; ; i++)
5012 {
5013 unsigned test = EH_RETURN_DATA_REGNO (i);
5014 if (test == INVALID_REGNUM)
5015 break;
5016 if (test == regno)
5017 return 1;
5018 }
5019 }
5020
5021 if (cfun->machine->force_align_arg_pointer
5022 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5023 return 1;
5024
5025 return (regs_ever_live[regno]
5026 && !call_used_regs[regno]
5027 && !fixed_regs[regno]
5028 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5029 }
5030
5031 /* Return number of registers to be saved on the stack. */
5032
5033 static int
5034 ix86_nsaved_regs (void)
5035 {
5036 int nregs = 0;
5037 int regno;
5038
5039 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5040 if (ix86_save_reg (regno, true))
5041 nregs++;
5042 return nregs;
5043 }
5044
5045 /* Return the offset between two registers, one to be eliminated, and the other
5046 its replacement, at the start of a routine. */
5047
5048 HOST_WIDE_INT
5049 ix86_initial_elimination_offset (int from, int to)
5050 {
5051 struct ix86_frame frame;
5052 ix86_compute_frame_layout (&frame);
5053
5054 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5055 return frame.hard_frame_pointer_offset;
5056 else if (from == FRAME_POINTER_REGNUM
5057 && to == HARD_FRAME_POINTER_REGNUM)
5058 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5059 else
5060 {
5061 gcc_assert (to == STACK_POINTER_REGNUM);
5062
5063 if (from == ARG_POINTER_REGNUM)
5064 return frame.stack_pointer_offset;
5065
5066 gcc_assert (from == FRAME_POINTER_REGNUM);
5067 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5068 }
5069 }
5070
5071 /* Fill structure ix86_frame describing the frame of the currently compiled function. */
5072
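/* A sketch of the layout computed below (offsets measured from the stack
   pointer value at function entry; a summary drawn from the code, not an
   authoritative specification):

	return address
	saved frame pointer		(if frame_pointer_needed)
    <-- hard_frame_pointer_offset
	register save area		(nregs * UNITS_PER_WORD)
	va-arg save area		(if ix86_save_varrargs_registers)
	padding1			(align to stack_alignment_needed)
    <-- frame_pointer_offset
	local variables			(get_frame_size ())
	outgoing arguments area		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			(align to preferred_alignment)
    <-- stack_pointer_offset  */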
5073 static void
5074 ix86_compute_frame_layout (struct ix86_frame *frame)
5075 {
5076 HOST_WIDE_INT total_size;
5077 unsigned int stack_alignment_needed;
5078 HOST_WIDE_INT offset;
5079 unsigned int preferred_alignment;
5080 HOST_WIDE_INT size = get_frame_size ();
5081
5082 frame->nregs = ix86_nsaved_regs ();
5083 total_size = size;
5084
5085 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5086 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5087
5088 /* During reload iteration the number of registers saved can change.
5089 Recompute the value as needed. Do not recompute when the number of
5090 registers did not change, as reload calls this function multiple times
5091 and does not expect the decision to change within a single iteration. */
5092 if (!optimize_size
5093 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5094 {
5095 int count = frame->nregs;
5096
5097 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5098 /* The fast prologue uses moves instead of pushes to save registers. This
5099 is significantly longer, but it also executes faster, as modern hardware
5100 can execute the moves in parallel but cannot do so for push/pop.
5101
5102 Be careful about choosing which prologue to emit: when a function takes
5103 many instructions to execute we may use the slow version, as well as when
5104 the function is known to be outside a hot spot (this is known with
5105 feedback only). Weight the size of the function by the number of registers
5106 to save, as it is cheap to use one or two push instructions but very
5107 slow to use many of them. */
5108 if (count)
5109 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5110 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5111 || (flag_branch_probabilities
5112 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5113 cfun->machine->use_fast_prologue_epilogue = false;
5114 else
5115 cfun->machine->use_fast_prologue_epilogue
5116 = !expensive_function_p (count);
5117 }
5118 if (TARGET_PROLOGUE_USING_MOVE
5119 && cfun->machine->use_fast_prologue_epilogue)
5120 frame->save_regs_using_mov = true;
5121 else
5122 frame->save_regs_using_mov = false;
5123
5124
5125 /* Skip return address and saved base pointer. */
5126 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5127
5128 frame->hard_frame_pointer_offset = offset;
5129
5130 /* Do some sanity checking of stack_alignment_needed and
5131 preferred_alignment, since the i386 port is the only one using these
5132 features, and they may break easily. */
5133
5134 gcc_assert (!size || stack_alignment_needed);
5135 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5136 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5137 gcc_assert (stack_alignment_needed
5138 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5139
5140 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5141 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5142
5143 /* Register save area */
5144 offset += frame->nregs * UNITS_PER_WORD;
5145
5146 /* Va-arg area */
5147 if (ix86_save_varrargs_registers)
5148 {
5149 offset += X86_64_VARARGS_SIZE;
5150 frame->va_arg_size = X86_64_VARARGS_SIZE;
5151 }
5152 else
5153 frame->va_arg_size = 0;
5154
5155 /* Align start of frame for local function. */
5156 frame->padding1 = ((offset + stack_alignment_needed - 1)
5157 & -stack_alignment_needed) - offset;
5158
5159 offset += frame->padding1;
5160
5161 /* Frame pointer points here. */
5162 frame->frame_pointer_offset = offset;
5163
5164 offset += size;
5165
5166 /* Add outgoing arguments area. Can be skipped if we eliminated
5167 all the function calls as dead code.
5168 Skipping is however impossible when the function calls alloca, as the
5169 alloca expander assumes that the last current_function_outgoing_args_size
5170 bytes of the stack frame are unused. */
5171 if (ACCUMULATE_OUTGOING_ARGS
5172 && (!current_function_is_leaf || current_function_calls_alloca
5173 || ix86_current_function_calls_tls_descriptor))
5174 {
5175 offset += current_function_outgoing_args_size;
5176 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5177 }
5178 else
5179 frame->outgoing_arguments_size = 0;
5180
5181 /* Align stack boundary. Only needed if we're calling another function
5182 or using alloca. */
5183 if (!current_function_is_leaf || current_function_calls_alloca
5184 || ix86_current_function_calls_tls_descriptor)
5185 frame->padding2 = ((offset + preferred_alignment - 1)
5186 & -preferred_alignment) - offset;
5187 else
5188 frame->padding2 = 0;
5189
5190 offset += frame->padding2;
5191
5192 /* We've reached end of stack frame. */
5193 frame->stack_pointer_offset = offset;
5194
5195 /* Size prologue needs to allocate. */
5196 frame->to_allocate =
5197 (size + frame->padding1 + frame->padding2
5198 + frame->outgoing_arguments_size + frame->va_arg_size);
5199
5200 if ((!frame->to_allocate && frame->nregs <= 1)
5201 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5202 frame->save_regs_using_mov = false;
5203
5204 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5205 && current_function_is_leaf
5206 && !ix86_current_function_calls_tls_descriptor)
5207 {
5208 frame->red_zone_size = frame->to_allocate;
5209 if (frame->save_regs_using_mov)
5210 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5211 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5212 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5213 }
5214 else
5215 frame->red_zone_size = 0;
5216 frame->to_allocate -= frame->red_zone_size;
5217 frame->stack_pointer_offset -= frame->red_zone_size;
5218 #if 0
5219 fprintf (stderr, "nregs: %i\n", frame->nregs);
5220 fprintf (stderr, "size: %i\n", size);
5221 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5222 fprintf (stderr, "padding1: %i\n", frame->padding1);
5223 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5224 fprintf (stderr, "padding2: %i\n", frame->padding2);
5225 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5226 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5227 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5228 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5229 frame->hard_frame_pointer_offset);
5230 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5231 #endif
5232 }
5233
5234 /* Emit code to save registers in the prologue. */
5235
5236 static void
5237 ix86_emit_save_regs (void)
5238 {
5239 unsigned int regno;
5240 rtx insn;
5241
5242 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5243 if (ix86_save_reg (regno, true))
5244 {
5245 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5246 RTX_FRAME_RELATED_P (insn) = 1;
5247 }
5248 }
5249
5250 /* Emit code to save registers using MOV insns. First register
5251 is saved at POINTER + OFFSET. */
5252 static void
5253 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5254 {
5255 unsigned int regno;
5256 rtx insn;
5257
5258 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5259 if (ix86_save_reg (regno, true))
5260 {
5261 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5262 Pmode, offset),
5263 gen_rtx_REG (Pmode, regno));
5264 RTX_FRAME_RELATED_P (insn) = 1;
5265 offset += UNITS_PER_WORD;
5266 }
5267 }
5268
5269 /* Expand prologue or epilogue stack adjustment.
5270 The pattern exists to put a dependency on all ebp-based memory accesses.
5271 STYLE should be negative if instructions should be marked as frame related,
5272 zero if the %r11 register is live and cannot be freely used, and positive
5273 otherwise. */
5274
5275 static void
5276 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5277 {
5278 rtx insn;
5279
5280 if (! TARGET_64BIT)
5281 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5282 else if (x86_64_immediate_operand (offset, DImode))
5283 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5284 else
5285 {
5286 rtx r11;
5287 /* r11 is used by indirect sibcall return as well, set before the
5288 epilogue and used after the epilogue. ATM indirect sibcall
5289 shouldn't be used together with huge frame sizes in one
5290 function because of the frame_size check in sibcall.c. */
5291 gcc_assert (style);
5292 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5293 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5294 if (style < 0)
5295 RTX_FRAME_RELATED_P (insn) = 1;
5296 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5297 offset));
5298 }
5299 if (style < 0)
5300 RTX_FRAME_RELATED_P (insn) = 1;
5301 }
5302
5303 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5304
5305 static rtx
5306 ix86_internal_arg_pointer (void)
5307 {
5308 bool has_force_align_arg_pointer =
5309 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5310 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5311 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5312 && DECL_NAME (current_function_decl)
5313 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5314 && DECL_FILE_SCOPE_P (current_function_decl))
5315 || ix86_force_align_arg_pointer
5316 || has_force_align_arg_pointer)
5317 {
5318 /* Nested functions can't realign the stack due to a register
5319 conflict. */
5320 if (DECL_CONTEXT (current_function_decl)
5321 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5322 {
5323 if (ix86_force_align_arg_pointer)
5324 warning (0, "-mstackrealign ignored for nested functions");
5325 if (has_force_align_arg_pointer)
5326 error ("%s not supported for nested functions",
5327 ix86_force_align_arg_pointer_string);
5328 return virtual_incoming_args_rtx;
5329 }
5330 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5331 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5332 }
5333 else
5334 return virtual_incoming_args_rtx;
5335 }
5336
5337 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5338 This is called from dwarf2out.c to emit call frame instructions
5339 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5340 static void
5341 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5342 {
5343 rtx unspec = SET_SRC (pattern);
5344 gcc_assert (GET_CODE (unspec) == UNSPEC);
5345
5346 switch (index)
5347 {
5348 case UNSPEC_REG_SAVE:
5349 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5350 SET_DEST (pattern));
5351 break;
5352 case UNSPEC_DEF_CFA:
5353 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5354 INTVAL (XVECEXP (unspec, 0, 0)));
5355 break;
5356 default:
5357 gcc_unreachable ();
5358 }
5359 }
5360
5361 /* Expand the prologue into a bunch of separate insns. */
5362
5363 void
5364 ix86_expand_prologue (void)
5365 {
5366 rtx insn;
5367 bool pic_reg_used;
5368 struct ix86_frame frame;
5369 HOST_WIDE_INT allocate;
5370
5371 ix86_compute_frame_layout (&frame);
5372
5373 if (cfun->machine->force_align_arg_pointer)
5374 {
5375 rtx x, y;
5376
5377 /* Grab the argument pointer. */
5378 x = plus_constant (stack_pointer_rtx, 4);
5379 y = cfun->machine->force_align_arg_pointer;
5380 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5381 RTX_FRAME_RELATED_P (insn) = 1;
5382
5383 /* The unwind info consists of two parts: install the fafp as the cfa,
5384 and record the fafp as the "save register" of the stack pointer.
5385 The latter is there so that the unwinder can see where it
5386 should restore the stack pointer across the stack-aligning and insn. */
5387 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5388 x = gen_rtx_SET (VOIDmode, y, x);
5389 RTX_FRAME_RELATED_P (x) = 1;
5390 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5391 UNSPEC_REG_SAVE);
5392 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5393 RTX_FRAME_RELATED_P (y) = 1;
5394 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5395 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5396 REG_NOTES (insn) = x;
5397
5398 /* Align the stack. */
5399 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5400 GEN_INT (-16)));
5401
5402 /* And here we cheat like madmen with the unwind info. We force the
5403 cfa register back to sp+4, which is exactly what it was at the
5404 start of the function. Re-pushing the return address results in
5405 the return at the same spot relative to the cfa, and thus is
5406 correct wrt the unwind info. */
5407 x = cfun->machine->force_align_arg_pointer;
5408 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5409 insn = emit_insn (gen_push (x));
5410 RTX_FRAME_RELATED_P (insn) = 1;
5411
5412 x = GEN_INT (4);
5413 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5414 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5415 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5416 REG_NOTES (insn) = x;
5417 }
5418
5419 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5420 slower on all targets. Also sdb doesn't like it. */
5421
5422 if (frame_pointer_needed)
5423 {
5424 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5425 RTX_FRAME_RELATED_P (insn) = 1;
5426
5427 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5428 RTX_FRAME_RELATED_P (insn) = 1;
5429 }
5430
5431 allocate = frame.to_allocate;
5432
5433 if (!frame.save_regs_using_mov)
5434 ix86_emit_save_regs ();
5435 else
5436 allocate += frame.nregs * UNITS_PER_WORD;
5437
5438 /* When using the red zone we may start saving registers before allocating
5439 the stack frame, saving one cycle of the prologue. */
5440 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5441 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5442 : stack_pointer_rtx,
5443 -frame.nregs * UNITS_PER_WORD);
5444
5445 if (allocate == 0)
5446 ;
5447 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5448 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5449 GEN_INT (-allocate), -1);
5450 else
5451 {
5452 /* Only valid for Win32. */
5453 rtx eax = gen_rtx_REG (SImode, 0);
5454 bool eax_live = ix86_eax_live_at_start_p ();
5455 rtx t;
5456
5457 gcc_assert (!TARGET_64BIT);
5458
5459 if (eax_live)
5460 {
5461 emit_insn (gen_push (eax));
5462 allocate -= 4;
5463 }
5464
5465 emit_move_insn (eax, GEN_INT (allocate));
5466
5467 insn = emit_insn (gen_allocate_stack_worker (eax));
5468 RTX_FRAME_RELATED_P (insn) = 1;
5469 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5470 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5471 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5472 t, REG_NOTES (insn));
5473
5474 if (eax_live)
5475 {
5476 if (frame_pointer_needed)
5477 t = plus_constant (hard_frame_pointer_rtx,
5478 allocate
5479 - frame.to_allocate
5480 - frame.nregs * UNITS_PER_WORD);
5481 else
5482 t = plus_constant (stack_pointer_rtx, allocate);
5483 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5484 }
5485 }
5486
5487 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5488 {
5489 if (!frame_pointer_needed || !frame.to_allocate)
5490 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5491 else
5492 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5493 -frame.nregs * UNITS_PER_WORD);
5494 }
5495
5496 pic_reg_used = false;
5497 if (pic_offset_table_rtx
5498 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5499 || current_function_profile))
5500 {
5501 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5502
5503 if (alt_pic_reg_used != INVALID_REGNUM)
5504 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5505
5506 pic_reg_used = true;
5507 }
5508
5509 if (pic_reg_used)
5510 {
5511 if (TARGET_64BIT)
5512 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5513 else
5514 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5515
5516 /* Even with accurate pre-reload life analysis, we can wind up
5517 deleting all references to the pic register after reload.
5518 Consider the case where cross-jumping unifies two sides of a branch
5519 controlled by a comparison against the only read from a global.
5520 In that case, allow the set_got to be deleted, though we're
5521 too late to do anything about the ebx save in the prologue. */
5522 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5523 }
5524
5525 /* Prevent function calls from being scheduled before the call to mcount.
5526 In the pic_reg_used case, make sure that the got load isn't deleted. */
5527 if (current_function_profile)
5528 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5529 }
5530
5531 /* Emit code to restore saved registers using MOV insns. First register
5532 is restored from POINTER + OFFSET. */
5533 static void
5534 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5535 int maybe_eh_return)
5536 {
5537 int regno;
5538 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5539
5540 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5541 if (ix86_save_reg (regno, maybe_eh_return))
5542 {
5543 /* Ensure that adjust_address won't be forced to produce a pointer
5544 out of the range allowed by the x86-64 instruction set. */
5545 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5546 {
5547 rtx r11;
5548
5549 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5550 emit_move_insn (r11, GEN_INT (offset));
5551 emit_insn (gen_adddi3 (r11, r11, pointer));
5552 base_address = gen_rtx_MEM (Pmode, r11);
5553 offset = 0;
5554 }
5555 emit_move_insn (gen_rtx_REG (Pmode, regno),
5556 adjust_address (base_address, Pmode, offset));
5557 offset += UNITS_PER_WORD;
5558 }
5559 }
5560
5561 /* Restore function stack, frame, and registers. */
5562
5563 void
5564 ix86_expand_epilogue (int style)
5565 {
5566 int regno;
5567 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5568 struct ix86_frame frame;
5569 HOST_WIDE_INT offset;
5570
5571 ix86_compute_frame_layout (&frame);
5572
5573 /* Calculate start of saved registers relative to ebp. Special care
5574 must be taken for the normal return case of a function using
5575 eh_return: the eax and edx registers are marked as saved, but not
5576 restored along this path. */
5577 offset = frame.nregs;
5578 if (current_function_calls_eh_return && style != 2)
5579 offset -= 2;
5580 offset *= -UNITS_PER_WORD;
5581
5582 /* If we're only restoring one register and sp is not valid then
5583 use a move instruction to restore the register, since it's
5584 less work than reloading sp and popping the register.
5585
5586 The default code results in a stack adjustment using an add/lea
5587 instruction, while this code results in a LEAVE instruction (or discrete
5588 equivalent), so it is profitable in some other cases as well, especially
5589 when there are no registers to restore. We also use this code when
5590 TARGET_USE_LEAVE and there is exactly one register to pop. This heuristic
5591 may need some tuning in the future. */
5592 if ((!sp_valid && frame.nregs <= 1)
5593 || (TARGET_EPILOGUE_USING_MOVE
5594 && cfun->machine->use_fast_prologue_epilogue
5595 && (frame.nregs > 1 || frame.to_allocate))
5596 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5597 || (frame_pointer_needed && TARGET_USE_LEAVE
5598 && cfun->machine->use_fast_prologue_epilogue
5599 && frame.nregs == 1)
5600 || current_function_calls_eh_return)
5601 {
5602 /* Restore registers. We can use ebp or esp to address the memory
5603 locations. If both are available, default to ebp, since offsets
5604 are known to be small. The only exception is esp pointing directly to
5605 the end of the block of saved registers, where we may simplify the
5606 addressing mode. */
5607
5608 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5609 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5610 frame.to_allocate, style == 2);
5611 else
5612 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5613 offset, style == 2);
5614
5615 /* eh_return epilogues need %ecx added to the stack pointer. */
5616 if (style == 2)
5617 {
5618 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5619
5620 if (frame_pointer_needed)
5621 {
5622 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5623 tmp = plus_constant (tmp, UNITS_PER_WORD);
5624 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5625
5626 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5627 emit_move_insn (hard_frame_pointer_rtx, tmp);
5628
5629 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5630 const0_rtx, style);
5631 }
5632 else
5633 {
5634 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5635 tmp = plus_constant (tmp, (frame.to_allocate
5636 + frame.nregs * UNITS_PER_WORD));
5637 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5638 }
5639 }
5640 else if (!frame_pointer_needed)
5641 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5642 GEN_INT (frame.to_allocate
5643 + frame.nregs * UNITS_PER_WORD),
5644 style);
5645 /* If not an i386, mov & pop is faster than "leave". */
5646 else if (TARGET_USE_LEAVE || optimize_size
5647 || !cfun->machine->use_fast_prologue_epilogue)
5648 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5649 else
5650 {
5651 pro_epilogue_adjust_stack (stack_pointer_rtx,
5652 hard_frame_pointer_rtx,
5653 const0_rtx, style);
5654 if (TARGET_64BIT)
5655 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5656 else
5657 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5658 }
5659 }
5660 else
5661 {
5662 /* First step is to deallocate the stack frame so that we can
5663 pop the registers. */
5664 if (!sp_valid)
5665 {
5666 gcc_assert (frame_pointer_needed);
5667 pro_epilogue_adjust_stack (stack_pointer_rtx,
5668 hard_frame_pointer_rtx,
5669 GEN_INT (offset), style);
5670 }
5671 else if (frame.to_allocate)
5672 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5673 GEN_INT (frame.to_allocate), style);
5674
5675 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5676 if (ix86_save_reg (regno, false))
5677 {
5678 if (TARGET_64BIT)
5679 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5680 else
5681 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5682 }
5683 if (frame_pointer_needed)
5684 {
5685 /* Leave results in shorter dependency chains on CPUs that are
5686 able to grok it fast. */
5687 if (TARGET_USE_LEAVE)
5688 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5689 else if (TARGET_64BIT)
5690 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5691 else
5692 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5693 }
5694 }
5695
5696 if (cfun->machine->force_align_arg_pointer)
5697 {
5698 emit_insn (gen_addsi3 (stack_pointer_rtx,
5699 cfun->machine->force_align_arg_pointer,
5700 GEN_INT (-4)));
5701 }
5702
5703 /* Sibcall epilogues don't want a return instruction. */
5704 if (style == 0)
5705 return;
5706
5707 if (current_function_pops_args && current_function_args_size)
5708 {
5709 rtx popc = GEN_INT (current_function_pops_args);
5710
5711 /* i386 can only pop 64K bytes. If asked to pop more, pop
5712 return address, do explicit add, and jump indirectly to the
5713 caller. */
5714
5715 if (current_function_pops_args >= 65536)
5716 {
5717 rtx ecx = gen_rtx_REG (SImode, 2);
5718
5719 /* There is no "pascal" calling convention in the 64bit ABI. */
5720 gcc_assert (!TARGET_64BIT);
5721
5722 emit_insn (gen_popsi1 (ecx));
5723 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5724 emit_jump_insn (gen_return_indirect_internal (ecx));
5725 }
5726 else
5727 emit_jump_insn (gen_return_pop_internal (popc));
5728 }
5729 else
5730 emit_jump_insn (gen_return_internal ());
5731 }
5732
5733 /* Reset state that compiling the function may have modified, such as the hard register assigned to pic_offset_table_rtx. */
5734
5735 static void
5736 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5737 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5738 {
5739 if (pic_offset_table_rtx)
5740 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5741 #if TARGET_MACHO
5742 /* Mach-O doesn't support labels at the end of objects, so if
5743 it looks like we might want one, insert a NOP. */
5744 {
5745 rtx insn = get_last_insn ();
5746 while (insn
5747 && NOTE_P (insn)
5748 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5749 insn = PREV_INSN (insn);
5750 if (insn
5751 && (LABEL_P (insn)
5752 || (NOTE_P (insn)
5753 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5754 fputs ("\tnop\n", file);
5755 }
5756 #endif
5757
5758 }
5759 \f
5760 /* Extract the parts of an RTL expression that is a valid memory address
5761 for an instruction. Return 0 if the structure of the address is
5762 grossly off. Return -1 if the address contains ASHIFT, so it is not
5763 strictly valid, but is still used for computing the length of a lea instruction. */
5764
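/* For example (illustrative only), the address

	(plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 8))

   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 8,
   i.e. the operand 8(%ebx,%eax,4).  */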
5765 int
5766 ix86_decompose_address (rtx addr, struct ix86_address *out)
5767 {
5768 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5769 rtx base_reg, index_reg;
5770 HOST_WIDE_INT scale = 1;
5771 rtx scale_rtx = NULL_RTX;
5772 int retval = 1;
5773 enum ix86_address_seg seg = SEG_DEFAULT;
5774
5775 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5776 base = addr;
5777 else if (GET_CODE (addr) == PLUS)
5778 {
5779 rtx addends[4], op;
5780 int n = 0, i;
5781
5782 op = addr;
5783 do
5784 {
5785 if (n >= 4)
5786 return 0;
5787 addends[n++] = XEXP (op, 1);
5788 op = XEXP (op, 0);
5789 }
5790 while (GET_CODE (op) == PLUS);
5791 if (n >= 4)
5792 return 0;
5793 addends[n] = op;
5794
5795 for (i = n; i >= 0; --i)
5796 {
5797 op = addends[i];
5798 switch (GET_CODE (op))
5799 {
5800 case MULT:
5801 if (index)
5802 return 0;
5803 index = XEXP (op, 0);
5804 scale_rtx = XEXP (op, 1);
5805 break;
5806
5807 case UNSPEC:
5808 if (XINT (op, 1) == UNSPEC_TP
5809 && TARGET_TLS_DIRECT_SEG_REFS
5810 && seg == SEG_DEFAULT)
5811 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5812 else
5813 return 0;
5814 break;
5815
5816 case REG:
5817 case SUBREG:
5818 if (!base)
5819 base = op;
5820 else if (!index)
5821 index = op;
5822 else
5823 return 0;
5824 break;
5825
5826 case CONST:
5827 case CONST_INT:
5828 case SYMBOL_REF:
5829 case LABEL_REF:
5830 if (disp)
5831 return 0;
5832 disp = op;
5833 break;
5834
5835 default:
5836 return 0;
5837 }
5838 }
5839 }
5840 else if (GET_CODE (addr) == MULT)
5841 {
5842 index = XEXP (addr, 0); /* index*scale */
5843 scale_rtx = XEXP (addr, 1);
5844 }
5845 else if (GET_CODE (addr) == ASHIFT)
5846 {
5847 rtx tmp;
5848
5849 /* We're called for lea too, which implements ashift on occasion. */
5850 index = XEXP (addr, 0);
5851 tmp = XEXP (addr, 1);
5852 if (GET_CODE (tmp) != CONST_INT)
5853 return 0;
5854 scale = INTVAL (tmp);
5855 if ((unsigned HOST_WIDE_INT) scale > 3)
5856 return 0;
5857 scale = 1 << scale;
5858 retval = -1;
5859 }
5860 else
5861 disp = addr; /* displacement */
5862
5863 /* Extract the integral value of scale. */
5864 if (scale_rtx)
5865 {
5866 if (GET_CODE (scale_rtx) != CONST_INT)
5867 return 0;
5868 scale = INTVAL (scale_rtx);
5869 }
5870
5871 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5872 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5873
5874 /* Allow the arg pointer and the stack pointer as an index if there is no scaling. */
5875 if (base_reg && index_reg && scale == 1
5876 && (index_reg == arg_pointer_rtx
5877 || index_reg == frame_pointer_rtx
5878 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5879 {
5880 rtx tmp;
5881 tmp = base, base = index, index = tmp;
5882 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5883 }
5884
5885 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5886 if ((base_reg == hard_frame_pointer_rtx
5887 || base_reg == frame_pointer_rtx
5888 || base_reg == arg_pointer_rtx) && !disp)
5889 disp = const0_rtx;
5890
5891 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5892 Avoid this by transforming it to [%esi+0]. */
5893 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5894 && base_reg && !index_reg && !disp
5895 && REG_P (base_reg)
5896 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5897 disp = const0_rtx;
5898
5899 /* Special case: encode reg+reg instead of reg*2. */
5900 if (!base && index && scale && scale == 2)
5901 base = index, base_reg = index_reg, scale = 1;
5902
5903 /* Special case: scaling cannot be encoded without base or displacement. */
5904 if (!base && !disp && index && scale != 1)
5905 disp = const0_rtx;
5906
5907 out->base = base;
5908 out->index = index;
5909 out->disp = disp;
5910 out->scale = scale;
5911 out->seg = seg;
5912
5913 return retval;
5914 }
5915 \f
5916 /* Return the cost of the memory address x.
5917 For i386, it is better to use a complex address than let gcc copy
5918 the address into a reg and make a new pseudo. But not if the address
5919 requires two regs - that would mean more pseudos with longer
5920 lifetimes. */
5921 static int
5922 ix86_address_cost (rtx x)
5923 {
5924 struct ix86_address parts;
5925 int cost = 1;
5926 int ok = ix86_decompose_address (x, &parts);
5927
5928 gcc_assert (ok);
5929
5930 if (parts.base && GET_CODE (parts.base) == SUBREG)
5931 parts.base = SUBREG_REG (parts.base);
5932 if (parts.index && GET_CODE (parts.index) == SUBREG)
5933 parts.index = SUBREG_REG (parts.index);
5934
5935 /* More complex memory references are better. */
5936 if (parts.disp && parts.disp != const0_rtx)
5937 cost--;
5938 if (parts.seg != SEG_DEFAULT)
5939 cost--;
5940
5941 /* Attempt to minimize number of registers in the address. */
5942 if ((parts.base
5943 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5944 || (parts.index
5945 && (!REG_P (parts.index)
5946 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5947 cost++;
5948
5949 if (parts.base
5950 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5951 && parts.index
5952 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5953 && parts.base != parts.index)
5954 cost++;
5955
5956 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5957 since its predecode logic can't detect the length of instructions
5958 and decoding degenerates to the vector decoder. Increase the cost of
5959 such addresses here. The penalty is minimally 2 cycles. It may be
5960 worthwhile to split such addresses or even refuse such addresses at all.
5961
5962 The following addressing modes are affected:
5963 [base+scale*index]
5964 [scale*index+disp]
5965 [base+index]
5966
5967 The first and last cases may be avoidable by explicitly coding the zero
5968 in the memory address, but I don't have an AMD K6 machine handy to check
5969 this theory. */
5970
5971 if (TARGET_K6
5972 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5973 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5974 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5975 cost += 10;
5976
5977 return cost;
5978 }
5979 \f
5980 /* If X is a machine specific address (i.e. a symbol or label being
5981 referenced as a displacement from the GOT implemented using an
5982 UNSPEC), then return the base term. Otherwise return X. */
5983
5984 rtx
5985 ix86_find_base_term (rtx x)
5986 {
5987 rtx term;
5988
5989 if (TARGET_64BIT)
5990 {
5991 if (GET_CODE (x) != CONST)
5992 return x;
5993 term = XEXP (x, 0);
5994 if (GET_CODE (term) == PLUS
5995 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5996 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5997 term = XEXP (term, 0);
5998 if (GET_CODE (term) != UNSPEC
5999 || XINT (term, 1) != UNSPEC_GOTPCREL)
6000 return x;
6001
6002 term = XVECEXP (term, 0, 0);
6003
6004 if (GET_CODE (term) != SYMBOL_REF
6005 && GET_CODE (term) != LABEL_REF)
6006 return x;
6007
6008 return term;
6009 }
6010
6011 term = ix86_delegitimize_address (x);
6012
6013 if (GET_CODE (term) != SYMBOL_REF
6014 && GET_CODE (term) != LABEL_REF)
6015 return x;
6016
6017 return term;
6018 }
6019
6020 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6021 this is used to form addresses of local data when -fPIC is in
6022 use. */
6023
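/* That is, an expression of the form

	(minus (symbol_ref "_foo") (symbol_ref "<pic base>"))

   is accepted (illustrative; the label-ref variant is handled as well).  */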
6024 static bool
6025 darwin_local_data_pic (rtx disp)
6026 {
6027 if (GET_CODE (disp) == MINUS)
6028 {
6029 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6030 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6031 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6032 {
6033 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6034 if (! strcmp (sym_name, "<pic base>"))
6035 return true;
6036 }
6037 }
6038
6039 return false;
6040 }
6041 \f
6042 /* Determine if a given RTX is a valid constant. We already know this
6043 satisfies CONSTANT_P. */
6044
6045 bool
6046 legitimate_constant_p (rtx x)
6047 {
6048 switch (GET_CODE (x))
6049 {
6050 case CONST:
6051 x = XEXP (x, 0);
6052
6053 if (GET_CODE (x) == PLUS)
6054 {
6055 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6056 return false;
6057 x = XEXP (x, 0);
6058 }
6059
6060 if (TARGET_MACHO && darwin_local_data_pic (x))
6061 return true;
6062
6063 /* Only some unspecs are valid as "constants". */
6064 if (GET_CODE (x) == UNSPEC)
6065 switch (XINT (x, 1))
6066 {
6067 case UNSPEC_GOTOFF:
6068 return TARGET_64BIT;
6069 case UNSPEC_TPOFF:
6070 case UNSPEC_NTPOFF:
6071 x = XVECEXP (x, 0, 0);
6072 return (GET_CODE (x) == SYMBOL_REF
6073 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6074 case UNSPEC_DTPOFF:
6075 x = XVECEXP (x, 0, 0);
6076 return (GET_CODE (x) == SYMBOL_REF
6077 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6078 default:
6079 return false;
6080 }
6081
6082 /* We must have drilled down to a symbol. */
6083 if (GET_CODE (x) == LABEL_REF)
6084 return true;
6085 if (GET_CODE (x) != SYMBOL_REF)
6086 return false;
6087 /* FALLTHRU */
6088
6089 case SYMBOL_REF:
6090 /* TLS symbols are never valid. */
6091 if (SYMBOL_REF_TLS_MODEL (x))
6092 return false;
6093 break;
6094
6095 case CONST_DOUBLE:
6096 if (GET_MODE (x) == TImode
6097 && x != CONST0_RTX (TImode)
6098 && !TARGET_64BIT)
6099 return false;
6100 break;
6101
6102 case CONST_VECTOR:
6103 if (x == CONST0_RTX (GET_MODE (x)))
6104 return true;
6105 return false;
6106
6107 default:
6108 break;
6109 }
6110
6111 /* Otherwise we handle everything else in the move patterns. */
6112 return true;
6113 }
6114
6115 /* Determine if it's legal to put X into the constant pool. This
6116 is not possible for the address of thread-local symbols, which
6117 is checked above. */
6118
6119 static bool
6120 ix86_cannot_force_const_mem (rtx x)
6121 {
6122 /* We can always put integral constants and vectors in memory. */
6123 switch (GET_CODE (x))
6124 {
6125 case CONST_INT:
6126 case CONST_DOUBLE:
6127 case CONST_VECTOR:
6128 return false;
6129
6130 default:
6131 break;
6132 }
6133 return !legitimate_constant_p (x);
6134 }
6135
6136 /* Determine if a given RTX is a valid constant address. */
6137
6138 bool
6139 constant_address_p (rtx x)
6140 {
6141 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6142 }
6143
6144 /* Nonzero if the constant value X is a legitimate general operand
6145 when generating PIC code. It is given that flag_pic is on and
6146 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6147
6148 bool
6149 legitimate_pic_operand_p (rtx x)
6150 {
6151 rtx inner;
6152
6153 switch (GET_CODE (x))
6154 {
6155 case CONST:
6156 inner = XEXP (x, 0);
6157 if (GET_CODE (inner) == PLUS
6158 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6159 inner = XEXP (inner, 0);
6160
6161 /* Only some unspecs are valid as "constants". */
6162 if (GET_CODE (inner) == UNSPEC)
6163 switch (XINT (inner, 1))
6164 {
6165 case UNSPEC_GOTOFF:
6166 return TARGET_64BIT;
6167 case UNSPEC_TPOFF:
6168 x = XVECEXP (inner, 0, 0);
6169 return (GET_CODE (x) == SYMBOL_REF
6170 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6171 default:
6172 return false;
6173 }
6174 /* FALLTHRU */
6175
6176 case SYMBOL_REF:
6177 case LABEL_REF:
6178 return legitimate_pic_address_disp_p (x);
6179
6180 default:
6181 return true;
6182 }
6183 }
6184
6185 /* Determine if a given CONST RTX is a valid memory displacement
6186 in PIC mode. */
6187
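/* For instance (illustrative), a 32bit @GOTOFF reference reaches this
   function as

	(const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF))

   possibly wrapped in a PLUS with a CONST_INT offset.  */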
6188 int
6189 legitimate_pic_address_disp_p (rtx disp)
6190 {
6191 bool saw_plus;
6192
6193 /* In 64bit mode we can allow direct addresses of symbols and labels
6194 when they are not dynamic symbols. */
6195 if (TARGET_64BIT)
6196 {
6197 rtx op0 = disp, op1;
6198
6199 switch (GET_CODE (disp))
6200 {
6201 case LABEL_REF:
6202 return true;
6203
6204 case CONST:
6205 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6206 break;
6207 op0 = XEXP (XEXP (disp, 0), 0);
6208 op1 = XEXP (XEXP (disp, 0), 1);
6209 if (GET_CODE (op1) != CONST_INT
6210 || INTVAL (op1) >= 16*1024*1024
6211 || INTVAL (op1) < -16*1024*1024)
6212 break;
6213 if (GET_CODE (op0) == LABEL_REF)
6214 return true;
6215 if (GET_CODE (op0) != SYMBOL_REF)
6216 break;
6217 /* FALLTHRU */
6218
6219 case SYMBOL_REF:
6220 /* TLS references should always be enclosed in UNSPEC. */
6221 if (SYMBOL_REF_TLS_MODEL (op0))
6222 return false;
6223 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6224 return true;
6225 break;
6226
6227 default:
6228 break;
6229 }
6230 }
6231 if (GET_CODE (disp) != CONST)
6232 return 0;
6233 disp = XEXP (disp, 0);
6234
6235 if (TARGET_64BIT)
6236 {
6237 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
6238 of GOT references. We should not need these anyway. */
6239 if (GET_CODE (disp) != UNSPEC
6240 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6241 && XINT (disp, 1) != UNSPEC_GOTOFF))
6242 return 0;
6243
6244 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6245 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6246 return 0;
6247 return 1;
6248 }
6249
6250 saw_plus = false;
6251 if (GET_CODE (disp) == PLUS)
6252 {
6253 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6254 return 0;
6255 disp = XEXP (disp, 0);
6256 saw_plus = true;
6257 }
6258
6259 if (TARGET_MACHO && darwin_local_data_pic (disp))
6260 return 1;
6261
6262 if (GET_CODE (disp) != UNSPEC)
6263 return 0;
6264
6265 switch (XINT (disp, 1))
6266 {
6267 case UNSPEC_GOT:
6268 if (saw_plus)
6269 return false;
6270 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6271 case UNSPEC_GOTOFF:
6272 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6273 While the ABI also specifies a 32bit relocation, we don't produce it
6274 in the small PIC model at all. */
6275 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6276 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6277 && !TARGET_64BIT)
6278 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6279 return false;
6280 case UNSPEC_GOTTPOFF:
6281 case UNSPEC_GOTNTPOFF:
6282 case UNSPEC_INDNTPOFF:
6283 if (saw_plus)
6284 return false;
6285 disp = XVECEXP (disp, 0, 0);
6286 return (GET_CODE (disp) == SYMBOL_REF
6287 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6288 case UNSPEC_NTPOFF:
6289 disp = XVECEXP (disp, 0, 0);
6290 return (GET_CODE (disp) == SYMBOL_REF
6291 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6292 case UNSPEC_DTPOFF:
6293 disp = XVECEXP (disp, 0, 0);
6294 return (GET_CODE (disp) == SYMBOL_REF
6295 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6296 }
6297
6298 return 0;
6299 }
6300
6301 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6302 memory address for an instruction. The MODE argument is the machine mode
6303 for the MEM expression that wants to use this address.
6304
6305 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6306 convert common non-canonical forms to canonical form so that they will
6307 be recognized. */
6308
6309 int
6310 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6311 {
6312 struct ix86_address parts;
6313 rtx base, index, disp;
6314 HOST_WIDE_INT scale;
6315 const char *reason = NULL;
6316 rtx reason_rtx = NULL_RTX;
6317
6318 if (TARGET_DEBUG_ADDR)
6319 {
6320 fprintf (stderr,
6321 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6322 GET_MODE_NAME (mode), strict);
6323 debug_rtx (addr);
6324 }
6325
6326 if (ix86_decompose_address (addr, &parts) <= 0)
6327 {
6328 reason = "decomposition failed";
6329 goto report_error;
6330 }
6331
6332 base = parts.base;
6333 index = parts.index;
6334 disp = parts.disp;
6335 scale = parts.scale;
6336
6337 /* Validate base register.
6338
6339 Don't allow SUBREG's that span more than a word here. It can lead to spill
6340 failures when the base is one word out of a two word structure, which is
6341 represented internally as a DImode int. */
6342
6343 if (base)
6344 {
6345 rtx reg;
6346 reason_rtx = base;
6347
6348 if (REG_P (base))
6349 reg = base;
6350 else if (GET_CODE (base) == SUBREG
6351 && REG_P (SUBREG_REG (base))
6352 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6353 <= UNITS_PER_WORD)
6354 reg = SUBREG_REG (base);
6355 else
6356 {
6357 reason = "base is not a register";
6358 goto report_error;
6359 }
6360
6361 if (GET_MODE (base) != Pmode)
6362 {
6363 reason = "base is not in Pmode";
6364 goto report_error;
6365 }
6366
6367 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6368 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6369 {
6370 reason = "base is not valid";
6371 goto report_error;
6372 }
6373 }
6374
6375 /* Validate index register.
6376
6377 Don't allow SUBREG's that span more than a word here -- same as above. */
6378
6379 if (index)
6380 {
6381 rtx reg;
6382 reason_rtx = index;
6383
6384 if (REG_P (index))
6385 reg = index;
6386 else if (GET_CODE (index) == SUBREG
6387 && REG_P (SUBREG_REG (index))
6388 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6389 <= UNITS_PER_WORD)
6390 reg = SUBREG_REG (index);
6391 else
6392 {
6393 reason = "index is not a register";
6394 goto report_error;
6395 }
6396
6397 if (GET_MODE (index) != Pmode)
6398 {
6399 reason = "index is not in Pmode";
6400 goto report_error;
6401 }
6402
6403 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6404 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6405 {
6406 reason = "index is not valid";
6407 goto report_error;
6408 }
6409 }
6410
6411 /* Validate scale factor. */
6412 if (scale != 1)
6413 {
6414 reason_rtx = GEN_INT (scale);
6415 if (!index)
6416 {
6417 reason = "scale without index";
6418 goto report_error;
6419 }
6420
6421 if (scale != 2 && scale != 4 && scale != 8)
6422 {
6423 reason = "scale is not a valid multiplier";
6424 goto report_error;
6425 }
6426 }
6427
6428 /* Validate displacement. */
6429 if (disp)
6430 {
6431 reason_rtx = disp;
6432
6433 if (GET_CODE (disp) == CONST
6434 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6435 switch (XINT (XEXP (disp, 0), 1))
6436 {
6437 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6438 used. While the ABI also specifies 32bit relocations, we don't produce
6439 them at all and use IP-relative addressing instead. */
6440 case UNSPEC_GOT:
6441 case UNSPEC_GOTOFF:
6442 gcc_assert (flag_pic);
6443 if (!TARGET_64BIT)
6444 goto is_legitimate_pic;
6445 reason = "64bit address unspec";
6446 goto report_error;
6447
6448 case UNSPEC_GOTPCREL:
6449 gcc_assert (flag_pic);
6450 goto is_legitimate_pic;
6451
6452 case UNSPEC_GOTTPOFF:
6453 case UNSPEC_GOTNTPOFF:
6454 case UNSPEC_INDNTPOFF:
6455 case UNSPEC_NTPOFF:
6456 case UNSPEC_DTPOFF:
6457 break;
6458
6459 default:
6460 reason = "invalid address unspec";
6461 goto report_error;
6462 }
6463
6464 else if (SYMBOLIC_CONST (disp)
6465 && (flag_pic
6466 || (TARGET_MACHO
6467 #if TARGET_MACHO
6468 && MACHOPIC_INDIRECT
6469 && !machopic_operand_p (disp)
6470 #endif
6471 )))
6472 {
6473
6474 is_legitimate_pic:
6475 if (TARGET_64BIT && (index || base))
6476 {
6477 /* foo@dtpoff(%rX) is ok. */
6478 if (GET_CODE (disp) != CONST
6479 || GET_CODE (XEXP (disp, 0)) != PLUS
6480 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6481 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6482 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6483 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6484 {
6485 reason = "non-constant pic memory reference";
6486 goto report_error;
6487 }
6488 }
6489 else if (! legitimate_pic_address_disp_p (disp))
6490 {
6491 reason = "displacement is an invalid pic construct";
6492 goto report_error;
6493 }
6494
6495 /* This code used to verify that a symbolic pic displacement
6496 includes the pic_offset_table_rtx register.
6497
6498 While this is a good idea, unfortunately these constructs may
6499 be created by the "adds using lea" optimization for incorrect
6500 code like:
6501
6502 int a;
6503 int foo(int i)
6504 {
6505 return *(&a+i);
6506 }
6507
6508 This code is nonsensical, but results in addressing the
6509 GOT table with pic_offset_table_rtx as the base. We can't
6510 just refuse it easily, since it gets matched by the
6511 "addsi3" pattern, which later gets split to lea when the
6512 output register differs from the input. While this
6513 could be handled by a separate addsi pattern for this
6514 case that never results in lea, disabling this test seems
6515 to be the easier and correct fix for the crash. */
6516 }
6517 else if (GET_CODE (disp) != LABEL_REF
6518 && GET_CODE (disp) != CONST_INT
6519 && (GET_CODE (disp) != CONST
6520 || !legitimate_constant_p (disp))
6521 && (GET_CODE (disp) != SYMBOL_REF
6522 || !legitimate_constant_p (disp)))
6523 {
6524 reason = "displacement is not constant";
6525 goto report_error;
6526 }
6527 else if (TARGET_64BIT
6528 && !x86_64_immediate_operand (disp, VOIDmode))
6529 {
6530 reason = "displacement is out of range";
6531 goto report_error;
6532 }
6533 }
6534
6535 /* Everything looks valid. */
6536 if (TARGET_DEBUG_ADDR)
6537 fprintf (stderr, "Success.\n");
6538 return TRUE;
6539
6540 report_error:
6541 if (TARGET_DEBUG_ADDR)
6542 {
6543 fprintf (stderr, "Error: %s\n", reason);
6544 debug_rtx (reason_rtx);
6545 }
6546 return FALSE;
6547 }
6548 \f
6549 /* Return a unique alias set for the GOT. */
6550
6551 static HOST_WIDE_INT
6552 ix86_GOT_alias_set (void)
6553 {
6554 static HOST_WIDE_INT set = -1;
6555 if (set == -1)
6556 set = new_alias_set ();
6557 return set;
6558 }
6559
6560 /* Return a legitimate reference for ORIG (an address) using the
6561 register REG. If REG is 0, a new pseudo is generated.
6562
6563 There are two types of references that must be handled:
6564
6565 1. Global data references must load the address from the GOT, via
6566 the PIC reg. An insn is emitted to do this load, and the reg is
6567 returned.
6568
6569 2. Static data references, constant pool addresses, and code labels
6570 compute the address as an offset from the GOT, whose base is in
6571 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6572 differentiate them from global data objects. The returned
6573 address is the PIC reg + an unspec constant.
6574
6575 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6576 reg also appears in the address. */
6577
6578 static rtx
6579 legitimize_pic_address (rtx orig, rtx reg)
6580 {
6581 rtx addr = orig;
6582 rtx new = orig;
6583 rtx base;
6584
6585 #if TARGET_MACHO
6586 if (TARGET_MACHO && !TARGET_64BIT)
6587 {
6588 if (reg == 0)
6589 reg = gen_reg_rtx (Pmode);
6590 /* Use the generic Mach-O PIC machinery. */
6591 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6592 }
6593 #endif
6594
6595 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6596 new = addr;
6597 else if (TARGET_64BIT
6598 && ix86_cmodel != CM_SMALL_PIC
6599 && local_symbolic_operand (addr, Pmode))
6600 {
6601 rtx tmpreg;
6602 /* This symbol may be referenced via a displacement from the PIC
6603 base address (@GOTOFF). */
6604
6605 if (reload_in_progress)
6606 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6607 if (GET_CODE (addr) == CONST)
6608 addr = XEXP (addr, 0);
6609 if (GET_CODE (addr) == PLUS)
6610 {
6611 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6612 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6613 }
6614 else
6615 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6616 new = gen_rtx_CONST (Pmode, new);
6617 if (!reg)
6618 tmpreg = gen_reg_rtx (Pmode);
6619 else
6620 tmpreg = reg;
6621 emit_move_insn (tmpreg, new);
6622
6623 if (reg != 0)
6624 {
6625 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6626 tmpreg, 1, OPTAB_DIRECT);
6627 new = reg;
6628 }
6629 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6630 }
6631 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6632 {
6633 /* This symbol may be referenced via a displacement from the PIC
6634 base address (@GOTOFF). */
6635
6636 if (reload_in_progress)
6637 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6638 if (GET_CODE (addr) == CONST)
6639 addr = XEXP (addr, 0);
6640 if (GET_CODE (addr) == PLUS)
6641 {
6642 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6643 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6644 }
6645 else
6646 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6647 new = gen_rtx_CONST (Pmode, new);
6648 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6649
6650 if (reg != 0)
6651 {
6652 emit_move_insn (reg, new);
6653 new = reg;
6654 }
6655 }
6656 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6657 {
6658 if (TARGET_64BIT)
6659 {
6660 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6661 new = gen_rtx_CONST (Pmode, new);
6662 new = gen_const_mem (Pmode, new);
6663 set_mem_alias_set (new, ix86_GOT_alias_set ());
6664
6665 if (reg == 0)
6666 reg = gen_reg_rtx (Pmode);
6667 /* Use gen_movsi directly, otherwise the address is loaded
6668 into a register for CSE. We don't want to CSE these addresses;
6669 instead we CSE addresses from the GOT table, so skip this. */
6670 emit_insn (gen_movsi (reg, new));
6671 new = reg;
6672 }
6673 else
6674 {
6675 /* This symbol must be referenced via a load from the
6676 Global Offset Table (@GOT). */
6677
6678 if (reload_in_progress)
6679 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6680 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6681 new = gen_rtx_CONST (Pmode, new);
6682 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6683 new = gen_const_mem (Pmode, new);
6684 set_mem_alias_set (new, ix86_GOT_alias_set ());
6685
6686 if (reg == 0)
6687 reg = gen_reg_rtx (Pmode);
6688 emit_move_insn (reg, new);
6689 new = reg;
6690 }
6691 }
6692 else
6693 {
6694 if (GET_CODE (addr) == CONST_INT
6695 && !x86_64_immediate_operand (addr, VOIDmode))
6696 {
6697 if (reg)
6698 {
6699 emit_move_insn (reg, addr);
6700 new = reg;
6701 }
6702 else
6703 new = force_reg (Pmode, addr);
6704 }
6705 else if (GET_CODE (addr) == CONST)
6706 {
6707 addr = XEXP (addr, 0);
6708
6709 /* We must match stuff we generated before. Assume the only
6710 unspecs that can get here are ours. Not that we could do
6711 anything with them anyway.... */
6712 if (GET_CODE (addr) == UNSPEC
6713 || (GET_CODE (addr) == PLUS
6714 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6715 return orig;
6716 gcc_assert (GET_CODE (addr) == PLUS);
6717 }
6718 if (GET_CODE (addr) == PLUS)
6719 {
6720 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6721
6722 /* Check first to see if this is a constant offset from a @GOTOFF
6723 symbol reference. */
6724 if (local_symbolic_operand (op0, Pmode)
6725 && GET_CODE (op1) == CONST_INT)
6726 {
6727 if (!TARGET_64BIT)
6728 {
6729 if (reload_in_progress)
6730 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6731 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6732 UNSPEC_GOTOFF);
6733 new = gen_rtx_PLUS (Pmode, new, op1);
6734 new = gen_rtx_CONST (Pmode, new);
6735 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6736
6737 if (reg != 0)
6738 {
6739 emit_move_insn (reg, new);
6740 new = reg;
6741 }
6742 }
6743 else
6744 {
6745 if (INTVAL (op1) < -16*1024*1024
6746 || INTVAL (op1) >= 16*1024*1024)
6747 {
6748 if (!x86_64_immediate_operand (op1, Pmode))
6749 op1 = force_reg (Pmode, op1);
6750 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6751 }
6752 }
6753 }
6754 else
6755 {
6756 base = legitimize_pic_address (XEXP (addr, 0), reg);
6757 new = legitimize_pic_address (XEXP (addr, 1),
6758 base == reg ? NULL_RTX : reg);
6759
6760 if (GET_CODE (new) == CONST_INT)
6761 new = plus_constant (base, INTVAL (new));
6762 else
6763 {
6764 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6765 {
6766 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6767 new = XEXP (new, 1);
6768 }
6769 new = gen_rtx_PLUS (Pmode, base, new);
6770 }
6771 }
6772 }
6773 }
6774 return new;
6775 }
6776 \f
6777 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6778
6779 static rtx
6780 get_thread_pointer (int to_reg)
6781 {
6782 rtx tp, reg, insn;
6783
6784 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6785 if (!to_reg)
6786 return tp;
6787
6788 reg = gen_reg_rtx (Pmode);
6789 insn = gen_rtx_SET (VOIDmode, reg, tp);
6790 insn = emit_insn (insn);
6791
6792 return reg;
6793 }
6794
6795 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6796 false if we expect this to be used for a memory address and true if
6797 we expect to load the address into a register. */
6798
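/* For example (a hedged sketch of the TLS_MODEL_LOCAL_EXEC case under GNU
   TLS, following the code below): the returned address has the form

	thread-pointer + (const (unspec [x] UNSPEC_NTPOFF))

   which the back end can emit as a %gs-relative access on ia32.  */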
6799 static rtx
6800 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6801 {
6802 rtx dest, base, off, pic, tp;
6803 int type;
6804
6805 switch (model)
6806 {
6807 case TLS_MODEL_GLOBAL_DYNAMIC:
6808 dest = gen_reg_rtx (Pmode);
6809 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6810
6811 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6812 {
6813 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6814
6815 start_sequence ();
6816 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6817 insns = get_insns ();
6818 end_sequence ();
6819
6820 emit_libcall_block (insns, dest, rax, x);
6821 }
6822 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6823 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6824 else
6825 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6826
6827 if (TARGET_GNU2_TLS)
6828 {
6829 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6830
6831 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6832 }
6833 break;
6834
6835 case TLS_MODEL_LOCAL_DYNAMIC:
6836 base = gen_reg_rtx (Pmode);
6837 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6838
6839 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6840 {
6841 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6842
6843 start_sequence ();
6844 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6845 insns = get_insns ();
6846 end_sequence ();
6847
6848 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6849 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6850 emit_libcall_block (insns, base, rax, note);
6851 }
6852 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6853 emit_insn (gen_tls_local_dynamic_base_64 (base));
6854 else
6855 emit_insn (gen_tls_local_dynamic_base_32 (base));
6856
6857 if (TARGET_GNU2_TLS)
6858 {
6859 rtx x = ix86_tls_module_base ();
6860
6861 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6862 gen_rtx_MINUS (Pmode, x, tp));
6863 }
6864
6865 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6866 off = gen_rtx_CONST (Pmode, off);
6867
6868 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6869
6870 if (TARGET_GNU2_TLS)
6871 {
6872 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6873
6874 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6875 }
6876
6877 break;
6878
6879 case TLS_MODEL_INITIAL_EXEC:
6880 if (TARGET_64BIT)
6881 {
6882 pic = NULL;
6883 type = UNSPEC_GOTNTPOFF;
6884 }
6885 else if (flag_pic)
6886 {
6887 if (reload_in_progress)
6888 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6889 pic = pic_offset_table_rtx;
6890 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6891 }
6892 else if (!TARGET_ANY_GNU_TLS)
6893 {
6894 pic = gen_reg_rtx (Pmode);
6895 emit_insn (gen_set_got (pic));
6896 type = UNSPEC_GOTTPOFF;
6897 }
6898 else
6899 {
6900 pic = NULL;
6901 type = UNSPEC_INDNTPOFF;
6902 }
6903
6904 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6905 off = gen_rtx_CONST (Pmode, off);
6906 if (pic)
6907 off = gen_rtx_PLUS (Pmode, pic, off);
6908 off = gen_const_mem (Pmode, off);
6909 set_mem_alias_set (off, ix86_GOT_alias_set ());
6910
6911 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6912 {
6913 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6914 off = force_reg (Pmode, off);
6915 return gen_rtx_PLUS (Pmode, base, off);
6916 }
6917 else
6918 {
6919 base = get_thread_pointer (true);
6920 dest = gen_reg_rtx (Pmode);
6921 emit_insn (gen_subsi3 (dest, base, off));
6922 }
6923 break;
6924
6925 case TLS_MODEL_LOCAL_EXEC:
6926 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6927 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6928 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6929 off = gen_rtx_CONST (Pmode, off);
6930
6931 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6932 {
6933 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6934 return gen_rtx_PLUS (Pmode, base, off);
6935 }
6936 else
6937 {
6938 base = get_thread_pointer (true);
6939 dest = gen_reg_rtx (Pmode);
6940 emit_insn (gen_subsi3 (dest, base, off));
6941 }
6942 break;
6943
6944 default:
6945 gcc_unreachable ();
6946 }
6947
6948 return dest;
6949 }
6950
6951 /* Try machine-dependent ways of modifying an illegitimate address
6952 to be legitimate. If we find one, return the new, valid address.
6953 This macro is used in only one place: `memory_address' in explow.c.
6954
6955 OLDX is the address as it was before break_out_memory_refs was called.
6956 In some cases it is useful to look at this to decide what needs to be done.
6957
6958 MODE and WIN are passed so that this macro can use
6959 GO_IF_LEGITIMATE_ADDRESS.
6960
6961 It is always safe for this macro to do nothing. It exists to recognize
6962 opportunities to optimize the output.
6963
6964 For the 80386, we handle X+REG by loading X into a register R and
6965 using R+REG. R will go in a general reg and indexing will be used.
6966 However, if REG is a broken-out memory address or multiplication,
6967 nothing needs to be done because REG can certainly go in a general reg.
6968
6969 When -fpic is used, special handling is needed for symbolic references.
6970 See comments by legitimize_pic_address in i386.c for details. */
6971
6972 rtx
6973 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6974 {
6975 int changed = 0;
6976 unsigned log;
6977
6978 if (TARGET_DEBUG_ADDR)
6979 {
6980 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6981 GET_MODE_NAME (mode));
6982 debug_rtx (x);
6983 }
6984
6985 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6986 if (log)
6987 return legitimize_tls_address (x, log, false);
6988 if (GET_CODE (x) == CONST
6989 && GET_CODE (XEXP (x, 0)) == PLUS
6990 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6991 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6992 {
6993 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6994 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6995 }
6996
6997 if (flag_pic && SYMBOLIC_CONST (x))
6998 return legitimize_pic_address (x, 0);
6999
7000 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
7001 if (GET_CODE (x) == ASHIFT
7002 && GET_CODE (XEXP (x, 1)) == CONST_INT
7003 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7004 {
7005 changed = 1;
7006 log = INTVAL (XEXP (x, 1));
7007 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7008 GEN_INT (1 << log));
7009 }
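  /* For example, (ashift (reg) (const_int 3)) becomes
     (mult (reg) (const_int 8)), which the address-decomposition code
     treats as an index register scaled by 8.  */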
7010
7011 if (GET_CODE (x) == PLUS)
7012 {
7013 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7014
7015 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7016 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7017 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7018 {
7019 changed = 1;
7020 log = INTVAL (XEXP (XEXP (x, 0), 1));
7021 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7022 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7023 GEN_INT (1 << log));
7024 }
7025
7026 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7027 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7028 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7029 {
7030 changed = 1;
7031 log = INTVAL (XEXP (XEXP (x, 1), 1));
7032 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7033 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7034 GEN_INT (1 << log));
7035 }
7036
7037 /* Put multiply first if it isn't already. */
7038 if (GET_CODE (XEXP (x, 1)) == MULT)
7039 {
7040 rtx tmp = XEXP (x, 0);
7041 XEXP (x, 0) = XEXP (x, 1);
7042 XEXP (x, 1) = tmp;
7043 changed = 1;
7044 }
7045
7046 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7047 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7048 created by virtual register instantiation, register elimination, and
7049 similar optimizations. */
7050 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7051 {
7052 changed = 1;
7053 x = gen_rtx_PLUS (Pmode,
7054 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7055 XEXP (XEXP (x, 1), 0)),
7056 XEXP (XEXP (x, 1), 1));
7057 }
7058
7059 /* Canonicalize
7060 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7061 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7062 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7063 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7064 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7065 && CONSTANT_P (XEXP (x, 1)))
7066 {
7067 rtx constant;
7068 rtx other = NULL_RTX;
7069
7070 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7071 {
7072 constant = XEXP (x, 1);
7073 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7074 }
7075 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7076 {
7077 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7078 other = XEXP (x, 1);
7079 }
7080 else
7081 constant = 0;
7082
7083 if (constant)
7084 {
7085 changed = 1;
7086 x = gen_rtx_PLUS (Pmode,
7087 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7088 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7089 plus_constant (other, INTVAL (constant)));
7090 }
7091 }
7092
7093 if (changed && legitimate_address_p (mode, x, FALSE))
7094 return x;
7095
7096 if (GET_CODE (XEXP (x, 0)) == MULT)
7097 {
7098 changed = 1;
7099 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7100 }
7101
7102 if (GET_CODE (XEXP (x, 1)) == MULT)
7103 {
7104 changed = 1;
7105 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7106 }
7107
7108 if (changed
7109 && GET_CODE (XEXP (x, 1)) == REG
7110 && GET_CODE (XEXP (x, 0)) == REG)
7111 return x;
7112
7113 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7114 {
7115 changed = 1;
7116 x = legitimize_pic_address (x, 0);
7117 }
7118
7119 if (changed && legitimate_address_p (mode, x, FALSE))
7120 return x;
7121
7122 if (GET_CODE (XEXP (x, 0)) == REG)
7123 {
7124 rtx temp = gen_reg_rtx (Pmode);
7125 rtx val = force_operand (XEXP (x, 1), temp);
7126 if (val != temp)
7127 emit_move_insn (temp, val);
7128
7129 XEXP (x, 1) = temp;
7130 return x;
7131 }
7132
7133 else if (GET_CODE (XEXP (x, 1)) == REG)
7134 {
7135 rtx temp = gen_reg_rtx (Pmode);
7136 rtx val = force_operand (XEXP (x, 0), temp);
7137 if (val != temp)
7138 emit_move_insn (temp, val);
7139
7140 XEXP (x, 0) = temp;
7141 return x;
7142 }
7143 }
7144
7145 return x;
7146 }
7147 \f
7148 /* Print an integer constant expression in assembler syntax. Addition
7149 and subtraction are the only arithmetic that may appear in these
7150 expressions. FILE is the stdio stream to write to, X is the rtx, and
7151 CODE is the operand print code from the output string. */
7152
7153 static void
7154 output_pic_addr_const (FILE *file, rtx x, int code)
7155 {
7156 char buf[256];
7157
7158 switch (GET_CODE (x))
7159 {
7160 case PC:
7161 gcc_assert (flag_pic);
7162 putc ('.', file);
7163 break;
7164
7165 case SYMBOL_REF:
7166 output_addr_const (file, x);
7167 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7168 fputs ("@PLT", file);
7169 break;
7170
7171 case LABEL_REF:
7172 x = XEXP (x, 0);
7173 /* FALLTHRU */
7174 case CODE_LABEL:
7175 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7176 assemble_name (asm_out_file, buf);
7177 break;
7178
7179 case CONST_INT:
7180 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7181 break;
7182
7183 case CONST:
7184 /* This used to output parentheses around the expression,
7185 but that does not work on the 386 (either ATT or BSD assembler). */
7186 output_pic_addr_const (file, XEXP (x, 0), code);
7187 break;
7188
7189 case CONST_DOUBLE:
7190 if (GET_MODE (x) == VOIDmode)
7191 {
7192 /* We can use %d if the number is <32 bits and positive. */
7193 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7194 fprintf (file, "0x%lx%08lx",
7195 (unsigned long) CONST_DOUBLE_HIGH (x),
7196 (unsigned long) CONST_DOUBLE_LOW (x));
7197 else
7198 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7199 }
7200 else
7201 /* We can't handle floating point constants;
7202 PRINT_OPERAND must handle them. */
7203 output_operand_lossage ("floating constant misused");
7204 break;
7205
7206 case PLUS:
7207 /* Some assemblers need integer constants to appear first. */
7208 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7209 {
7210 output_pic_addr_const (file, XEXP (x, 0), code);
7211 putc ('+', file);
7212 output_pic_addr_const (file, XEXP (x, 1), code);
7213 }
7214 else
7215 {
7216 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7217 output_pic_addr_const (file, XEXP (x, 1), code);
7218 putc ('+', file);
7219 output_pic_addr_const (file, XEXP (x, 0), code);
7220 }
7221 break;
7222
7223 case MINUS:
7224 if (!TARGET_MACHO)
7225 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7226 output_pic_addr_const (file, XEXP (x, 0), code);
7227 putc ('-', file);
7228 output_pic_addr_const (file, XEXP (x, 1), code);
7229 if (!TARGET_MACHO)
7230 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7231 break;
7232
7233 case UNSPEC:
7234 gcc_assert (XVECLEN (x, 0) == 1);
7235 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7236 switch (XINT (x, 1))
7237 {
7238 case UNSPEC_GOT:
7239 fputs ("@GOT", file);
7240 break;
7241 case UNSPEC_GOTOFF:
7242 fputs ("@GOTOFF", file);
7243 break;
7244 case UNSPEC_GOTPCREL:
7245 fputs ("@GOTPCREL(%rip)", file);
7246 break;
7247 case UNSPEC_GOTTPOFF:
7248 /* FIXME: This might be @TPOFF in Sun ld too. */
7249 fputs ("@GOTTPOFF", file);
7250 break;
7251 case UNSPEC_TPOFF:
7252 fputs ("@TPOFF", file);
7253 break;
7254 case UNSPEC_NTPOFF:
7255 if (TARGET_64BIT)
7256 fputs ("@TPOFF", file);
7257 else
7258 fputs ("@NTPOFF", file);
7259 break;
7260 case UNSPEC_DTPOFF:
7261 fputs ("@DTPOFF", file);
7262 break;
7263 case UNSPEC_GOTNTPOFF:
7264 if (TARGET_64BIT)
7265 fputs ("@GOTTPOFF(%rip)", file);
7266 else
7267 fputs ("@GOTNTPOFF", file);
7268 break;
7269 case UNSPEC_INDNTPOFF:
7270 fputs ("@INDNTPOFF", file);
7271 break;
7272 default:
7273 output_operand_lossage ("invalid UNSPEC as operand");
7274 break;
7275 }
7276 break;
7277
7278 default:
7279 output_operand_lossage ("invalid expression as operand");
7280 }
7281 }
7282
7283 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7284 We need to emit DTP-relative relocations. */
7285
7286 static void
7287 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7288 {
7289 fputs (ASM_LONG, file);
7290 output_addr_const (file, x);
7291 fputs ("@DTPOFF", file);
7292 switch (size)
7293 {
7294 case 4:
7295 break;
7296 case 8:
7297 fputs (", 0", file);
7298 break;
7299 default:
7300 gcc_unreachable ();
7301 }
7302 }
7303
7304 /* In the name of slightly smaller debug output, and to cater to
7305 general assembler lossage, recognize PIC+GOTOFF and turn it back
7306 into a direct symbol reference.
7307
7308 On Darwin, this is necessary to avoid a crash, because Darwin
7309 has a different PIC label for each routine but the DWARF debugging
7310 information is not associated with any particular routine, so it's
7311 necessary to remove references to the PIC label from RTL stored by
7312 the DWARF output code. */
7313
7314 static rtx
7315 ix86_delegitimize_address (rtx orig_x)
7316 {
7317 rtx x = orig_x;
7318 /* reg_addend is NULL or a multiple of some register. */
7319 rtx reg_addend = NULL_RTX;
7320 /* const_addend is NULL or a const_int. */
7321 rtx const_addend = NULL_RTX;
7322 /* This is the result, or NULL. */
7323 rtx result = NULL_RTX;
7324
7325 if (GET_CODE (x) == MEM)
7326 x = XEXP (x, 0);
7327
7328 if (TARGET_64BIT)
7329 {
7330 if (GET_CODE (x) != CONST
7331 || GET_CODE (XEXP (x, 0)) != UNSPEC
7332 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7333 || GET_CODE (orig_x) != MEM)
7334 return orig_x;
7335 return XVECEXP (XEXP (x, 0), 0, 0);
7336 }
7337
7338 if (GET_CODE (x) != PLUS
7339 || GET_CODE (XEXP (x, 1)) != CONST)
7340 return orig_x;
7341
7342 if (GET_CODE (XEXP (x, 0)) == REG
7343 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7344 /* %ebx + GOT/GOTOFF */
7345 ;
7346 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7347 {
7348 /* %ebx + %reg * scale + GOT/GOTOFF */
7349 reg_addend = XEXP (x, 0);
7350 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7351 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7352 reg_addend = XEXP (reg_addend, 1);
7353 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7354 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7355 reg_addend = XEXP (reg_addend, 0);
7356 else
7357 return orig_x;
7358 if (GET_CODE (reg_addend) != REG
7359 && GET_CODE (reg_addend) != MULT
7360 && GET_CODE (reg_addend) != ASHIFT)
7361 return orig_x;
7362 }
7363 else
7364 return orig_x;
7365
7366 x = XEXP (XEXP (x, 1), 0);
7367 if (GET_CODE (x) == PLUS
7368 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7369 {
7370 const_addend = XEXP (x, 1);
7371 x = XEXP (x, 0);
7372 }
7373
7374 if (GET_CODE (x) == UNSPEC
7375 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7376 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7377 result = XVECEXP (x, 0, 0);
7378
7379 if (TARGET_MACHO && darwin_local_data_pic (x)
7380 && GET_CODE (orig_x) != MEM)
7381 result = XEXP (x, 0);
7382
7383 if (! result)
7384 return orig_x;
7385
7386 if (const_addend)
7387 result = gen_rtx_PLUS (Pmode, result, const_addend);
7388 if (reg_addend)
7389 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7390 return result;
7391 }
7392 \f
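/* Print to FILE the condition suffix of a setcc/cmov style instruction for
   comparison CODE in MODE: e.g. "e" for EQ, "g" for a signed GT, "a" for an
   unsigned GTU.  REVERSE inverts the condition first, and FP selects the
   alternate spellings used for fcmov ("nbe", "nb", "u", "nu").  */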
7393 static void
7394 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7395 int fp, FILE *file)
7396 {
7397 const char *suffix;
7398
7399 if (mode == CCFPmode || mode == CCFPUmode)
7400 {
7401 enum rtx_code second_code, bypass_code;
7402 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7403 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7404 code = ix86_fp_compare_code_to_integer (code);
7405 mode = CCmode;
7406 }
7407 if (reverse)
7408 code = reverse_condition (code);
7409
7410 switch (code)
7411 {
7412 case EQ:
7413 suffix = "e";
7414 break;
7415 case NE:
7416 suffix = "ne";
7417 break;
7418 case GT:
7419 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7420 suffix = "g";
7421 break;
7422 case GTU:
7423 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7424 Those same assemblers have the same but opposite lossage on cmov. */
7425 gcc_assert (mode == CCmode);
7426 suffix = fp ? "nbe" : "a";
7427 break;
7428 case LT:
7429 switch (mode)
7430 {
7431 case CCNOmode:
7432 case CCGOCmode:
7433 suffix = "s";
7434 break;
7435
7436 case CCmode:
7437 case CCGCmode:
7438 suffix = "l";
7439 break;
7440
7441 default:
7442 gcc_unreachable ();
7443 }
7444 break;
7445 case LTU:
7446 gcc_assert (mode == CCmode);
7447 suffix = "b";
7448 break;
7449 case GE:
7450 switch (mode)
7451 {
7452 case CCNOmode:
7453 case CCGOCmode:
7454 suffix = "ns";
7455 break;
7456
7457 case CCmode:
7458 case CCGCmode:
7459 suffix = "ge";
7460 break;
7461
7462 default:
7463 gcc_unreachable ();
7464 }
7465 break;
7466 case GEU:
7467 /* ??? As above. */
7468 gcc_assert (mode == CCmode);
7469 suffix = fp ? "nb" : "ae";
7470 break;
7471 case LE:
7472 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7473 suffix = "le";
7474 break;
7475 case LEU:
7476 gcc_assert (mode == CCmode);
7477 suffix = "be";
7478 break;
7479 case UNORDERED:
7480 suffix = fp ? "u" : "p";
7481 break;
7482 case ORDERED:
7483 suffix = fp ? "nu" : "np";
7484 break;
7485 default:
7486 gcc_unreachable ();
7487 }
7488 fputs (suffix, file);
7489 }
7490
7491 /* Print the name of register X to FILE based on its machine mode and number.
7492 If CODE is 'w', pretend the mode is HImode.
7493 If CODE is 'b', pretend the mode is QImode.
7494 If CODE is 'k', pretend the mode is SImode.
7495 If CODE is 'q', pretend the mode is DImode.
7496 If CODE is 'h', pretend the reg is the 'high' byte register.
7497 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7498
7499 void
7500 print_reg (rtx x, int code, FILE *file)
7501 {
7502 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7503 && REGNO (x) != FRAME_POINTER_REGNUM
7504 && REGNO (x) != FLAGS_REG
7505 && REGNO (x) != FPSR_REG
7506 && REGNO (x) != FPCR_REG);
7507
7508 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7509 putc ('%', file);
7510
7511 if (code == 'w' || MMX_REG_P (x))
7512 code = 2;
7513 else if (code == 'b')
7514 code = 1;
7515 else if (code == 'k')
7516 code = 4;
7517 else if (code == 'q')
7518 code = 8;
7519 else if (code == 'y')
7520 code = 3;
7521 else if (code == 'h')
7522 code = 0;
7523 else
7524 code = GET_MODE_SIZE (GET_MODE (x));
7525
7526 /* Irritatingly, the AMD extended registers use a different naming
7527 convention from the normal registers. */
7528 if (REX_INT_REG_P (x))
7529 {
7530 gcc_assert (TARGET_64BIT);
7531 switch (code)
7532 {
7533 case 0:
7534 error ("extended registers have no high halves");
7535 break;
7536 case 1:
7537 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7538 break;
7539 case 2:
7540 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7541 break;
7542 case 4:
7543 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7544 break;
7545 case 8:
7546 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7547 break;
7548 default:
7549 error ("unsupported operand size for extended register");
7550 break;
7551 }
7552 return;
7553 }
7554 switch (code)
7555 {
7556 case 3:
7557 if (STACK_TOP_P (x))
7558 {
7559 fputs ("st(0)", file);
7560 break;
7561 }
7562 /* FALLTHRU */
7563 case 8:
7564 case 4:
7565 case 12:
7566 if (! ANY_FP_REG_P (x))
7567 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7568 /* FALLTHRU */
7569 case 16:
7570 case 2:
7571 normal:
7572 fputs (hi_reg_name[REGNO (x)], file);
7573 break;
7574 case 1:
7575 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7576 goto normal;
7577 fputs (qi_reg_name[REGNO (x)], file);
7578 break;
7579 case 0:
7580 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7581 goto normal;
7582 fputs (qi_high_reg_name[REGNO (x)], file);
7583 break;
7584 default:
7585 gcc_unreachable ();
7586 }
7587 }
7588
7589 /* Locate some local-dynamic symbol still in use by this function
7590 so that we can print its name in some tls_local_dynamic_base
7591 pattern. */
7592
7593 static const char *
7594 get_some_local_dynamic_name (void)
7595 {
7596 rtx insn;
7597
7598 if (cfun->machine->some_ld_name)
7599 return cfun->machine->some_ld_name;
7600
7601 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7602 if (INSN_P (insn)
7603 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7604 return cfun->machine->some_ld_name;
7605
7606 gcc_unreachable ();
7607 }
7608
7609 static int
7610 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7611 {
7612 rtx x = *px;
7613
7614 if (GET_CODE (x) == SYMBOL_REF
7615 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7616 {
7617 cfun->machine->some_ld_name = XSTR (x, 0);
7618 return 1;
7619 }
7620
7621 return 0;
7622 }
7623
7624 /* Meaning of CODE:
7625 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7626 C -- print opcode suffix for set/cmov insn.
7627 c -- like C, but print reversed condition
7628 F,f -- likewise, but for floating-point.
7629 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7630 otherwise nothing
7631 R -- print the prefix for register names.
7632 z -- print the opcode suffix for the size of the current operand.
7633 * -- print a star (in certain assembler syntax)
7634 A -- print an absolute memory reference.
7635 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7636 s -- print a shift double count, followed by the assembler's argument
7637 delimiter.
7638 b -- print the QImode name of the register for the indicated operand.
7639 %b0 would print %al if operands[0] is reg 0.
7640 w -- likewise, print the HImode name of the register.
7641 k -- likewise, print the SImode name of the register.
7642 q -- likewise, print the DImode name of the register.
7643 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7644 y -- print "st(0)" instead of "st" as a register.
7645 D -- print condition for SSE cmp instruction.
7646 P -- if PIC, print an @PLT suffix.
7647 X -- don't print any sort of PIC '@' suffix for a symbol.
7648 & -- print some in-use local-dynamic symbol name.
7649 H -- print a memory address offset by 8; used for sse high-parts
7650 */
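/* For instance, if operands[0] is the hard register AX, then in AT&T
   syntax "%b0", "%w0", "%k0" and "%h0" print "%al", "%ax", "%eax" and
   "%ah" respectively.  */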
7651
7652 void
7653 print_operand (FILE *file, rtx x, int code)
7654 {
7655 if (code)
7656 {
7657 switch (code)
7658 {
7659 case '*':
7660 if (ASSEMBLER_DIALECT == ASM_ATT)
7661 putc ('*', file);
7662 return;
7663
7664 case '&':
7665 assemble_name (file, get_some_local_dynamic_name ());
7666 return;
7667
7668 case 'A':
7669 switch (ASSEMBLER_DIALECT)
7670 {
7671 case ASM_ATT:
7672 putc ('*', file);
7673 break;
7674
7675 case ASM_INTEL:
7676 /* Intel syntax. For absolute addresses, registers should not
7677 be surrounded by brackets. */
7678 if (GET_CODE (x) != REG)
7679 {
7680 putc ('[', file);
7681 PRINT_OPERAND (file, x, 0);
7682 putc (']', file);
7683 return;
7684 }
7685 break;
7686
7687 default:
7688 gcc_unreachable ();
7689 }
7690
7691 PRINT_OPERAND (file, x, 0);
7692 return;
7693
7694
7695 case 'L':
7696 if (ASSEMBLER_DIALECT == ASM_ATT)
7697 putc ('l', file);
7698 return;
7699
7700 case 'W':
7701 if (ASSEMBLER_DIALECT == ASM_ATT)
7702 putc ('w', file);
7703 return;
7704
7705 case 'B':
7706 if (ASSEMBLER_DIALECT == ASM_ATT)
7707 putc ('b', file);
7708 return;
7709
7710 case 'Q':
7711 if (ASSEMBLER_DIALECT == ASM_ATT)
7712 putc ('l', file);
7713 return;
7714
7715 case 'S':
7716 if (ASSEMBLER_DIALECT == ASM_ATT)
7717 putc ('s', file);
7718 return;
7719
7720 case 'T':
7721 if (ASSEMBLER_DIALECT == ASM_ATT)
7722 putc ('t', file);
7723 return;
7724
7725 case 'z':
7726 /* 387 opcodes don't get size suffixes if the operands are
7727 registers. */
7728 if (STACK_REG_P (x))
7729 return;
7730
7731 /* Likewise if using Intel opcodes. */
7732 if (ASSEMBLER_DIALECT == ASM_INTEL)
7733 return;
7734
7735 /* This is the size of op from size of operand. */
7736 switch (GET_MODE_SIZE (GET_MODE (x)))
7737 {
7738 case 2:
7739 #ifdef HAVE_GAS_FILDS_FISTS
7740 putc ('s', file);
7741 #endif
7742 return;
7743
7744 case 4:
7745 if (GET_MODE (x) == SFmode)
7746 {
7747 putc ('s', file);
7748 return;
7749 }
7750 else
7751 putc ('l', file);
7752 return;
7753
7754 case 12:
7755 case 16:
7756 putc ('t', file);
7757 return;
7758
7759 case 8:
7760 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7761 {
7762 #ifdef GAS_MNEMONICS
7763 putc ('q', file);
7764 #else
7765 putc ('l', file);
7766 putc ('l', file);
7767 #endif
7768 }
7769 else
7770 putc ('l', file);
7771 return;
7772
7773 default:
7774 gcc_unreachable ();
7775 }
7776
7777 case 'b':
7778 case 'w':
7779 case 'k':
7780 case 'q':
7781 case 'h':
7782 case 'y':
7783 case 'X':
7784 case 'P':
7785 break;
7786
7787 case 's':
7788 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7789 {
7790 PRINT_OPERAND (file, x, 0);
7791 putc (',', file);
7792 }
7793 return;
7794
7795 case 'D':
7796 /* Little bit of braindamage here. The SSE compare instructions
7797 use completely different names for the comparisons than the
7798 fp conditional moves do. */
7799 switch (GET_CODE (x))
7800 {
7801 case EQ:
7802 case UNEQ:
7803 fputs ("eq", file);
7804 break;
7805 case LT:
7806 case UNLT:
7807 fputs ("lt", file);
7808 break;
7809 case LE:
7810 case UNLE:
7811 fputs ("le", file);
7812 break;
7813 case UNORDERED:
7814 fputs ("unord", file);
7815 break;
7816 case NE:
7817 case LTGT:
7818 fputs ("neq", file);
7819 break;
7820 case UNGE:
7821 case GE:
7822 fputs ("nlt", file);
7823 break;
7824 case UNGT:
7825 case GT:
7826 fputs ("nle", file);
7827 break;
7828 case ORDERED:
7829 fputs ("ord", file);
7830 break;
7831 default:
7832 gcc_unreachable ();
7833 }
7834 return;
7835 case 'O':
7836 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7837 if (ASSEMBLER_DIALECT == ASM_ATT)
7838 {
7839 switch (GET_MODE (x))
7840 {
7841 case HImode: putc ('w', file); break;
7842 case SImode:
7843 case SFmode: putc ('l', file); break;
7844 case DImode:
7845 case DFmode: putc ('q', file); break;
7846 default: gcc_unreachable ();
7847 }
7848 putc ('.', file);
7849 }
7850 #endif
7851 return;
7852 case 'C':
7853 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7854 return;
7855 case 'F':
7856 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7857 if (ASSEMBLER_DIALECT == ASM_ATT)
7858 putc ('.', file);
7859 #endif
7860 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7861 return;
7862
7863 /* Like above, but reverse condition */
7864 case 'c':
7865 /* Check to see if argument to %c is really a constant
7866 and not a condition code which needs to be reversed. */
7867 if (!COMPARISON_P (x))
7868 {
7869 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7870 return;
7871 }
7872 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7873 return;
7874 case 'f':
7875 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7876 if (ASSEMBLER_DIALECT == ASM_ATT)
7877 putc ('.', file);
7878 #endif
7879 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7880 return;
7881
7882 case 'H':
7883 /* It doesn't actually matter what mode we use here, as we're
7884 only going to use this for printing. */
7885 x = adjust_address_nv (x, DImode, 8);
7886 break;
7887
7888 case '+':
7889 {
7890 rtx x;
7891
7892 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7893 return;
7894
7895 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7896 if (x)
7897 {
7898 int pred_val = INTVAL (XEXP (x, 0));
7899
7900 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7901 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7902 {
7903 int taken = pred_val > REG_BR_PROB_BASE / 2;
7904 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7905
7906 /* Emit hints only in the case default branch prediction
7907 heuristics would fail. */
7908 if (taken != cputaken)
7909 {
7910 /* We use the 3e (DS) prefix for taken branches and
7911 the 2e (CS) prefix for not-taken branches. */
7912 if (taken)
7913 fputs ("ds ; ", file);
7914 else
7915 fputs ("cs ; ", file);
7916 }
7917 }
7918 }
7919 return;
7920 }
7921 default:
7922 output_operand_lossage ("invalid operand code '%c'", code);
7923 }
7924 }
7925
7926 if (GET_CODE (x) == REG)
7927 print_reg (x, code, file);
7928
7929 else if (GET_CODE (x) == MEM)
7930 {
7931 /* No `byte ptr' prefix for call instructions. */
7932 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7933 {
7934 const char * size;
7935 switch (GET_MODE_SIZE (GET_MODE (x)))
7936 {
7937 case 1: size = "BYTE"; break;
7938 case 2: size = "WORD"; break;
7939 case 4: size = "DWORD"; break;
7940 case 8: size = "QWORD"; break;
7941 case 12: size = "XWORD"; break;
7942 case 16: size = "XMMWORD"; break;
7943 default:
7944 gcc_unreachable ();
7945 }
7946
7947 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7948 if (code == 'b')
7949 size = "BYTE";
7950 else if (code == 'w')
7951 size = "WORD";
7952 else if (code == 'k')
7953 size = "DWORD";
7954
7955 fputs (size, file);
7956 fputs (" PTR ", file);
7957 }
7958
7959 x = XEXP (x, 0);
7960 /* Avoid (%rip) for call operands. */
7961 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7962 && GET_CODE (x) != CONST_INT)
7963 output_addr_const (file, x);
7964 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7965 output_operand_lossage ("invalid constraints for operand");
7966 else
7967 output_address (x);
7968 }
7969
7970 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7971 {
7972 REAL_VALUE_TYPE r;
7973 long l;
7974
7975 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7976 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7977
7978 if (ASSEMBLER_DIALECT == ASM_ATT)
7979 putc ('$', file);
7980 fprintf (file, "0x%08lx", l);
7981 }
7982
7983 /* These float cases don't actually occur as immediate operands. */
7984 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7985 {
7986 char dstr[30];
7987
7988 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7989 fprintf (file, "%s", dstr);
7990 }
7991
7992 else if (GET_CODE (x) == CONST_DOUBLE
7993 && GET_MODE (x) == XFmode)
7994 {
7995 char dstr[30];
7996
7997 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7998 fprintf (file, "%s", dstr);
7999 }
8000
8001 else
8002 {
8003 /* We have patterns that allow zero sets of memory, for instance.
8004 In 64-bit mode, we should probably support all 8-byte vectors,
8005 since we can in fact encode that into an immediate. */
8006 if (GET_CODE (x) == CONST_VECTOR)
8007 {
8008 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8009 x = const0_rtx;
8010 }
8011
8012 if (code != 'P')
8013 {
8014 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8015 {
8016 if (ASSEMBLER_DIALECT == ASM_ATT)
8017 putc ('$', file);
8018 }
8019 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8020 || GET_CODE (x) == LABEL_REF)
8021 {
8022 if (ASSEMBLER_DIALECT == ASM_ATT)
8023 putc ('$', file);
8024 else
8025 fputs ("OFFSET FLAT:", file);
8026 }
8027 }
8028 if (GET_CODE (x) == CONST_INT)
8029 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8030 else if (flag_pic)
8031 output_pic_addr_const (file, x, code);
8032 else
8033 output_addr_const (file, x);
8034 }
8035 }
8036 \f
8037 /* Print a memory operand whose address is ADDR. */
8038
8039 void
8040 print_operand_address (FILE *file, rtx addr)
8041 {
8042 struct ix86_address parts;
8043 rtx base, index, disp;
8044 int scale;
8045 int ok = ix86_decompose_address (addr, &parts);
8046
8047 gcc_assert (ok);
8048
8049 base = parts.base;
8050 index = parts.index;
8051 disp = parts.disp;
8052 scale = parts.scale;
8053
8054 switch (parts.seg)
8055 {
8056 case SEG_DEFAULT:
8057 break;
8058 case SEG_FS:
8059 case SEG_GS:
8060 if (USER_LABEL_PREFIX[0] == 0)
8061 putc ('%', file);
8062 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8063 break;
8064 default:
8065 gcc_unreachable ();
8066 }
8067
8068 if (!base && !index)
8069 {
8070 /* A displacement-only address requires special attention. */
8071
8072 if (GET_CODE (disp) == CONST_INT)
8073 {
8074 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8075 {
8076 if (USER_LABEL_PREFIX[0] == 0)
8077 putc ('%', file);
8078 fputs ("ds:", file);
8079 }
8080 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8081 }
8082 else if (flag_pic)
8083 output_pic_addr_const (file, disp, 0);
8084 else
8085 output_addr_const (file, disp);
8086
8087 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
8088 if (TARGET_64BIT)
8089 {
8090 if (GET_CODE (disp) == CONST
8091 && GET_CODE (XEXP (disp, 0)) == PLUS
8092 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8093 disp = XEXP (XEXP (disp, 0), 0);
8094 if (GET_CODE (disp) == LABEL_REF
8095 || (GET_CODE (disp) == SYMBOL_REF
8096 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8097 fputs ("(%rip)", file);
8098 }
8099 }
8100 else
8101 {
8102 if (ASSEMBLER_DIALECT == ASM_ATT)
8103 {
8104 if (disp)
8105 {
8106 if (flag_pic)
8107 output_pic_addr_const (file, disp, 0);
8108 else if (GET_CODE (disp) == LABEL_REF)
8109 output_asm_label (disp);
8110 else
8111 output_addr_const (file, disp);
8112 }
8113
8114 putc ('(', file);
8115 if (base)
8116 print_reg (base, 0, file);
8117 if (index)
8118 {
8119 putc (',', file);
8120 print_reg (index, 0, file);
8121 if (scale != 1)
8122 fprintf (file, ",%d", scale);
8123 }
8124 putc (')', file);
8125 }
8126 else
8127 {
8128 rtx offset = NULL_RTX;
8129
8130 if (disp)
8131 {
8132 /* Pull out the offset of a symbol; print any symbol itself. */
8133 if (GET_CODE (disp) == CONST
8134 && GET_CODE (XEXP (disp, 0)) == PLUS
8135 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8136 {
8137 offset = XEXP (XEXP (disp, 0), 1);
8138 disp = gen_rtx_CONST (VOIDmode,
8139 XEXP (XEXP (disp, 0), 0));
8140 }
8141
8142 if (flag_pic)
8143 output_pic_addr_const (file, disp, 0);
8144 else if (GET_CODE (disp) == LABEL_REF)
8145 output_asm_label (disp);
8146 else if (GET_CODE (disp) == CONST_INT)
8147 offset = disp;
8148 else
8149 output_addr_const (file, disp);
8150 }
8151
8152 putc ('[', file);
8153 if (base)
8154 {
8155 print_reg (base, 0, file);
8156 if (offset)
8157 {
8158 if (INTVAL (offset) >= 0)
8159 putc ('+', file);
8160 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8161 }
8162 }
8163 else if (offset)
8164 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8165 else
8166 putc ('0', file);
8167
8168 if (index)
8169 {
8170 putc ('+', file);
8171 print_reg (index, 0, file);
8172 if (scale != 1)
8173 fprintf (file, "*%d", scale);
8174 }
8175 putc (']', file);
8176 }
8177 }
8178 }
8179
8180 bool
8181 output_addr_const_extra (FILE *file, rtx x)
8182 {
8183 rtx op;
8184
8185 if (GET_CODE (x) != UNSPEC)
8186 return false;
8187
8188 op = XVECEXP (x, 0, 0);
8189 switch (XINT (x, 1))
8190 {
8191 case UNSPEC_GOTTPOFF:
8192 output_addr_const (file, op);
8193 /* FIXME: This might be @TPOFF in Sun ld. */
8194 fputs ("@GOTTPOFF", file);
8195 break;
8196 case UNSPEC_TPOFF:
8197 output_addr_const (file, op);
8198 fputs ("@TPOFF", file);
8199 break;
8200 case UNSPEC_NTPOFF:
8201 output_addr_const (file, op);
8202 if (TARGET_64BIT)
8203 fputs ("@TPOFF", file);
8204 else
8205 fputs ("@NTPOFF", file);
8206 break;
8207 case UNSPEC_DTPOFF:
8208 output_addr_const (file, op);
8209 fputs ("@DTPOFF", file);
8210 break;
8211 case UNSPEC_GOTNTPOFF:
8212 output_addr_const (file, op);
8213 if (TARGET_64BIT)
8214 fputs ("@GOTTPOFF(%rip)", file);
8215 else
8216 fputs ("@GOTNTPOFF", file);
8217 break;
8218 case UNSPEC_INDNTPOFF:
8219 output_addr_const (file, op);
8220 fputs ("@INDNTPOFF", file);
8221 break;
8222
8223 default:
8224 return false;
8225 }
8226
8227 return true;
8228 }
8229 \f
8230 /* Split one or more DImode RTL references into pairs of SImode
8231 references. The RTL can be REG, offsettable MEM, integer constant, or
8232 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8233 split and "num" is its length. lo_half and hi_half are output arrays
8234 that parallel "operands". */
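/* For example (illustrative only): a VOIDmode CONST_INT such as
   0x100000003 splits into lo_half = (const_int 0x3) and
   hi_half = (const_int 0x1), while an offsettable MEM splits into the
   two SImode memory references at offsets 0 and 4.  */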
8235
8236 void
8237 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8238 {
8239 while (num--)
8240 {
8241 rtx op = operands[num];
8242
8243 /* simplify_subreg refuses to split volatile memory addresses,
8244 but we still have to handle them. */
8245 if (GET_CODE (op) == MEM)
8246 {
8247 lo_half[num] = adjust_address (op, SImode, 0);
8248 hi_half[num] = adjust_address (op, SImode, 4);
8249 }
8250 else
8251 {
8252 lo_half[num] = simplify_gen_subreg (SImode, op,
8253 GET_MODE (op) == VOIDmode
8254 ? DImode : GET_MODE (op), 0);
8255 hi_half[num] = simplify_gen_subreg (SImode, op,
8256 GET_MODE (op) == VOIDmode
8257 ? DImode : GET_MODE (op), 4);
8258 }
8259 }
8260 }
8261 /* Split one or more TImode RTL references into pairs of DImode
8262 references. The RTL can be REG, offsettable MEM, integer constant, or
8263 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8264 split and "num" is its length. lo_half and hi_half are output arrays
8265 that parallel "operands". */
8266
8267 void
8268 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8269 {
8270 while (num--)
8271 {
8272 rtx op = operands[num];
8273
8274 /* simplify_subreg refuses to split volatile memory addresses, but we
8275 still have to handle them. */
8276 if (GET_CODE (op) == MEM)
8277 {
8278 lo_half[num] = adjust_address (op, DImode, 0);
8279 hi_half[num] = adjust_address (op, DImode, 8);
8280 }
8281 else
8282 {
8283 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8284 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8285 }
8286 }
8287 }
8288 \f
8289 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8290 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8291 is the expression of the binary operation. The output may either be
8292 emitted here, or returned to the caller, like all output_* functions.
8293
8294 There is no guarantee that the operands are the same mode, as they
8295 might be within FLOAT or FLOAT_EXTEND expressions. */
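/* For instance, with operands[3] a (plus:DF ...) and all three operands in
   SSE registers, the routine below returns "addsd\t{%2, %0|%0, %2}"; the
   same operation on the 387 stack yields one of the fadd forms instead.  */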
8296
8297 #ifndef SYSV386_COMPAT
8298 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8299 wants to fix the assemblers because that causes incompatibility
8300 with gcc. No-one wants to fix gcc because that causes
8301 incompatibility with assemblers... You can use the option of
8302 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8303 #define SYSV386_COMPAT 1
8304 #endif
8305
8306 const char *
8307 output_387_binary_op (rtx insn, rtx *operands)
8308 {
8309 static char buf[30];
8310 const char *p;
8311 const char *ssep;
8312 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8313
8314 #ifdef ENABLE_CHECKING
8315 /* Even if we do not want to check the inputs, this documents the input
8316 constraints, which helps in understanding the following code. */
8317 if (STACK_REG_P (operands[0])
8318 && ((REG_P (operands[1])
8319 && REGNO (operands[0]) == REGNO (operands[1])
8320 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8321 || (REG_P (operands[2])
8322 && REGNO (operands[0]) == REGNO (operands[2])
8323 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8324 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8325 ; /* ok */
8326 else
8327 gcc_assert (is_sse);
8328 #endif
8329
8330 switch (GET_CODE (operands[3]))
8331 {
8332 case PLUS:
8333 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8334 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8335 p = "fiadd";
8336 else
8337 p = "fadd";
8338 ssep = "add";
8339 break;
8340
8341 case MINUS:
8342 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8343 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8344 p = "fisub";
8345 else
8346 p = "fsub";
8347 ssep = "sub";
8348 break;
8349
8350 case MULT:
8351 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8352 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8353 p = "fimul";
8354 else
8355 p = "fmul";
8356 ssep = "mul";
8357 break;
8358
8359 case DIV:
8360 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8361 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8362 p = "fidiv";
8363 else
8364 p = "fdiv";
8365 ssep = "div";
8366 break;
8367
8368 default:
8369 gcc_unreachable ();
8370 }
8371
8372 if (is_sse)
8373 {
8374 strcpy (buf, ssep);
8375 if (GET_MODE (operands[0]) == SFmode)
8376 strcat (buf, "ss\t{%2, %0|%0, %2}");
8377 else
8378 strcat (buf, "sd\t{%2, %0|%0, %2}");
8379 return buf;
8380 }
8381 strcpy (buf, p);
8382
8383 switch (GET_CODE (operands[3]))
8384 {
8385 case MULT:
8386 case PLUS:
8387 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8388 {
8389 rtx temp = operands[2];
8390 operands[2] = operands[1];
8391 operands[1] = temp;
8392 }
8393
8394 /* We now know operands[0] == operands[1]. */
8395
8396 if (GET_CODE (operands[2]) == MEM)
8397 {
8398 p = "%z2\t%2";
8399 break;
8400 }
8401
8402 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8403 {
8404 if (STACK_TOP_P (operands[0]))
8405 /* How is it that we are storing to a dead operand[2]?
8406 Well, presumably operands[1] is dead too. We can't
8407 store the result to st(0) as st(0) gets popped on this
8408 instruction. Instead store to operands[2] (which I
8409 think has to be st(1)). st(1) will be popped later.
8410 gcc <= 2.8.1 didn't have this check and generated
8411 assembly code that the Unixware assembler rejected. */
8412 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8413 else
8414 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8415 break;
8416 }
8417
8418 if (STACK_TOP_P (operands[0]))
8419 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8420 else
8421 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8422 break;
8423
8424 case MINUS:
8425 case DIV:
8426 if (GET_CODE (operands[1]) == MEM)
8427 {
8428 p = "r%z1\t%1";
8429 break;
8430 }
8431
8432 if (GET_CODE (operands[2]) == MEM)
8433 {
8434 p = "%z2\t%2";
8435 break;
8436 }
8437
8438 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8439 {
8440 #if SYSV386_COMPAT
8441 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8442 derived assemblers, confusingly reverse the direction of
8443 the operation for fsub{r} and fdiv{r} when the
8444 destination register is not st(0). The Intel assembler
8445 doesn't have this brain damage. Read !SYSV386_COMPAT to
8446 figure out what the hardware really does. */
8447 if (STACK_TOP_P (operands[0]))
8448 p = "{p\t%0, %2|rp\t%2, %0}";
8449 else
8450 p = "{rp\t%2, %0|p\t%0, %2}";
8451 #else
8452 if (STACK_TOP_P (operands[0]))
8453 /* As above for fmul/fadd, we can't store to st(0). */
8454 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8455 else
8456 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8457 #endif
8458 break;
8459 }
8460
8461 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8462 {
8463 #if SYSV386_COMPAT
8464 if (STACK_TOP_P (operands[0]))
8465 p = "{rp\t%0, %1|p\t%1, %0}";
8466 else
8467 p = "{p\t%1, %0|rp\t%0, %1}";
8468 #else
8469 if (STACK_TOP_P (operands[0]))
8470 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8471 else
8472 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8473 #endif
8474 break;
8475 }
8476
8477 if (STACK_TOP_P (operands[0]))
8478 {
8479 if (STACK_TOP_P (operands[1]))
8480 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8481 else
8482 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8483 break;
8484 }
8485 else if (STACK_TOP_P (operands[1]))
8486 {
8487 #if SYSV386_COMPAT
8488 p = "{\t%1, %0|r\t%0, %1}";
8489 #else
8490 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8491 #endif
8492 }
8493 else
8494 {
8495 #if SYSV386_COMPAT
8496 p = "{r\t%2, %0|\t%0, %2}";
8497 #else
8498 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8499 #endif
8500 }
8501 break;
8502
8503 default:
8504 gcc_unreachable ();
8505 }
8506
8507 strcat (buf, p);
8508 return buf;
8509 }
8510
8511 /* Return needed mode for entity in optimize_mode_switching pass. */
8512
8513 int
8514 ix86_mode_needed (int entity, rtx insn)
8515 {
8516 enum attr_i387_cw mode;
8517
8518 /* The mode UNINITIALIZED is used to store the control word after a
8519 function call or ASM pattern. The mode ANY specifies that the function
8520 has no requirements on the control word and makes no changes to the
8521 bits we are interested in. */
8522
8523 if (CALL_P (insn)
8524 || (NONJUMP_INSN_P (insn)
8525 && (asm_noperands (PATTERN (insn)) >= 0
8526 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8527 return I387_CW_UNINITIALIZED;
8528
8529 if (recog_memoized (insn) < 0)
8530 return I387_CW_ANY;
8531
8532 mode = get_attr_i387_cw (insn);
8533
8534 switch (entity)
8535 {
8536 case I387_TRUNC:
8537 if (mode == I387_CW_TRUNC)
8538 return mode;
8539 break;
8540
8541 case I387_FLOOR:
8542 if (mode == I387_CW_FLOOR)
8543 return mode;
8544 break;
8545
8546 case I387_CEIL:
8547 if (mode == I387_CW_CEIL)
8548 return mode;
8549 break;
8550
8551 case I387_MASK_PM:
8552 if (mode == I387_CW_MASK_PM)
8553 return mode;
8554 break;
8555
8556 default:
8557 gcc_unreachable ();
8558 }
8559
8560 return I387_CW_ANY;
8561 }
8562
8563 /* Output code to initialize the control word copies used by the trunc?f?i
8564 and rounding patterns. CURRENT_MODE is set to the current control word,
8565 while NEW_MODE is set to the new control word. */
8566
8567 void
8568 emit_i387_cw_initialization (int mode)
8569 {
8570 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8571 rtx new_mode;
8572
8573 int slot;
8574
8575 rtx reg = gen_reg_rtx (HImode);
8576
8577 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8578 emit_move_insn (reg, stored_mode);
8579
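  /* In the x87 control word, the rounding-control field occupies bits
     11:10 (00 = to nearest, 01 = down, 10 = up, 11 = toward zero), and
     bit 5 (0x0020) is the precision-exception mask.  The constants below
     (0x0c00, 0x0400, 0x0800, 0x0020) set those fields; the movsi_insv_1
     path presumably writes the same rounding-control bits through the
     high byte of the register.  */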
8580 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8581 {
8582 switch (mode)
8583 {
8584 case I387_CW_TRUNC:
8585 /* round toward zero (truncate) */
8586 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8587 slot = SLOT_CW_TRUNC;
8588 break;
8589
8590 case I387_CW_FLOOR:
8591 /* round down toward -oo */
8592 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8593 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8594 slot = SLOT_CW_FLOOR;
8595 break;
8596
8597 case I387_CW_CEIL:
8598 /* round up toward +oo */
8599 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8600 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8601 slot = SLOT_CW_CEIL;
8602 break;
8603
8604 case I387_CW_MASK_PM:
8605 /* mask precision exception for nearbyint() */
8606 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8607 slot = SLOT_CW_MASK_PM;
8608 break;
8609
8610 default:
8611 gcc_unreachable ();
8612 }
8613 }
8614 else
8615 {
8616 switch (mode)
8617 {
8618 case I387_CW_TRUNC:
8619 /* round toward zero (truncate) */
8620 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8621 slot = SLOT_CW_TRUNC;
8622 break;
8623
8624 case I387_CW_FLOOR:
8625 /* round down toward -oo */
8626 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8627 slot = SLOT_CW_FLOOR;
8628 break;
8629
8630 case I387_CW_CEIL:
8631 /* round up toward +oo */
8632 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8633 slot = SLOT_CW_CEIL;
8634 break;
8635
8636 case I387_CW_MASK_PM:
8637 /* mask precision exception for nearbyint() */
8638 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8639 slot = SLOT_CW_MASK_PM;
8640 break;
8641
8642 default:
8643 gcc_unreachable ();
8644 }
8645 }
8646
8647 gcc_assert (slot < MAX_386_STACK_LOCALS);
8648
8649 new_mode = assign_386_stack_local (HImode, slot);
8650 emit_move_insn (new_mode, reg);
8651 }
8652
8653 /* Output code for INSN to convert a float to a signed int. OPERANDS
8654 are the insn operands. The output may be [HSD]Imode and the input
8655 operand may be [SDX]Fmode. */
8656
8657 const char *
8658 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8659 {
8660 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8661 int dimode_p = GET_MODE (operands[0]) == DImode;
8662 int round_mode = get_attr_i387_cw (insn);
8663
8664 /* Jump through a hoop or two for DImode, since the hardware has no
8665 non-popping instruction. We used to do this a different way, but
8666 that was somewhat fragile and broke with post-reload splitters. */
8667 if ((dimode_p || fisttp) && !stack_top_dies)
8668 output_asm_insn ("fld\t%y1", operands);
8669
8670 gcc_assert (STACK_TOP_P (operands[1]));
8671 gcc_assert (GET_CODE (operands[0]) == MEM);
8672
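  /* operands[2] and operands[3] are presumably the stack slots holding the
     original and the rounding-specific control words set up by
     emit_i387_cw_initialization, hence the fldcw %3 ... fldcw %2 bracket
     around the store below.  */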
8673 if (fisttp)
8674 output_asm_insn ("fisttp%z0\t%0", operands);
8675 else
8676 {
8677 if (round_mode != I387_CW_ANY)
8678 output_asm_insn ("fldcw\t%3", operands);
8679 if (stack_top_dies || dimode_p)
8680 output_asm_insn ("fistp%z0\t%0", operands);
8681 else
8682 output_asm_insn ("fist%z0\t%0", operands);
8683 if (round_mode != I387_CW_ANY)
8684 output_asm_insn ("fldcw\t%2", operands);
8685 }
8686
8687 return "";
8688 }
8689
8690 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8691 have the values zero or one, indicates the ffreep insn's operand
8692 from the OPERANDS array. */
8693
8694 static const char *
8695 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8696 {
8697 if (TARGET_USE_FFREEP)
8698 #if HAVE_AS_IX86_FFREEP
8699 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8700 #else
8701 {
8702 static char retval[] = ".word\t0xc_df";
8703 int regno = REGNO (operands[opno]);
8704
8705 gcc_assert (FP_REGNO_P (regno));
8706
8707 retval[9] = '0' + (regno - FIRST_STACK_REG);
8708 return retval;
8709 }
8710 #endif
8711
8712 return opno ? "fstp\t%y1" : "fstp\t%y0";
8713 }
8714
8715
8716 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8717 should be used. UNORDERED_P is true when fucom should be used. */
8718
8719 const char *
8720 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8721 {
8722 int stack_top_dies;
8723 rtx cmp_op0, cmp_op1;
8724 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8725
8726 if (eflags_p)
8727 {
8728 cmp_op0 = operands[0];
8729 cmp_op1 = operands[1];
8730 }
8731 else
8732 {
8733 cmp_op0 = operands[1];
8734 cmp_op1 = operands[2];
8735 }
8736
8737 if (is_sse)
8738 {
8739 if (GET_MODE (operands[0]) == SFmode)
8740 if (unordered_p)
8741 return "ucomiss\t{%1, %0|%0, %1}";
8742 else
8743 return "comiss\t{%1, %0|%0, %1}";
8744 else
8745 if (unordered_p)
8746 return "ucomisd\t{%1, %0|%0, %1}";
8747 else
8748 return "comisd\t{%1, %0|%0, %1}";
8749 }
8750
8751 gcc_assert (STACK_TOP_P (cmp_op0));
8752
8753 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8754
8755 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8756 {
8757 if (stack_top_dies)
8758 {
8759 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8760 return output_387_ffreep (operands, 1);
8761 }
8762 else
8763 return "ftst\n\tfnstsw\t%0";
8764 }
8765
8766 if (STACK_REG_P (cmp_op1)
8767 && stack_top_dies
8768 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8769 && REGNO (cmp_op1) != FIRST_STACK_REG)
8770 {
8771 /* If both the top of the 387 stack and the other operand die,
8772 and the other operand is a stack register, then this must be a
8773 `fcompp' float compare. */
8774
8775 if (eflags_p)
8776 {
8777 /* There is no double popping fcomi variant. Fortunately,
8778 eflags is immune from the fstp's cc clobbering. */
8779 if (unordered_p)
8780 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8781 else
8782 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8783 return output_387_ffreep (operands, 0);
8784 }
8785 else
8786 {
8787 if (unordered_p)
8788 return "fucompp\n\tfnstsw\t%0";
8789 else
8790 return "fcompp\n\tfnstsw\t%0";
8791 }
8792 }
8793 else
8794 {
8795 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8796
8797 static const char * const alt[16] =
8798 {
8799 "fcom%z2\t%y2\n\tfnstsw\t%0",
8800 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8801 "fucom%z2\t%y2\n\tfnstsw\t%0",
8802 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8803
8804 "ficom%z2\t%y2\n\tfnstsw\t%0",
8805 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8806 NULL,
8807 NULL,
8808
8809 "fcomi\t{%y1, %0|%0, %y1}",
8810 "fcomip\t{%y1, %0|%0, %y1}",
8811 "fucomi\t{%y1, %0|%0, %y1}",
8812 "fucomip\t{%y1, %0|%0, %y1}",
8813
8814 NULL,
8815 NULL,
8816 NULL,
8817 NULL
8818 };
8819
8820 int mask;
8821 const char *ret;
8822
8823 mask = eflags_p << 3;
8824 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8825 mask |= unordered_p << 1;
8826 mask |= stack_top_dies;
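      /* For example, eflags_p = 1, unordered_p = 1 and stack_top_dies = 1
	 with a floating-point cmp_op1 gives mask = 8 + 2 + 1 = 11, which
	 selects "fucomip\t{%y1, %0|%0, %y1}" above.  */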
8827
8828 gcc_assert (mask < 16);
8829 ret = alt[mask];
8830 gcc_assert (ret);
8831
8832 return ret;
8833 }
8834 }
8835
8836 void
8837 ix86_output_addr_vec_elt (FILE *file, int value)
8838 {
8839 const char *directive = ASM_LONG;
8840
8841 #ifdef ASM_QUAD
8842 if (TARGET_64BIT)
8843 directive = ASM_QUAD;
8844 #else
8845 gcc_assert (!TARGET_64BIT);
8846 #endif
8847
8848 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8849 }
8850
8851 void
8852 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8853 {
8854 if (TARGET_64BIT)
8855 fprintf (file, "%s%s%d-%s%d\n",
8856 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8857 else if (HAVE_AS_GOTOFF_IN_DATA)
8858 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8859 #if TARGET_MACHO
8860 else if (TARGET_MACHO)
8861 {
8862 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8863 machopic_output_function_base_name (file);
8864 fprintf(file, "\n");
8865 }
8866 #endif
8867 else
8868 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8869 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8870 }
8871 \f
8872 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8873 for the target. */
8874
8875 void
8876 ix86_expand_clear (rtx dest)
8877 {
8878 rtx tmp;
8879
8880 /* We play register width games, which are only valid after reload. */
8881 gcc_assert (reload_completed);
8882
8883 /* Avoid HImode and its attendant prefix byte. */
8884 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8885 dest = gen_rtx_REG (SImode, REGNO (dest));
8886
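  /* "xor reg, reg" is shorter than "mov $0, reg" but clobbers the flags,
     which is why the xor form built below carries a CLOBBER of the flags
     register (hard register 17 in this port).  */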
8887 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8888
8889 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8890 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8891 {
8892 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8893 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8894 }
8895
8896 emit_insn (tmp);
8897 }
8898
8899 /* X is an unchanging MEM. If it is a constant pool reference, return
8900 the constant pool rtx, else NULL. */
8901
8902 rtx
8903 maybe_get_pool_constant (rtx x)
8904 {
8905 x = ix86_delegitimize_address (XEXP (x, 0));
8906
8907 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8908 return get_pool_constant (x);
8909
8910 return NULL_RTX;
8911 }
8912
8913 void
8914 ix86_expand_move (enum machine_mode mode, rtx operands[])
8915 {
8916 int strict = (reload_in_progress || reload_completed);
8917 rtx op0, op1;
8918 enum tls_model model;
8919
8920 op0 = operands[0];
8921 op1 = operands[1];
8922
8923 if (GET_CODE (op1) == SYMBOL_REF)
8924 {
8925 model = SYMBOL_REF_TLS_MODEL (op1);
8926 if (model)
8927 {
8928 op1 = legitimize_tls_address (op1, model, true);
8929 op1 = force_operand (op1, op0);
8930 if (op1 == op0)
8931 return;
8932 }
8933 }
8934 else if (GET_CODE (op1) == CONST
8935 && GET_CODE (XEXP (op1, 0)) == PLUS
8936 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8937 {
8938 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8939 if (model)
8940 {
8941 rtx addend = XEXP (XEXP (op1, 0), 1);
8942 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8943 op1 = force_operand (op1, NULL);
8944 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8945 op0, 1, OPTAB_DIRECT);
8946 if (op1 == op0)
8947 return;
8948 }
8949 }
8950
8951 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8952 {
8953 if (TARGET_MACHO && !TARGET_64BIT)
8954 {
8955 #if TARGET_MACHO
8956 if (MACHOPIC_PURE)
8957 {
8958 rtx temp = ((reload_in_progress
8959 || ((op0 && GET_CODE (op0) == REG)
8960 && mode == Pmode))
8961 ? op0 : gen_reg_rtx (Pmode));
8962 op1 = machopic_indirect_data_reference (op1, temp);
8963 op1 = machopic_legitimize_pic_address (op1, mode,
8964 temp == op1 ? 0 : temp);
8965 }
8966 else if (MACHOPIC_INDIRECT)
8967 op1 = machopic_indirect_data_reference (op1, 0);
8968 if (op0 == op1)
8969 return;
8970 #endif
8971 }
8972 else
8973 {
8974 if (GET_CODE (op0) == MEM)
8975 op1 = force_reg (Pmode, op1);
8976 else
8977 op1 = legitimize_address (op1, op1, Pmode);
8978 }
8979 }
8980 else
8981 {
8982 if (GET_CODE (op0) == MEM
8983 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8984 || !push_operand (op0, mode))
8985 && GET_CODE (op1) == MEM)
8986 op1 = force_reg (mode, op1);
8987
8988 if (push_operand (op0, mode)
8989 && ! general_no_elim_operand (op1, mode))
8990 op1 = copy_to_mode_reg (mode, op1);
8991
8992 /* Force large constants in 64bit compilation into register
8993 to get them CSEed. */
8994 if (TARGET_64BIT && mode == DImode
8995 && immediate_operand (op1, mode)
8996 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8997 && !register_operand (op0, mode)
8998 && optimize && !reload_completed && !reload_in_progress)
8999 op1 = copy_to_mode_reg (mode, op1);
9000
9001 if (FLOAT_MODE_P (mode))
9002 {
9003 /* If we are loading a floating point constant to a register,
9004 force the value to memory now, since we'll get better code
9005 out of the back end. */
9006
9007 if (strict)
9008 ;
9009 else if (GET_CODE (op1) == CONST_DOUBLE)
9010 {
9011 op1 = validize_mem (force_const_mem (mode, op1));
9012 if (!register_operand (op0, mode))
9013 {
9014 rtx temp = gen_reg_rtx (mode);
9015 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9016 emit_move_insn (op0, temp);
9017 return;
9018 }
9019 }
9020 }
9021 }
9022
9023 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9024 }
9025
9026 void
9027 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9028 {
9029 rtx op0 = operands[0], op1 = operands[1];
9030
9031 /* Force constants other than zero into memory. We do not know how
9032 the instructions used to build constants modify the upper 64 bits
9033 of the register; once we have that information we may be able
9034 to handle some of them more efficiently. */
9035 if ((reload_in_progress | reload_completed) == 0
9036 && register_operand (op0, mode)
9037 && CONSTANT_P (op1)
9038 && standard_sse_constant_p (op1) <= 0)
9039 op1 = validize_mem (force_const_mem (mode, op1));
9040
9041 /* Make operand1 a register if it isn't already. */
9042 if (!no_new_pseudos
9043 && !register_operand (op0, mode)
9044 && !register_operand (op1, mode))
9045 {
9046 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9047 return;
9048 }
9049
9050 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9051 }
9052
9053 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9054 straight to ix86_expand_vector_move. */
9055
9056 void
9057 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9058 {
9059 rtx op0, op1, m;
9060
9061 op0 = operands[0];
9062 op1 = operands[1];
9063
9064 if (MEM_P (op1))
9065 {
9066 /* If we're optimizing for size, movups is the smallest. */
9067 if (optimize_size)
9068 {
9069 op0 = gen_lowpart (V4SFmode, op0);
9070 op1 = gen_lowpart (V4SFmode, op1);
9071 emit_insn (gen_sse_movups (op0, op1));
9072 return;
9073 }
9074
9075 /* ??? If we have typed data, then it would appear that using
9076 movdqu is the only way to get unaligned data loaded with
9077 integer type. */
9078 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9079 {
9080 op0 = gen_lowpart (V16QImode, op0);
9081 op1 = gen_lowpart (V16QImode, op1);
9082 emit_insn (gen_sse2_movdqu (op0, op1));
9083 return;
9084 }
9085
9086 if (TARGET_SSE2 && mode == V2DFmode)
9087 {
9088 rtx zero;
9089
9090 /* When SSE registers are split into halves, we can avoid
9091 writing to the top half twice. */
9092 if (TARGET_SSE_SPLIT_REGS)
9093 {
9094 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9095 zero = op0;
9096 }
9097 else
9098 {
9099 /* ??? Not sure about the best option for the Intel chips.
9100 The following would seem to satisfy; the register is
9101 entirely cleared, breaking the dependency chain. We
9102 then store to the upper half, with a dependency depth
9103 of one. A rumor has it that Intel recommends two movsd
9104 followed by an unpacklpd, but this is unconfirmed. And
9105 given that the dependency depth of the unpacklpd would
9106 still be one, I'm not sure why this would be better. */
9107 zero = CONST0_RTX (V2DFmode);
9108 }
9109
9110 m = adjust_address (op1, DFmode, 0);
9111 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9112 m = adjust_address (op1, DFmode, 8);
9113 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9114 }
9115 else
9116 {
9117 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9118 emit_move_insn (op0, CONST0_RTX (mode));
9119 else
9120 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9121
9122 if (mode != V4SFmode)
9123 op0 = gen_lowpart (V4SFmode, op0);
9124 m = adjust_address (op1, V2SFmode, 0);
9125 emit_insn (gen_sse_loadlps (op0, op0, m));
9126 m = adjust_address (op1, V2SFmode, 8);
9127 emit_insn (gen_sse_loadhps (op0, op0, m));
9128 }
9129 }
9130 else if (MEM_P (op0))
9131 {
9132 /* If we're optimizing for size, movups is the smallest. */
9133 if (optimize_size)
9134 {
9135 op0 = gen_lowpart (V4SFmode, op0);
9136 op1 = gen_lowpart (V4SFmode, op1);
9137 emit_insn (gen_sse_movups (op0, op1));
9138 return;
9139 }
9140
9141 /* ??? Similar to above, only less clear because of
9142 "typeless stores". */
9143 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9144 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9145 {
9146 op0 = gen_lowpart (V16QImode, op0);
9147 op1 = gen_lowpart (V16QImode, op1);
9148 emit_insn (gen_sse2_movdqu (op0, op1));
9149 return;
9150 }
9151
9152 if (TARGET_SSE2 && mode == V2DFmode)
9153 {
9154 m = adjust_address (op0, DFmode, 0);
9155 emit_insn (gen_sse2_storelpd (m, op1));
9156 m = adjust_address (op0, DFmode, 8);
9157 emit_insn (gen_sse2_storehpd (m, op1));
9158 }
9159 else
9160 {
9161 if (mode != V4SFmode)
9162 op1 = gen_lowpart (V4SFmode, op1);
9163 m = adjust_address (op0, V2SFmode, 0);
9164 emit_insn (gen_sse_storelps (m, op1));
9165 m = adjust_address (op0, V2SFmode, 8);
9166 emit_insn (gen_sse_storehps (m, op1));
9167 }
9168 }
9169 else
9170 gcc_unreachable ();
9171 }
9172
9173 /* Expand a push in MODE. This is some mode for which we do not support
9174 proper push instructions, at least from the registers that we expect
9175 the value to live in. */
9176
9177 void
9178 ix86_expand_push (enum machine_mode mode, rtx x)
9179 {
9180 rtx tmp;
9181
9182 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9183 GEN_INT (-GET_MODE_SIZE (mode)),
9184 stack_pointer_rtx, 1, OPTAB_DIRECT);
9185 if (tmp != stack_pointer_rtx)
9186 emit_move_insn (stack_pointer_rtx, tmp);
9187
9188 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9189 emit_move_insn (tmp, x);
9190 }
9191
9192 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9193 destination to use for the operation. If different from the true
9194 destination in operands[0], a copy operation will be required. */
9195
9196 rtx
9197 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9198 rtx operands[])
9199 {
9200 int matching_memory;
9201 rtx src1, src2, dst;
9202
9203 dst = operands[0];
9204 src1 = operands[1];
9205 src2 = operands[2];
9206
9207 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9208 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9209 && (rtx_equal_p (dst, src2)
9210 || immediate_operand (src1, mode)))
9211 {
9212 rtx temp = src1;
9213 src1 = src2;
9214 src2 = temp;
9215 }
9216
9217 /* If the destination is memory, and we do not have matching source
9218 operands, do things in registers. */
9219 matching_memory = 0;
9220 if (GET_CODE (dst) == MEM)
9221 {
9222 if (rtx_equal_p (dst, src1))
9223 matching_memory = 1;
9224 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9225 && rtx_equal_p (dst, src2))
9226 matching_memory = 2;
9227 else
9228 dst = gen_reg_rtx (mode);
9229 }
9230
9231 /* Both source operands cannot be in memory. */
9232 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9233 {
9234 if (matching_memory != 2)
9235 src2 = force_reg (mode, src2);
9236 else
9237 src1 = force_reg (mode, src1);
9238 }
9239
9240 /* If the operation is not commutable, source 1 cannot be a constant
9241 or non-matching memory. */
9242 if ((CONSTANT_P (src1)
9243 || (!matching_memory && GET_CODE (src1) == MEM))
9244 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9245 src1 = force_reg (mode, src1);
9246
9247 src1 = operands[1] = src1;
9248 src2 = operands[2] = src2;
9249 return dst;
9250 }
9251
9252 /* Similarly, but assume that the destination has already been
9253 set up properly. */
9254
9255 void
9256 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9257 enum machine_mode mode, rtx operands[])
9258 {
9259 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9260 gcc_assert (dst == operands[0]);
9261 }
9262
9263 /* Attempt to expand a binary operator. Make the expansion closer to the
9264 actual machine than just general_operand, which would allow 3 separate
9265 memory references (one output, two input) in a single insn. */
9266
9267 void
9268 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9269 rtx operands[])
9270 {
9271 rtx src1, src2, dst, op, clob;
9272
9273 dst = ix86_fixup_binary_operands (code, mode, operands);
9274 src1 = operands[1];
9275 src2 = operands[2];
9276
9277 /* Emit the instruction. */
9278
9279 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9280 if (reload_in_progress)
9281 {
9282 /* Reload doesn't know about the flags register, and doesn't know that
9283 it doesn't want to clobber it. We can only do this with PLUS. */
9284 gcc_assert (code == PLUS);
9285 emit_insn (op);
9286 }
9287 else
9288 {
9289 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9290 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9291 }
9292
9293 /* Fix up the destination if needed. */
9294 if (dst != operands[0])
9295 emit_move_insn (operands[0], dst);
9296 }
9297
9298 /* Return TRUE or FALSE depending on whether the binary operator meets the
9299 appropriate constraints. */
9300
9301 int
9302 ix86_binary_operator_ok (enum rtx_code code,
9303 enum machine_mode mode ATTRIBUTE_UNUSED,
9304 rtx operands[3])
9305 {
9306 /* Both source operands cannot be in memory. */
9307 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9308 return 0;
9309 /* If the operation is not commutable, source 1 cannot be a constant. */
9310 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9311 return 0;
9312 /* If the destination is memory, we must have a matching source operand. */
9313 if (GET_CODE (operands[0]) == MEM
9314 && ! (rtx_equal_p (operands[0], operands[1])
9315 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9316 && rtx_equal_p (operands[0], operands[2]))))
9317 return 0;
9318 /* If the operation is not commutable and the source 1 is memory, we must
9319 have a matching destination. */
9320 if (GET_CODE (operands[1]) == MEM
9321 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9322 && ! rtx_equal_p (operands[0], operands[1]))
9323 return 0;
9324 return 1;
9325 }
9326
9327 /* Attempt to expand a unary operator. Make the expansion closer to the
9328 actual machine than just general_operand, which would allow 2 separate
9329 memory references (one output, one input) in a single insn. */
9330
9331 void
9332 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9333 rtx operands[])
9334 {
9335 int matching_memory;
9336 rtx src, dst, op, clob;
9337
9338 dst = operands[0];
9339 src = operands[1];
9340
9341 /* If the destination is memory, and we do not have matching source
9342 operands, do things in registers. */
9343 matching_memory = 0;
9344 if (MEM_P (dst))
9345 {
9346 if (rtx_equal_p (dst, src))
9347 matching_memory = 1;
9348 else
9349 dst = gen_reg_rtx (mode);
9350 }
9351
9352 /* When source operand is memory, destination must match. */
9353 if (MEM_P (src) && !matching_memory)
9354 src = force_reg (mode, src);
9355
9356 /* Emit the instruction. */
9357
9358 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9359 if (reload_in_progress || code == NOT)
9360 {
9361 /* Reload doesn't know about the flags register, and doesn't know that
9362 it doesn't want to clobber it. */
9363 gcc_assert (code == NOT);
9364 emit_insn (op);
9365 }
9366 else
9367 {
9368 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9369 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9370 }
9371
9372 /* Fix up the destination if needed. */
9373 if (dst != operands[0])
9374 emit_move_insn (operands[0], dst);
9375 }
9376
9377 /* Return TRUE or FALSE depending on whether the unary operator meets the
9378 appropriate constraints. */
9379
9380 int
9381 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9382 enum machine_mode mode ATTRIBUTE_UNUSED,
9383 rtx operands[2] ATTRIBUTE_UNUSED)
9384 {
9385 /* If one of operands is memory, source and destination must match. */
9386 if ((GET_CODE (operands[0]) == MEM
9387 || GET_CODE (operands[1]) == MEM)
9388 && ! rtx_equal_p (operands[0], operands[1]))
9389 return FALSE;
9390 return TRUE;
9391 }
9392
9393 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9394 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9395 true, then replicate the mask for all elements of the vector register.
9396 If INVERT is true, then create a mask excluding the sign bit. */
9397
9398 rtx
9399 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9400 {
9401 enum machine_mode vec_mode;
9402 HOST_WIDE_INT hi, lo;
9403 int shift = 63;
9404 rtvec v;
9405 rtx mask;
9406
9407 /* Find the sign bit, sign extended to 2*HWI. */
9408 if (mode == SFmode)
9409 lo = 0x80000000, hi = lo < 0;
9410 else if (HOST_BITS_PER_WIDE_INT >= 64)
9411 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9412 else
9413 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9414
9415 if (invert)
9416 lo = ~lo, hi = ~hi;
9417
9418 /* Force this value into the low part of a fp vector constant. */
9419 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9420 mask = gen_lowpart (mode, mask);
9421
9422 if (mode == SFmode)
9423 {
9424 if (vect)
9425 v = gen_rtvec (4, mask, mask, mask, mask);
9426 else
9427 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9428 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9429 vec_mode = V4SFmode;
9430 }
9431 else
9432 {
9433 if (vect)
9434 v = gen_rtvec (2, mask, mask);
9435 else
9436 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9437 vec_mode = V2DFmode;
9438 }
9439
9440 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9441 }
9442
9443 /* Generate code for floating point ABS or NEG. */
9444
9445 void
9446 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9447 rtx operands[])
9448 {
9449 rtx mask, set, use, clob, dst, src;
9450 bool matching_memory;
9451 bool use_sse = false;
9452 bool vector_mode = VECTOR_MODE_P (mode);
9453 enum machine_mode elt_mode = mode;
9454
9455 if (vector_mode)
9456 {
9457 elt_mode = GET_MODE_INNER (mode);
9458 use_sse = true;
9459 }
9460 else if (TARGET_SSE_MATH)
9461 use_sse = SSE_FLOAT_MODE_P (mode);
9462
9463 /* NEG and ABS performed with SSE use bitwise mask operations.
9464 Create the appropriate mask now. */
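/* For NEG the mask has only the sign bit set, so XORing with it flips
   the sign; for ABS the mask is inverted, so ANDing with it clears the
   sign bit.  */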
9465 if (use_sse)
9466 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9467 else
9468 mask = NULL_RTX;
9469
9470 dst = operands[0];
9471 src = operands[1];
9472
9473 /* If the destination is memory, and we don't have matching source
9474 operands or we're using the x87, do things in registers. */
9475 matching_memory = false;
9476 if (MEM_P (dst))
9477 {
9478 if (use_sse && rtx_equal_p (dst, src))
9479 matching_memory = true;
9480 else
9481 dst = gen_reg_rtx (mode);
9482 }
9483 if (MEM_P (src) && !matching_memory)
9484 src = force_reg (mode, src);
9485
9486 if (vector_mode)
9487 {
9488 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9489 set = gen_rtx_SET (VOIDmode, dst, set);
9490 emit_insn (set);
9491 }
9492 else
9493 {
9494 set = gen_rtx_fmt_e (code, mode, src);
9495 set = gen_rtx_SET (VOIDmode, dst, set);
9496 if (mask)
9497 {
9498 use = gen_rtx_USE (VOIDmode, mask);
9499 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9500 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9501 gen_rtvec (3, set, use, clob)));
9502 }
9503 else
9504 emit_insn (set);
9505 }
9506
9507 if (dst != operands[0])
9508 emit_move_insn (operands[0], dst);
9509 }
9510
9511 /* Expand a copysign operation. Special case operand 0 being a constant. */
9512
9513 void
9514 ix86_expand_copysign (rtx operands[])
9515 {
9516 enum machine_mode mode, vmode;
9517 rtx dest, op0, op1, mask, nmask;
9518
9519 dest = operands[0];
9520 op0 = operands[1];
9521 op1 = operands[2];
9522
9523 mode = GET_MODE (dest);
9524 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9525
9526 if (GET_CODE (op0) == CONST_DOUBLE)
9527 {
9528 rtvec v;
9529
9530 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9531 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9532
9533 if (op0 == CONST0_RTX (mode))
9534 op0 = CONST0_RTX (vmode);
9535 else
9536 {
9537 if (mode == SFmode)
9538 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9539 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9540 else
9541 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9542 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9543 }
9544
9545 mask = ix86_build_signbit_mask (mode, 0, 0);
9546
9547 if (mode == SFmode)
9548 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9549 else
9550 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9551 }
9552 else
9553 {
9554 nmask = ix86_build_signbit_mask (mode, 0, 1);
9555 mask = ix86_build_signbit_mask (mode, 0, 0);
9556
9557 if (mode == SFmode)
9558 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9559 else
9560 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9561 }
9562 }
9563
9564 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9565 be a constant, and so has already been expanded into a vector constant. */
9566
9567 void
9568 ix86_split_copysign_const (rtx operands[])
9569 {
9570 enum machine_mode mode, vmode;
9571 rtx dest, op0, op1, mask, x;
9572
9573 dest = operands[0];
9574 op0 = operands[1];
9575 op1 = operands[2];
9576 mask = operands[3];
9577
9578 mode = GET_MODE (dest);
9579 vmode = GET_MODE (mask);
9580
9581 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9582 x = gen_rtx_AND (vmode, dest, mask);
9583 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9584
9585 if (op0 != CONST0_RTX (vmode))
9586 {
9587 x = gen_rtx_IOR (vmode, dest, op0);
9588 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9589 }
9590 }
9591
9592 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9593 so we have to do two masks. */
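/* The net effect is dest = (op0 & nmask) | (op1 & mask), where mask
   selects the sign bit and nmask is its complement; the alternatives
   below differ only in which registers hold the intermediate values.  */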
9594
9595 void
9596 ix86_split_copysign_var (rtx operands[])
9597 {
9598 enum machine_mode mode, vmode;
9599 rtx dest, scratch, op0, op1, mask, nmask, x;
9600
9601 dest = operands[0];
9602 scratch = operands[1];
9603 op0 = operands[2];
9604 op1 = operands[3];
9605 nmask = operands[4];
9606 mask = operands[5];
9607
9608 mode = GET_MODE (dest);
9609 vmode = GET_MODE (mask);
9610
9611 if (rtx_equal_p (op0, op1))
9612 {
9613 /* Shouldn't happen often (it's useless, obviously), but when it does
9614 we'd generate incorrect code if we continue below. */
9615 emit_move_insn (dest, op0);
9616 return;
9617 }
9618
9619 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9620 {
9621 gcc_assert (REGNO (op1) == REGNO (scratch));
9622
9623 x = gen_rtx_AND (vmode, scratch, mask);
9624 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9625
9626 dest = mask;
9627 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9628 x = gen_rtx_NOT (vmode, dest);
9629 x = gen_rtx_AND (vmode, x, op0);
9630 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9631 }
9632 else
9633 {
9634 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9635 {
9636 x = gen_rtx_AND (vmode, scratch, mask);
9637 }
9638 else /* alternative 2,4 */
9639 {
9640 gcc_assert (REGNO (mask) == REGNO (scratch));
9641 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9642 x = gen_rtx_AND (vmode, scratch, op1);
9643 }
9644 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9645
9646 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9647 {
9648 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9649 x = gen_rtx_AND (vmode, dest, nmask);
9650 }
9651 else /* alternative 3,4 */
9652 {
9653 gcc_assert (REGNO (nmask) == REGNO (dest));
9654 dest = nmask;
9655 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9656 x = gen_rtx_AND (vmode, dest, op0);
9657 }
9658 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9659 }
9660
9661 x = gen_rtx_IOR (vmode, dest, scratch);
9662 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9663 }
9664
9665 /* Return TRUE or FALSE depending on whether the first SET in INSN
9666 has source and destination with matching CC modes, and that the
9667 CC mode is at least as constrained as REQ_MODE. */
9668
9669 int
9670 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9671 {
9672 rtx set;
9673 enum machine_mode set_mode;
9674
9675 set = PATTERN (insn);
9676 if (GET_CODE (set) == PARALLEL)
9677 set = XVECEXP (set, 0, 0);
9678 gcc_assert (GET_CODE (set) == SET);
9679 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9680
9681 set_mode = GET_MODE (SET_DEST (set));
9682 switch (set_mode)
9683 {
9684 case CCNOmode:
9685 if (req_mode != CCNOmode
9686 && (req_mode != CCmode
9687 || XEXP (SET_SRC (set), 1) != const0_rtx))
9688 return 0;
9689 break;
9690 case CCmode:
9691 if (req_mode == CCGCmode)
9692 return 0;
9693 /* FALLTHRU */
9694 case CCGCmode:
9695 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9696 return 0;
9697 /* FALLTHRU */
9698 case CCGOCmode:
9699 if (req_mode == CCZmode)
9700 return 0;
9701 /* FALLTHRU */
9702 case CCZmode:
9703 break;
9704
9705 default:
9706 gcc_unreachable ();
9707 }
9708
9709 return (GET_MODE (SET_SRC (set)) == set_mode);
9710 }
9711
9712 /* Generate insn patterns to do an integer compare of OPERANDS. */
9713
9714 static rtx
9715 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9716 {
9717 enum machine_mode cmpmode;
9718 rtx tmp, flags;
9719
9720 cmpmode = SELECT_CC_MODE (code, op0, op1);
9721 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9722
9723 /* This is very simple, but making the interface the same as in the
9724 FP case makes the rest of the code easier. */
9725 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9726 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9727
9728 /* Return the test that should be put into the flags user, i.e.
9729 the bcc, scc, or cmov instruction. */
9730 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9731 }
9732
9733 /* Figure out whether to use ordered or unordered fp comparisons.
9734 Return the appropriate mode to use. */
9735
9736 enum machine_mode
9737 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9738 {
9739 /* ??? In order to make all comparisons reversible, we do all comparisons
9740 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9741 between trapping and non-trapping forms of comparisons, we can make inequality
9742 comparisons trapping again, since it results in better code when using
9743 FCOM based compares. */
9744 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9745 }
9746
9747 enum machine_mode
9748 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9749 {
9750 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9751 return ix86_fp_compare_mode (code);
9752 switch (code)
9753 {
9754 /* Only zero flag is needed. */
9755 case EQ: /* ZF=0 */
9756 case NE: /* ZF!=0 */
9757 return CCZmode;
9758 /* Codes needing carry flag. */
9759 case GEU: /* CF=0 */
9760 case GTU: /* CF=0 & ZF=0 */
9761 case LTU: /* CF=1 */
9762 case LEU: /* CF=1 | ZF=1 */
9763 return CCmode;
9764 /* Codes possibly doable only with sign flag when
9765 comparing against zero. */
9766 case GE: /* SF=OF or SF=0 */
9767 case LT: /* SF<>OF or SF=1 */
9768 if (op1 == const0_rtx)
9769 return CCGOCmode;
9770 else
9771 /* For other cases Carry flag is not required. */
9772 return CCGCmode;
9773 /* Codes doable only with sign flag when comparing
9774 against zero, but we miss jump instruction for it
9775 so we need to use relational tests against overflow
9776 that thus needs to be zero. */
9777 case GT: /* ZF=0 & SF=OF */
9778 case LE: /* ZF=1 | SF<>OF */
9779 if (op1 == const0_rtx)
9780 return CCNOmode;
9781 else
9782 return CCGCmode;
9783 /* The strcmp pattern does (use flags), and combine may ask us for the
9784 proper mode. */
9785 case USE:
9786 return CCmode;
9787 default:
9788 gcc_unreachable ();
9789 }
9790 }
9791
9792 /* Return the fixed registers used for condition codes. */
9793
9794 static bool
9795 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9796 {
9797 *p1 = FLAGS_REG;
9798 *p2 = FPSR_REG;
9799 return true;
9800 }
9801
9802 /* If two condition code modes are compatible, return a condition code
9803 mode which is compatible with both. Otherwise, return
9804 VOIDmode. */
9805
9806 static enum machine_mode
9807 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9808 {
9809 if (m1 == m2)
9810 return m1;
9811
9812 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9813 return VOIDmode;
9814
9815 if ((m1 == CCGCmode && m2 == CCGOCmode)
9816 || (m1 == CCGOCmode && m2 == CCGCmode))
9817 return CCGCmode;
9818
9819 switch (m1)
9820 {
9821 default:
9822 gcc_unreachable ();
9823
9824 case CCmode:
9825 case CCGCmode:
9826 case CCGOCmode:
9827 case CCNOmode:
9828 case CCZmode:
9829 switch (m2)
9830 {
9831 default:
9832 return VOIDmode;
9833
9834 case CCmode:
9835 case CCGCmode:
9836 case CCGOCmode:
9837 case CCNOmode:
9838 case CCZmode:
9839 return CCmode;
9840 }
9841
9842 case CCFPmode:
9843 case CCFPUmode:
9844 /* These are only compatible with themselves, which we already
9845 checked above. */
9846 return VOIDmode;
9847 }
9848 }
9849
9850 /* Return true if we should use an FCOMI instruction for this fp comparison. */
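/* We do so whenever fcomi ties for the cheapest comparison sequence,
   considering either operand order.  */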
9851
9852 int
9853 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9854 {
9855 enum rtx_code swapped_code = swap_condition (code);
9856 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9857 || (ix86_fp_comparison_cost (swapped_code)
9858 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9859 }
9860
9861 /* Swap, force into registers, or otherwise massage the two operands
9862 to a fp comparison. The operands are updated in place; the new
9863 comparison code is returned. */
9864
9865 static enum rtx_code
9866 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9867 {
9868 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9869 rtx op0 = *pop0, op1 = *pop1;
9870 enum machine_mode op_mode = GET_MODE (op0);
9871 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9872
9873 /* All of the unordered compare instructions only work on registers.
9874 The same is true of the fcomi compare instructions. The XFmode
9875 compare instructions require registers except when comparing
9876 against zero or when converting operand 1 from fixed point to
9877 floating point. */
9878
9879 if (!is_sse
9880 && (fpcmp_mode == CCFPUmode
9881 || (op_mode == XFmode
9882 && ! (standard_80387_constant_p (op0) == 1
9883 || standard_80387_constant_p (op1) == 1)
9884 && GET_CODE (op1) != FLOAT)
9885 || ix86_use_fcomi_compare (code)))
9886 {
9887 op0 = force_reg (op_mode, op0);
9888 op1 = force_reg (op_mode, op1);
9889 }
9890 else
9891 {
9892 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9893 things around if they appear profitable, otherwise force op0
9894 into a register. */
9895
9896 if (standard_80387_constant_p (op0) == 0
9897 || (GET_CODE (op0) == MEM
9898 && ! (standard_80387_constant_p (op1) == 0
9899 || GET_CODE (op1) == MEM)))
9900 {
9901 rtx tmp;
9902 tmp = op0, op0 = op1, op1 = tmp;
9903 code = swap_condition (code);
9904 }
9905
9906 if (GET_CODE (op0) != REG)
9907 op0 = force_reg (op_mode, op0);
9908
9909 if (CONSTANT_P (op1))
9910 {
9911 int tmp = standard_80387_constant_p (op1);
9912 if (tmp == 0)
9913 op1 = validize_mem (force_const_mem (op_mode, op1));
9914 else if (tmp == 1)
9915 {
9916 if (TARGET_CMOVE)
9917 op1 = force_reg (op_mode, op1);
9918 }
9919 else
9920 op1 = force_reg (op_mode, op1);
9921 }
9922 }
9923
9924 /* Try to rearrange the comparison to make it cheaper. */
9925 if (ix86_fp_comparison_cost (code)
9926 > ix86_fp_comparison_cost (swap_condition (code))
9927 && (GET_CODE (op1) == REG || !no_new_pseudos))
9928 {
9929 rtx tmp;
9930 tmp = op0, op0 = op1, op1 = tmp;
9931 code = swap_condition (code);
9932 if (GET_CODE (op0) != REG)
9933 op0 = force_reg (op_mode, op0);
9934 }
9935
9936 *pop0 = op0;
9937 *pop1 = op1;
9938 return code;
9939 }
9940
9941 /* Convert comparison codes we use to represent FP comparison to integer
9942 code that will result in proper branch. Return UNKNOWN if no such code
9943 is available. */
9944
9945 enum rtx_code
9946 ix86_fp_compare_code_to_integer (enum rtx_code code)
9947 {
9948 switch (code)
9949 {
9950 case GT:
9951 return GTU;
9952 case GE:
9953 return GEU;
9954 case ORDERED:
9955 case UNORDERED:
9956 return code;
9957 break;
9958 case UNEQ:
9959 return EQ;
9960 break;
9961 case UNLT:
9962 return LTU;
9963 break;
9964 case UNLE:
9965 return LEU;
9966 break;
9967 case LTGT:
9968 return NE;
9969 break;
9970 default:
9971 return UNKNOWN;
9972 }
9973 }
9974
9975 /* Split comparison code CODE into comparisons we can do using branch
9976 instructions. BYPASS_CODE is the comparison code for the branch that
9977 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
9978 is not required, its value is set to UNKNOWN.
9979 We never require more than two branches. */
9980
9981 void
9982 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9983 enum rtx_code *first_code,
9984 enum rtx_code *second_code)
9985 {
9986 *first_code = code;
9987 *bypass_code = UNKNOWN;
9988 *second_code = UNKNOWN;
9989
9990 /* The fcomi comparison sets flags as follows:
9991
9992 cmp ZF PF CF
9993 > 0 0 0
9994 < 0 0 1
9995 = 1 0 0
9996 un 1 1 1 */
9997
9998 switch (code)
9999 {
10000 case GT: /* GTU - CF=0 & ZF=0 */
10001 case GE: /* GEU - CF=0 */
10002 case ORDERED: /* PF=0 */
10003 case UNORDERED: /* PF=1 */
10004 case UNEQ: /* EQ - ZF=1 */
10005 case UNLT: /* LTU - CF=1 */
10006 case UNLE: /* LEU - CF=1 | ZF=1 */
10007 case LTGT: /* EQ - ZF=0 */
10008 break;
10009 case LT: /* LTU - CF=1 - fails on unordered */
10010 *first_code = UNLT;
10011 *bypass_code = UNORDERED;
10012 break;
10013 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10014 *first_code = UNLE;
10015 *bypass_code = UNORDERED;
10016 break;
10017 case EQ: /* EQ - ZF=1 - fails on unordered */
10018 *first_code = UNEQ;
10019 *bypass_code = UNORDERED;
10020 break;
10021 case NE: /* NE - ZF=0 - fails on unordered */
10022 *first_code = LTGT;
10023 *second_code = UNORDERED;
10024 break;
10025 case UNGE: /* GEU - CF=0 - fails on unordered */
10026 *first_code = GE;
10027 *second_code = UNORDERED;
10028 break;
10029 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10030 *first_code = GT;
10031 *second_code = UNORDERED;
10032 break;
10033 default:
10034 gcc_unreachable ();
10035 }
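/* When IEEE-conformant (NaN-aware) comparisons are not required, the
   extra tests for the unordered case can be dropped and a single
   branch suffices.  */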
10036 if (!TARGET_IEEE_FP)
10037 {
10038 *second_code = UNKNOWN;
10039 *bypass_code = UNKNOWN;
10040 }
10041 }
10042
10043 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10044 All of the following functions use the number of instructions as the cost metric.
10045 In the future this should be tweaked to compute bytes for optimize_size and
10046 to take into account the performance of various instructions on various CPUs. */
10047 static int
10048 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10049 {
10050 if (!TARGET_IEEE_FP)
10051 return 4;
10052 /* The cost of code output by ix86_expand_fp_compare. */
10053 switch (code)
10054 {
10055 case UNLE:
10056 case UNLT:
10057 case LTGT:
10058 case GT:
10059 case GE:
10060 case UNORDERED:
10061 case ORDERED:
10062 case UNEQ:
10063 return 4;
10064 break;
10065 case LT:
10066 case NE:
10067 case EQ:
10068 case UNGE:
10069 return 5;
10070 break;
10071 case LE:
10072 case UNGT:
10073 return 6;
10074 break;
10075 default:
10076 gcc_unreachable ();
10077 }
10078 }
10079
10080 /* Return cost of comparison done using fcomi operation.
10081 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10082 static int
10083 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10084 {
10085 enum rtx_code bypass_code, first_code, second_code;
10086 /* Return an arbitrarily high cost when the instruction is not supported - this
10087 prevents gcc from using it. */
10088 if (!TARGET_CMOVE)
10089 return 1024;
10090 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10091 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10092 }
10093
10094 /* Return cost of comparison done using sahf operation.
10095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10096 static int
10097 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10098 {
10099 enum rtx_code bypass_code, first_code, second_code;
10100 /* Return an arbitrarily high cost when the instruction is not preferred - this
10101 keeps gcc from using it. */
10102 if (!TARGET_USE_SAHF && !optimize_size)
10103 return 1024;
10104 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10105 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10106 }
10107
10108 /* Compute cost of the comparison done using any method.
10109 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10110 static int
10111 ix86_fp_comparison_cost (enum rtx_code code)
10112 {
10113 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10114 int min;
10115
10116 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10117 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10118
10119 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10120 if (min > sahf_cost)
10121 min = sahf_cost;
10122 if (min > fcomi_cost)
10123 min = fcomi_cost;
10124 return min;
10125 }
10126
10127 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10128
10129 static rtx
10130 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10131 rtx *second_test, rtx *bypass_test)
10132 {
10133 enum machine_mode fpcmp_mode, intcmp_mode;
10134 rtx tmp, tmp2;
10135 int cost = ix86_fp_comparison_cost (code);
10136 enum rtx_code bypass_code, first_code, second_code;
10137
10138 fpcmp_mode = ix86_fp_compare_mode (code);
10139 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10140
10141 if (second_test)
10142 *second_test = NULL_RTX;
10143 if (bypass_test)
10144 *bypass_test = NULL_RTX;
10145
10146 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10147
10148 /* Do fcomi/sahf based test when profitable. */
10149 if ((bypass_code == UNKNOWN || bypass_test)
10150 && (second_code == UNKNOWN || second_test)
10151 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10152 {
10153 if (TARGET_CMOVE)
10154 {
10155 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10156 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10157 tmp);
10158 emit_insn (tmp);
10159 }
10160 else
10161 {
10162 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10163 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10164 if (!scratch)
10165 scratch = gen_reg_rtx (HImode);
10166 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10167 emit_insn (gen_x86_sahf_1 (scratch));
10168 }
10169
10170 /* The FP codes work out to act like unsigned. */
10171 intcmp_mode = fpcmp_mode;
10172 code = first_code;
10173 if (bypass_code != UNKNOWN)
10174 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10175 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10176 const0_rtx);
10177 if (second_code != UNKNOWN)
10178 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10179 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10180 const0_rtx);
10181 }
10182 else
10183 {
10184 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10185 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10186 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10187 if (!scratch)
10188 scratch = gen_reg_rtx (HImode);
10189 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10190
10191 /* In the unordered case, we have to check C2 for NaN's, which
10192 doesn't happen to work out to anything nice combination-wise.
10193 So do some bit twiddling on the value we've got in AH to come
10194 up with an appropriate set of condition codes. */
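/* After fnstsw the FPU condition codes sit in AH as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, so 0x45 tests all three at once: C0 alone
   means "less than", C3 alone means "equal", and all three set means
   unordered.  */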
10195
10196 intcmp_mode = CCNOmode;
10197 switch (code)
10198 {
10199 case GT:
10200 case UNGT:
10201 if (code == GT || !TARGET_IEEE_FP)
10202 {
10203 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10204 code = EQ;
10205 }
10206 else
10207 {
10208 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10209 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10210 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10211 intcmp_mode = CCmode;
10212 code = GEU;
10213 }
10214 break;
10215 case LT:
10216 case UNLT:
10217 if (code == LT && TARGET_IEEE_FP)
10218 {
10219 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10220 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10221 intcmp_mode = CCmode;
10222 code = EQ;
10223 }
10224 else
10225 {
10226 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10227 code = NE;
10228 }
10229 break;
10230 case GE:
10231 case UNGE:
10232 if (code == GE || !TARGET_IEEE_FP)
10233 {
10234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10235 code = EQ;
10236 }
10237 else
10238 {
10239 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10240 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10241 GEN_INT (0x01)));
10242 code = NE;
10243 }
10244 break;
10245 case LE:
10246 case UNLE:
10247 if (code == LE && TARGET_IEEE_FP)
10248 {
10249 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10250 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10251 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10252 intcmp_mode = CCmode;
10253 code = LTU;
10254 }
10255 else
10256 {
10257 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10258 code = NE;
10259 }
10260 break;
10261 case EQ:
10262 case UNEQ:
10263 if (code == EQ && TARGET_IEEE_FP)
10264 {
10265 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10266 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10267 intcmp_mode = CCmode;
10268 code = EQ;
10269 }
10270 else
10271 {
10272 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10273 code = NE;
10274 break;
10275 }
10276 break;
10277 case NE:
10278 case LTGT:
10279 if (code == NE && TARGET_IEEE_FP)
10280 {
10281 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10282 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10283 GEN_INT (0x40)));
10284 code = NE;
10285 }
10286 else
10287 {
10288 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10289 code = EQ;
10290 }
10291 break;
10292
10293 case UNORDERED:
10294 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10295 code = NE;
10296 break;
10297 case ORDERED:
10298 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10299 code = EQ;
10300 break;
10301
10302 default:
10303 gcc_unreachable ();
10304 }
10305 }
10306
10307 /* Return the test that should be put into the flags user, i.e.
10308 the bcc, scc, or cmov instruction. */
10309 return gen_rtx_fmt_ee (code, VOIDmode,
10310 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10311 const0_rtx);
10312 }
10313
10314 rtx
10315 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10316 {
10317 rtx op0, op1, ret;
10318 op0 = ix86_compare_op0;
10319 op1 = ix86_compare_op1;
10320
10321 if (second_test)
10322 *second_test = NULL_RTX;
10323 if (bypass_test)
10324 *bypass_test = NULL_RTX;
10325
10326 if (ix86_compare_emitted)
10327 {
10328 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10329 ix86_compare_emitted = NULL_RTX;
10330 }
10331 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10332 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10333 second_test, bypass_test);
10334 else
10335 ret = ix86_expand_int_compare (code, op0, op1);
10336
10337 return ret;
10338 }
10339
10340 /* Return true if the CODE will result in nontrivial jump sequence. */
10341 bool
10342 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10343 {
10344 enum rtx_code bypass_code, first_code, second_code;
10345 if (!TARGET_CMOVE)
10346 return true;
10347 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10348 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10349 }
10350
10351 void
10352 ix86_expand_branch (enum rtx_code code, rtx label)
10353 {
10354 rtx tmp;
10355
10356 /* If we have emitted a compare insn, go straight to simple.
10357 ix86_expand_compare won't emit anything if ix86_compare_emitted
10358 is non NULL. */
10359 if (ix86_compare_emitted)
10360 goto simple;
10361
10362 switch (GET_MODE (ix86_compare_op0))
10363 {
10364 case QImode:
10365 case HImode:
10366 case SImode:
10367 simple:
10368 tmp = ix86_expand_compare (code, NULL, NULL);
10369 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10370 gen_rtx_LABEL_REF (VOIDmode, label),
10371 pc_rtx);
10372 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10373 return;
10374
10375 case SFmode:
10376 case DFmode:
10377 case XFmode:
10378 {
10379 rtvec vec;
10380 int use_fcomi;
10381 enum rtx_code bypass_code, first_code, second_code;
10382
10383 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10384 &ix86_compare_op1);
10385
10386 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10387
10388 /* Check whether we will use the natural sequence with one jump. If
10389 so, we can expand the jump early. Otherwise delay expansion by
10390 creating a compound insn so as not to confuse the optimizers. */
10391 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10392 && TARGET_CMOVE)
10393 {
10394 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10395 gen_rtx_LABEL_REF (VOIDmode, label),
10396 pc_rtx, NULL_RTX, NULL_RTX);
10397 }
10398 else
10399 {
10400 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10401 ix86_compare_op0, ix86_compare_op1);
10402 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10403 gen_rtx_LABEL_REF (VOIDmode, label),
10404 pc_rtx);
10405 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10406
10407 use_fcomi = ix86_use_fcomi_compare (code);
10408 vec = rtvec_alloc (3 + !use_fcomi);
10409 RTVEC_ELT (vec, 0) = tmp;
10410 RTVEC_ELT (vec, 1)
10411 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10412 RTVEC_ELT (vec, 2)
10413 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10414 if (! use_fcomi)
10415 RTVEC_ELT (vec, 3)
10416 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10417
10418 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10419 }
10420 return;
10421 }
10422
10423 case DImode:
10424 if (TARGET_64BIT)
10425 goto simple;
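/* FALLTHRU */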
10426 case TImode:
10427 /* Expand DImode branch into multiple compare+branch. */
10428 {
10429 rtx lo[2], hi[2], label2;
10430 enum rtx_code code1, code2, code3;
10431 enum machine_mode submode;
10432
10433 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10434 {
10435 tmp = ix86_compare_op0;
10436 ix86_compare_op0 = ix86_compare_op1;
10437 ix86_compare_op1 = tmp;
10438 code = swap_condition (code);
10439 }
10440 if (GET_MODE (ix86_compare_op0) == DImode)
10441 {
10442 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10443 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10444 submode = SImode;
10445 }
10446 else
10447 {
10448 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10449 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10450 submode = DImode;
10451 }
10452
10453 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10454 avoid two branches. This costs one extra insn, so disable when
10455 optimizing for size. */
10456
10457 if ((code == EQ || code == NE)
10458 && (!optimize_size
10459 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10460 {
10461 rtx xor0, xor1;
10462
10463 xor1 = hi[0];
10464 if (hi[1] != const0_rtx)
10465 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10466 NULL_RTX, 0, OPTAB_WIDEN);
10467
10468 xor0 = lo[0];
10469 if (lo[1] != const0_rtx)
10470 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10471 NULL_RTX, 0, OPTAB_WIDEN);
10472
10473 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10474 NULL_RTX, 0, OPTAB_WIDEN);
10475
10476 ix86_compare_op0 = tmp;
10477 ix86_compare_op1 = const0_rtx;
10478 ix86_expand_branch (code, label);
10479 return;
10480 }
10481
10482 /* Otherwise, if we are doing a less-than or greater-or-equal-than
10483 comparison, op1 is a constant, and the low word is zero, then we can
10484 just examine the high word. */
10485
10486 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10487 switch (code)
10488 {
10489 case LT: case LTU: case GE: case GEU:
10490 ix86_compare_op0 = hi[0];
10491 ix86_compare_op1 = hi[1];
10492 ix86_expand_branch (code, label);
10493 return;
10494 default:
10495 break;
10496 }
10497
10498 /* Otherwise, we need two or three jumps. */
10499
10500 label2 = gen_label_rtx ();
10501
10502 code1 = code;
10503 code2 = swap_condition (code);
10504 code3 = unsigned_condition (code);
10505
10506 switch (code)
10507 {
10508 case LT: case GT: case LTU: case GTU:
10509 break;
10510
10511 case LE: code1 = LT; code2 = GT; break;
10512 case GE: code1 = GT; code2 = LT; break;
10513 case LEU: code1 = LTU; code2 = GTU; break;
10514 case GEU: code1 = GTU; code2 = LTU; break;
10515
10516 case EQ: code1 = UNKNOWN; code2 = NE; break;
10517 case NE: code2 = UNKNOWN; break;
10518
10519 default:
10520 gcc_unreachable ();
10521 }
10522
10523 /*
10524 * a < b =>
10525 * if (hi(a) < hi(b)) goto true;
10526 * if (hi(a) > hi(b)) goto false;
10527 * if (lo(a) < lo(b)) goto true;
10528 * false:
10529 */
10530
10531 ix86_compare_op0 = hi[0];
10532 ix86_compare_op1 = hi[1];
10533
10534 if (code1 != UNKNOWN)
10535 ix86_expand_branch (code1, label);
10536 if (code2 != UNKNOWN)
10537 ix86_expand_branch (code2, label2);
10538
10539 ix86_compare_op0 = lo[0];
10540 ix86_compare_op1 = lo[1];
10541 ix86_expand_branch (code3, label);
10542
10543 if (code2 != UNKNOWN)
10544 emit_label (label2);
10545 return;
10546 }
10547
10548 default:
10549 gcc_unreachable ();
10550 }
10551 }
10552
10553 /* Split branch based on floating point condition. */
10554 void
10555 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10556 rtx target1, rtx target2, rtx tmp, rtx pushed)
10557 {
10558 rtx second, bypass;
10559 rtx label = NULL_RTX;
10560 rtx condition;
10561 int bypass_probability = -1, second_probability = -1, probability = -1;
10562 rtx i;
10563
10564 if (target2 != pc_rtx)
10565 {
10566 rtx tmp = target2;
10567 code = reverse_condition_maybe_unordered (code);
10568 target2 = target1;
10569 target1 = tmp;
10570 }
10571
10572 condition = ix86_expand_fp_compare (code, op1, op2,
10573 tmp, &second, &bypass);
10574
10575 /* Remove pushed operand from stack. */
10576 if (pushed)
10577 ix86_free_from_memory (GET_MODE (pushed));
10578
10579 if (split_branch_probability >= 0)
10580 {
10581 /* Distribute the probabilities across the jumps.
10582 Assume that BYPASS and SECOND are always tests
10583 for UNORDERED. */
10584 probability = split_branch_probability;
10585
10586 /* A value of 1 is low enough that the probability does not need
10587 to be updated. Later we may run some experiments and see
10588 if unordered values are more frequent in practice. */
10589 if (bypass)
10590 bypass_probability = 1;
10591 if (second)
10592 second_probability = 1;
10593 }
10594 if (bypass != NULL_RTX)
10595 {
10596 label = gen_label_rtx ();
10597 i = emit_jump_insn (gen_rtx_SET
10598 (VOIDmode, pc_rtx,
10599 gen_rtx_IF_THEN_ELSE (VOIDmode,
10600 bypass,
10601 gen_rtx_LABEL_REF (VOIDmode,
10602 label),
10603 pc_rtx)));
10604 if (bypass_probability >= 0)
10605 REG_NOTES (i)
10606 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10607 GEN_INT (bypass_probability),
10608 REG_NOTES (i));
10609 }
10610 i = emit_jump_insn (gen_rtx_SET
10611 (VOIDmode, pc_rtx,
10612 gen_rtx_IF_THEN_ELSE (VOIDmode,
10613 condition, target1, target2)));
10614 if (probability >= 0)
10615 REG_NOTES (i)
10616 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10617 GEN_INT (probability),
10618 REG_NOTES (i));
10619 if (second != NULL_RTX)
10620 {
10621 i = emit_jump_insn (gen_rtx_SET
10622 (VOIDmode, pc_rtx,
10623 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10624 target2)));
10625 if (second_probability >= 0)
10626 REG_NOTES (i)
10627 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10628 GEN_INT (second_probability),
10629 REG_NOTES (i));
10630 }
10631 if (label != NULL_RTX)
10632 emit_label (label);
10633 }
10634
10635 int
10636 ix86_expand_setcc (enum rtx_code code, rtx dest)
10637 {
10638 rtx ret, tmp, tmpreg, equiv;
10639 rtx second_test, bypass_test;
10640
10641 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10642 return 0; /* FAIL */
10643
10644 gcc_assert (GET_MODE (dest) == QImode);
10645
10646 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10647 PUT_MODE (ret, QImode);
10648
10649 tmp = dest;
10650 tmpreg = dest;
10651
10652 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10653 if (bypass_test || second_test)
10654 {
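/* A bypass test guards the unordered case, so its reversed result is
   ANDed into the main result below; a second test covers additional
   cases and is ORed in.  */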
10655 rtx test = second_test;
10656 int bypass = 0;
10657 rtx tmp2 = gen_reg_rtx (QImode);
10658 if (bypass_test)
10659 {
10660 gcc_assert (!second_test);
10661 test = bypass_test;
10662 bypass = 1;
10663 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10664 }
10665 PUT_MODE (test, QImode);
10666 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10667
10668 if (bypass)
10669 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10670 else
10671 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10672 }
10673
10674 /* Attach a REG_EQUAL note describing the comparison result. */
10675 if (ix86_compare_op0 && ix86_compare_op1)
10676 {
10677 equiv = simplify_gen_relational (code, QImode,
10678 GET_MODE (ix86_compare_op0),
10679 ix86_compare_op0, ix86_compare_op1);
10680 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10681 }
10682
10683 return 1; /* DONE */
10684 }
10685
10686 /* Expand comparison setting or clearing carry flag. Return true when
10687 successful and set pop for the operation. */
10688 static bool
10689 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10690 {
10691 enum machine_mode mode =
10692 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10693
10694 /* Do not handle DImode compares that go through the special path. FP
10695 compares are handled below, but only when they reduce to a carry flag test. */
10696 if (mode == (TARGET_64BIT ? TImode : DImode))
10697 return false;
10698 if (FLOAT_MODE_P (mode))
10699 {
10700 rtx second_test = NULL, bypass_test = NULL;
10701 rtx compare_op, compare_seq;
10702
10703 /* Shortcut: the following common codes never translate into carry flag compares. */
10704 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10705 || code == ORDERED || code == UNORDERED)
10706 return false;
10707
10708 /* These comparisons require zero flag; swap operands so they won't. */
10709 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10710 && !TARGET_IEEE_FP)
10711 {
10712 rtx tmp = op0;
10713 op0 = op1;
10714 op1 = tmp;
10715 code = swap_condition (code);
10716 }
10717
10718 /* Try to expand the comparison and verify that we end up with a carry flag
10719 based comparison. This fails to be true only when we decide to expand the
10720 comparison using arithmetic, which is not a common scenario. */
10721 start_sequence ();
10722 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10723 &second_test, &bypass_test);
10724 compare_seq = get_insns ();
10725 end_sequence ();
10726
10727 if (second_test || bypass_test)
10728 return false;
10729 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10730 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10731 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10732 else
10733 code = GET_CODE (compare_op);
10734 if (code != LTU && code != GEU)
10735 return false;
10736 emit_insn (compare_seq);
10737 *pop = compare_op;
10738 return true;
10739 }
10740 if (!INTEGRAL_MODE_P (mode))
10741 return false;
10742 switch (code)
10743 {
10744 case LTU:
10745 case GEU:
10746 break;
10747
10748 /* Convert a==0 into (unsigned)a<1. */
10749 case EQ:
10750 case NE:
10751 if (op1 != const0_rtx)
10752 return false;
10753 op1 = const1_rtx;
10754 code = (code == EQ ? LTU : GEU);
10755 break;
10756
10757 /* Convert a>b into b<a or a>=b+1. */
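/* E.g. (unsigned) a > 5 becomes (unsigned) a >= 6; when the second
   operand is not a constant the operands are swapped instead.  */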
10758 case GTU:
10759 case LEU:
10760 if (GET_CODE (op1) == CONST_INT)
10761 {
10762 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10763 /* Bail out on overflow. We could still swap the operands, but that
10764 would force loading of the constant into a register. */
10765 if (op1 == const0_rtx
10766 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10767 return false;
10768 code = (code == GTU ? GEU : LTU);
10769 }
10770 else
10771 {
10772 rtx tmp = op1;
10773 op1 = op0;
10774 op0 = tmp;
10775 code = (code == GTU ? LTU : GEU);
10776 }
10777 break;
10778
10779 /* Convert a>=0 into (unsigned)a<0x80000000. */
10780 case LT:
10781 case GE:
10782 if (mode == DImode || op1 != const0_rtx)
10783 return false;
10784 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10785 code = (code == LT ? GEU : LTU);
10786 break;
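/* Convert a > -1 (i.e. a >= 0) into (unsigned) a < 0x80000000 and
   a <= -1 into (unsigned) a >= 0x80000000.  */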
10787 case LE:
10788 case GT:
10789 if (mode == DImode || op1 != constm1_rtx)
10790 return false;
10791 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10792 code = (code == LE ? GEU : LTU);
10793 break;
10794
10795 default:
10796 return false;
10797 }
10798 /* Swapping the operands may cause a constant to appear as the first operand. */
10799 if (!nonimmediate_operand (op0, VOIDmode))
10800 {
10801 if (no_new_pseudos)
10802 return false;
10803 op0 = force_reg (mode, op0);
10804 }
10805 ix86_compare_op0 = op0;
10806 ix86_compare_op1 = op1;
10807 *pop = ix86_expand_compare (code, NULL, NULL);
10808 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10809 return true;
10810 }
10811
10812 int
10813 ix86_expand_int_movcc (rtx operands[])
10814 {
10815 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10816 rtx compare_seq, compare_op;
10817 rtx second_test, bypass_test;
10818 enum machine_mode mode = GET_MODE (operands[0]);
10819 bool sign_bit_compare_p = false;
10820
10821 start_sequence ();
10822 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10823 compare_seq = get_insns ();
10824 end_sequence ();
10825
10826 compare_code = GET_CODE (compare_op);
10827
10828 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10829 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10830 sign_bit_compare_p = true;
10831
10832 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10833 HImode insns, we'd be swallowed in word prefix ops. */
10834
10835 if ((mode != HImode || TARGET_FAST_PREFIX)
10836 && (mode != (TARGET_64BIT ? TImode : DImode))
10837 && GET_CODE (operands[2]) == CONST_INT
10838 && GET_CODE (operands[3]) == CONST_INT)
10839 {
10840 rtx out = operands[0];
10841 HOST_WIDE_INT ct = INTVAL (operands[2]);
10842 HOST_WIDE_INT cf = INTVAL (operands[3]);
10843 HOST_WIDE_INT diff;
10844
10845 diff = ct - cf;
10846 /* Sign bit comparisons are better done using shifts than by using
10847 sbb. */
10848 if (sign_bit_compare_p
10849 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10850 ix86_compare_op1, &compare_op))
10851 {
10852 /* Detect overlap between destination and compare sources. */
10853 rtx tmp = out;
10854
10855 if (!sign_bit_compare_p)
10856 {
10857 bool fpcmp = false;
10858
10859 compare_code = GET_CODE (compare_op);
10860
10861 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10862 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10863 {
10864 fpcmp = true;
10865 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10866 }
10867
10868 /* To simplify rest of code, restrict to the GEU case. */
10869 if (compare_code == LTU)
10870 {
10871 HOST_WIDE_INT tmp = ct;
10872 ct = cf;
10873 cf = tmp;
10874 compare_code = reverse_condition (compare_code);
10875 code = reverse_condition (code);
10876 }
10877 else
10878 {
10879 if (fpcmp)
10880 PUT_CODE (compare_op,
10881 reverse_condition_maybe_unordered
10882 (GET_CODE (compare_op)));
10883 else
10884 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10885 }
10886 diff = ct - cf;
10887
10888 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10889 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10890 tmp = gen_reg_rtx (mode);
10891
10892 if (mode == DImode)
10893 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10894 else
10895 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10896 }
10897 else
10898 {
10899 if (code == GT || code == GE)
10900 code = reverse_condition (code);
10901 else
10902 {
10903 HOST_WIDE_INT tmp = ct;
10904 ct = cf;
10905 cf = tmp;
10906 diff = ct - cf;
10907 }
10908 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10909 ix86_compare_op1, VOIDmode, 0, -1);
10910 }
10911
10912 if (diff == 1)
10913 {
10914 /*
10915 * cmpl op0,op1
10916 * sbbl dest,dest
10917 * [addl dest, ct]
10918 *
10919 * Size 5 - 8.
10920 */
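      /* E.g. with ct == 5, cf == 4 (diff == 1): the sbb leaves the mask as
         -1 or 0, and adding ct yields 4 (== cf) or 5 (== ct), so no
         separate conditional move is needed.  */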
10921 if (ct)
10922 tmp = expand_simple_binop (mode, PLUS,
10923 tmp, GEN_INT (ct),
10924 copy_rtx (tmp), 1, OPTAB_DIRECT);
10925 }
10926 else if (cf == -1)
10927 {
10928 /*
10929 * cmpl op0,op1
10930 * sbbl dest,dest
10931 * orl $ct, dest
10932 *
10933 * Size 8.
10934 */
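      /* Here cf == -1, so OR-ing the constant ct into the -1/0 mask
         produced by sbb yields either -1 (== cf) or ct directly.  */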
10935 tmp = expand_simple_binop (mode, IOR,
10936 tmp, GEN_INT (ct),
10937 copy_rtx (tmp), 1, OPTAB_DIRECT);
10938 }
10939 else if (diff == -1 && ct)
10940 {
10941 /*
10942 * cmpl op0,op1
10943 * sbbl dest,dest
10944 * notl dest
10945 * [addl dest, cf]
10946 *
10947 * Size 8 - 11.
10948 */
10949 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10950 if (cf)
10951 tmp = expand_simple_binop (mode, PLUS,
10952 copy_rtx (tmp), GEN_INT (cf),
10953 copy_rtx (tmp), 1, OPTAB_DIRECT);
10954 }
10955 else
10956 {
10957 /*
10958 * cmpl op0,op1
10959 * sbbl dest,dest
10960 * [notl dest]
10961 * andl cf - ct, dest
10962 * [addl dest, ct]
10963 *
10964 * Size 8 - 11.
10965 */
10966
10967 if (cf == 0)
10968 {
10969 cf = ct;
10970 ct = 0;
10971 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10972 }
10973
10974 tmp = expand_simple_binop (mode, AND,
10975 copy_rtx (tmp),
10976 gen_int_mode (cf - ct, mode),
10977 copy_rtx (tmp), 1, OPTAB_DIRECT);
10978 if (ct)
10979 tmp = expand_simple_binop (mode, PLUS,
10980 copy_rtx (tmp), GEN_INT (ct),
10981 copy_rtx (tmp), 1, OPTAB_DIRECT);
10982 }
10983
10984 if (!rtx_equal_p (tmp, out))
10985 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10986
10987 return 1; /* DONE */
10988 }
10989
10990 if (diff < 0)
10991 {
10992 HOST_WIDE_INT tmp;
10993 tmp = ct, ct = cf, cf = tmp;
10994 diff = -diff;
10995 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10996 {
10997 /* We may be reversing unordered compare to normal compare, that
10998 is not valid in general (we may convert non-trapping condition
10999 to trapping one), however on i386 we currently emit all
11000 comparisons unordered. */
11001 compare_code = reverse_condition_maybe_unordered (compare_code);
11002 code = reverse_condition_maybe_unordered (code);
11003 }
11004 else
11005 {
11006 compare_code = reverse_condition (compare_code);
11007 code = reverse_condition (code);
11008 }
11009 }
11010
11011 compare_code = UNKNOWN;
11012 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11013 && GET_CODE (ix86_compare_op1) == CONST_INT)
11014 {
11015 if (ix86_compare_op1 == const0_rtx
11016 && (code == LT || code == GE))
11017 compare_code = code;
11018 else if (ix86_compare_op1 == constm1_rtx)
11019 {
11020 if (code == LE)
11021 compare_code = LT;
11022 else if (code == GT)
11023 compare_code = GE;
11024 }
11025 }
11026
11027 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11028 if (compare_code != UNKNOWN
11029 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11030 && (cf == -1 || ct == -1))
11031 {
11032 /* If lea code below could be used, only optimize
11033 if it results in a 2 insn sequence. */
11034
11035 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11036 || diff == 3 || diff == 5 || diff == 9)
11037 || (compare_code == LT && ct == -1)
11038 || (compare_code == GE && cf == -1))
11039 {
11040 /*
11041 * notl op1 (if necessary)
11042 * sarl $31, op1
11043 * orl cf, op1
11044 */
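      /* E.g. dest = (a < 0) ? -1 : cf: sarl $31 smears the sign bit of a
         across the register, giving -1 or 0, and OR-ing in cf turns the 0
         case into cf while leaving the -1 case alone.  */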
11045 if (ct != -1)
11046 {
11047 cf = ct;
11048 ct = -1;
11049 code = reverse_condition (code);
11050 }
11051
11052 out = emit_store_flag (out, code, ix86_compare_op0,
11053 ix86_compare_op1, VOIDmode, 0, -1);
11054
11055 out = expand_simple_binop (mode, IOR,
11056 out, GEN_INT (cf),
11057 out, 1, OPTAB_DIRECT);
11058 if (out != operands[0])
11059 emit_move_insn (operands[0], out);
11060
11061 return 1; /* DONE */
11062 }
11063 }
11064
11065
11066 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11067 || diff == 3 || diff == 5 || diff == 9)
11068 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11069 && (mode != DImode
11070 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11071 {
11072 /*
11073 * xorl dest,dest
11074 * cmpl op1,op2
11075 * setcc dest
11076 * lea cf(dest*(ct-cf)),dest
11077 *
11078 * Size 14.
11079 *
11080 * This also catches the degenerate setcc-only case.
11081 */
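      /* E.g. with diff == 4 the lea computes cf + 4*dest, mapping the 0/1
         setcc result onto cf or cf + 4 == ct in a single address
         calculation.  */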
11082
11083 rtx tmp;
11084 int nops;
11085
11086 out = emit_store_flag (out, code, ix86_compare_op0,
11087 ix86_compare_op1, VOIDmode, 0, 1);
11088
11089 nops = 0;
11090 /* On x86_64 the lea instruction operates on Pmode, so we need
11091 to get the arithmetic done in the proper mode to match. */
11092 if (diff == 1)
11093 tmp = copy_rtx (out);
11094 else
11095 {
11096 rtx out1;
11097 out1 = copy_rtx (out);
11098 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11099 nops++;
11100 if (diff & 1)
11101 {
11102 tmp = gen_rtx_PLUS (mode, tmp, out1);
11103 nops++;
11104 }
11105 }
11106 if (cf != 0)
11107 {
11108 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11109 nops++;
11110 }
11111 if (!rtx_equal_p (tmp, out))
11112 {
11113 if (nops == 1)
11114 out = force_operand (tmp, copy_rtx (out));
11115 else
11116 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11117 }
11118 if (!rtx_equal_p (out, operands[0]))
11119 emit_move_insn (operands[0], copy_rtx (out));
11120
11121 return 1; /* DONE */
11122 }
11123
11124 /*
11125 * General case: Jumpful:
11126 * xorl dest,dest cmpl op1, op2
11127 * cmpl op1, op2 movl ct, dest
11128 * setcc dest jcc 1f
11129 * decl dest movl cf, dest
11130 * andl (cf-ct),dest 1:
11131 * addl ct,dest
11132 *
11133 * Size 20. Size 14.
11134 *
11135 * This is reasonably steep, but branch mispredict costs are
11136 * high on modern cpus, so consider failing only if optimizing
11137 * for space.
11138 */
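      /* The branchless sequence computes ((setcc - 1) & (cf - ct)) + ct:
         when the condition holds setcc is 1, the mask is 0 and the result
         is ct; otherwise the mask is cf - ct and the result is cf.  */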
11139
11140 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11141 && BRANCH_COST >= 2)
11142 {
11143 if (cf == 0)
11144 {
11145 cf = ct;
11146 ct = 0;
11147 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11148 /* We may be reversing unordered compare to normal compare,
11149 that is not valid in general (we may convert non-trapping
11150 condition to trapping one), however on i386 we currently
11151 emit all comparisons unordered. */
11152 code = reverse_condition_maybe_unordered (code);
11153 else
11154 {
11155 code = reverse_condition (code);
11156 if (compare_code != UNKNOWN)
11157 compare_code = reverse_condition (compare_code);
11158 }
11159 }
11160
11161 if (compare_code != UNKNOWN)
11162 {
11163 /* notl op1 (if needed)
11164 sarl $31, op1
11165 andl (cf-ct), op1
11166 addl ct, op1
11167
11168 For x < 0 (resp. x <= -1) there will be no notl,
11169 so if possible swap the constants to get rid of the
11170 complement.
11171 True/false will be -1/0 while code below (store flag
11172 followed by decrement) is 0/-1, so the constants need
11173 to be exchanged once more. */
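          /* For the sign-bit case emit_store_flag already yields the -1/0
             mask (typically a sarl $31), so the mask is simply ANDed with
             (cf - ct) and ct added back, without the setcc/decrement pair
             used in the general case below.  */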
11174
11175 if (compare_code == GE || !cf)
11176 {
11177 code = reverse_condition (code);
11178 compare_code = LT;
11179 }
11180 else
11181 {
11182 HOST_WIDE_INT tmp = cf;
11183 cf = ct;
11184 ct = tmp;
11185 }
11186
11187 out = emit_store_flag (out, code, ix86_compare_op0,
11188 ix86_compare_op1, VOIDmode, 0, -1);
11189 }
11190 else
11191 {
11192 out = emit_store_flag (out, code, ix86_compare_op0,
11193 ix86_compare_op1, VOIDmode, 0, 1);
11194
11195 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11196 copy_rtx (out), 1, OPTAB_DIRECT);
11197 }
11198
11199 out = expand_simple_binop (mode, AND, copy_rtx (out),
11200 gen_int_mode (cf - ct, mode),
11201 copy_rtx (out), 1, OPTAB_DIRECT);
11202 if (ct)
11203 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11204 copy_rtx (out), 1, OPTAB_DIRECT);
11205 if (!rtx_equal_p (out, operands[0]))
11206 emit_move_insn (operands[0], copy_rtx (out));
11207
11208 return 1; /* DONE */
11209 }
11210 }
11211
11212 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11213 {
11214 /* Try a few things more with specific constants and a variable. */
11215
11216 optab op;
11217 rtx var, orig_out, out, tmp;
11218
11219 if (BRANCH_COST <= 2)
11220 return 0; /* FAIL */
11221
11222 /* If one of the two operands is an interesting constant, load a
11223 constant with the above and mask it in with a logical operation. */
11224
11225 if (GET_CODE (operands[2]) == CONST_INT)
11226 {
11227 var = operands[3];
11228 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11229 operands[3] = constm1_rtx, op = and_optab;
11230 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11231 operands[3] = const0_rtx, op = ior_optab;
11232 else
11233 return 0; /* FAIL */
11234 }
11235 else if (GET_CODE (operands[3]) == CONST_INT)
11236 {
11237 var = operands[2];
11238 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11239 operands[2] = constm1_rtx, op = and_optab;
11240 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11241 operands[2] = const0_rtx, op = ior_optab;
11242 else
11243 return 0; /* FAIL */
11244 }
11245 else
11246 return 0; /* FAIL */
11247
11248 orig_out = operands[0];
11249 tmp = gen_reg_rtx (mode);
11250 operands[0] = tmp;
11251
11252 /* Recurse to get the constant loaded. */
11253 if (ix86_expand_int_movcc (operands) == 0)
11254 return 0; /* FAIL */
11255
11256 /* Mask in the interesting variable. */
11257 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11258 OPTAB_WIDEN);
11259 if (!rtx_equal_p (out, orig_out))
11260 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11261
11262 return 1; /* DONE */
11263 }
11264
11265 /*
11266 * For comparison with above,
11267 *
11268 * movl cf,dest
11269 * movl ct,tmp
11270 * cmpl op1,op2
11271 * cmovcc tmp,dest
11272 *
11273 * Size 15.
11274 */
11275
11276 if (! nonimmediate_operand (operands[2], mode))
11277 operands[2] = force_reg (mode, operands[2]);
11278 if (! nonimmediate_operand (operands[3], mode))
11279 operands[3] = force_reg (mode, operands[3]);
11280
11281 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11282 {
11283 rtx tmp = gen_reg_rtx (mode);
11284 emit_move_insn (tmp, operands[3]);
11285 operands[3] = tmp;
11286 }
11287 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11288 {
11289 rtx tmp = gen_reg_rtx (mode);
11290 emit_move_insn (tmp, operands[2]);
11291 operands[2] = tmp;
11292 }
11293
11294 if (! register_operand (operands[2], VOIDmode)
11295 && (mode == QImode
11296 || ! register_operand (operands[3], VOIDmode)))
11297 operands[2] = force_reg (mode, operands[2]);
11298
11299 if (mode == QImode
11300 && ! register_operand (operands[3], VOIDmode))
11301 operands[3] = force_reg (mode, operands[3]);
11302
11303 emit_insn (compare_seq);
11304 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11305 gen_rtx_IF_THEN_ELSE (mode,
11306 compare_op, operands[2],
11307 operands[3])));
11308 if (bypass_test)
11309 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11310 gen_rtx_IF_THEN_ELSE (mode,
11311 bypass_test,
11312 copy_rtx (operands[3]),
11313 copy_rtx (operands[0]))));
11314 if (second_test)
11315 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11316 gen_rtx_IF_THEN_ELSE (mode,
11317 second_test,
11318 copy_rtx (operands[2]),
11319 copy_rtx (operands[0]))));
11320
11321 return 1; /* DONE */
11322 }
11323
11324 /* Swap, force into registers, or otherwise massage the two operands
11325 to an sse comparison with a mask result. Thus we differ a bit from
11326 ix86_prepare_fp_compare_args which expects to produce a flags result.
11327
11328 The DEST operand exists to help determine whether to commute commutative
11329 operators. The POP0/POP1 operands are updated in place. The new
11330 comparison code is returned, or UNKNOWN if not implementable. */
11331
11332 static enum rtx_code
11333 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11334 rtx *pop0, rtx *pop1)
11335 {
11336 rtx tmp;
11337
11338 switch (code)
11339 {
11340 case LTGT:
11341 case UNEQ:
11342 /* We have no LTGT as an operator. We could implement it with
11343 NE & ORDERED, but this requires an extra temporary. It's
11344 not clear that it's worth it. */
11345 return UNKNOWN;
11346
11347 case LT:
11348 case LE:
11349 case UNGT:
11350 case UNGE:
11351 /* These are supported directly. */
11352 break;
11353
11354 case EQ:
11355 case NE:
11356 case UNORDERED:
11357 case ORDERED:
11358 /* For commutative operators, try to canonicalize the destination
11359 operand to be first in the comparison - this helps reload to
11360 avoid extra moves. */
11361 if (!dest || !rtx_equal_p (dest, *pop1))
11362 break;
11363 /* FALLTHRU */
11364
11365 case GE:
11366 case GT:
11367 case UNLE:
11368 case UNLT:
11369 /* These are not supported directly. Swap the comparison operands
11370 to transform into something that is supported. */
11371 tmp = *pop0;
11372 *pop0 = *pop1;
11373 *pop1 = tmp;
11374 code = swap_condition (code);
11375 break;
11376
11377 default:
11378 gcc_unreachable ();
11379 }
11380
11381 return code;
11382 }
11383
11384 /* Detect conditional moves that exactly match min/max operational
11385 semantics. Note that this is IEEE safe, as long as we don't
11386 interchange the operands.
11387
11388 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11389 and TRUE if the operation is successful and instructions are emitted. */
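/* Note: roughly speaking, the SSE min/max instructions are not symmetric:
   when either operand is a NaN, or when both operands are zero (of either
   sign), they return the second source operand, which is why the operand
   order must not be changed here.  */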
11390
11391 static bool
11392 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11393 rtx cmp_op1, rtx if_true, rtx if_false)
11394 {
11395 enum machine_mode mode;
11396 bool is_min;
11397 rtx tmp;
11398
11399 if (code == LT)
11400 ;
11401 else if (code == UNGE)
11402 {
11403 tmp = if_true;
11404 if_true = if_false;
11405 if_false = tmp;
11406 }
11407 else
11408 return false;
11409
11410 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11411 is_min = true;
11412 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11413 is_min = false;
11414 else
11415 return false;
11416
11417 mode = GET_MODE (dest);
11418
11419 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11420 but MODE may be a vector mode and thus not appropriate. */
11421 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11422 {
11423 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11424 rtvec v;
11425
11426 if_true = force_reg (mode, if_true);
11427 v = gen_rtvec (2, if_true, if_false);
11428 tmp = gen_rtx_UNSPEC (mode, v, u);
11429 }
11430 else
11431 {
11432 code = is_min ? SMIN : SMAX;
11433 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11434 }
11435
11436 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11437 return true;
11438 }
11439
11440 /* Expand an sse vector comparison. Return the register with the result. */
11441
11442 static rtx
11443 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11444 rtx op_true, rtx op_false)
11445 {
11446 enum machine_mode mode = GET_MODE (dest);
11447 rtx x;
11448
11449 cmp_op0 = force_reg (mode, cmp_op0);
11450 if (!nonimmediate_operand (cmp_op1, mode))
11451 cmp_op1 = force_reg (mode, cmp_op1);
11452
11453 if (optimize
11454 || reg_overlap_mentioned_p (dest, op_true)
11455 || reg_overlap_mentioned_p (dest, op_false))
11456 dest = gen_reg_rtx (mode);
11457
11458 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11459 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11460
11461 return dest;
11462 }
11463
11464 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11465 operations. This is used for both scalar and vector conditional moves. */
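/* Since CMP is an all-ones/all-zeros mask per element, the move is, in
   effect, dest = (cmp & op_true) | (~cmp & op_false); the special cases
   below degenerate to a single AND or ANDN.  */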
11466
11467 static void
11468 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11469 {
11470 enum machine_mode mode = GET_MODE (dest);
11471 rtx t2, t3, x;
11472
11473 if (op_false == CONST0_RTX (mode))
11474 {
11475 op_true = force_reg (mode, op_true);
11476 x = gen_rtx_AND (mode, cmp, op_true);
11477 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11478 }
11479 else if (op_true == CONST0_RTX (mode))
11480 {
11481 op_false = force_reg (mode, op_false);
11482 x = gen_rtx_NOT (mode, cmp);
11483 x = gen_rtx_AND (mode, x, op_false);
11484 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11485 }
11486 else
11487 {
11488 op_true = force_reg (mode, op_true);
11489 op_false = force_reg (mode, op_false);
11490
11491 t2 = gen_reg_rtx (mode);
11492 if (optimize)
11493 t3 = gen_reg_rtx (mode);
11494 else
11495 t3 = dest;
11496
11497 x = gen_rtx_AND (mode, op_true, cmp);
11498 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11499
11500 x = gen_rtx_NOT (mode, cmp);
11501 x = gen_rtx_AND (mode, x, op_false);
11502 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11503
11504 x = gen_rtx_IOR (mode, t3, t2);
11505 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11506 }
11507 }
11508
11509 /* Expand a floating-point conditional move. Return true if successful. */
11510
11511 int
11512 ix86_expand_fp_movcc (rtx operands[])
11513 {
11514 enum machine_mode mode = GET_MODE (operands[0]);
11515 enum rtx_code code = GET_CODE (operands[1]);
11516 rtx tmp, compare_op, second_test, bypass_test;
11517
11518 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11519 {
11520 enum machine_mode cmode;
11521
11522 /* Since we've no cmove for sse registers, don't force bad register
11523 allocation just to gain access to it. Deny movcc when the
11524 comparison mode doesn't match the move mode. */
11525 cmode = GET_MODE (ix86_compare_op0);
11526 if (cmode == VOIDmode)
11527 cmode = GET_MODE (ix86_compare_op1);
11528 if (cmode != mode)
11529 return 0;
11530
11531 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11532 &ix86_compare_op0,
11533 &ix86_compare_op1);
11534 if (code == UNKNOWN)
11535 return 0;
11536
11537 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11538 ix86_compare_op1, operands[2],
11539 operands[3]))
11540 return 1;
11541
11542 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11543 ix86_compare_op1, operands[2], operands[3]);
11544 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11545 return 1;
11546 }
11547
11548 /* The floating point conditional move instructions don't directly
11549 support conditions resulting from a signed integer comparison. */
11550
11551 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11552
11556 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11557 {
11558 gcc_assert (!second_test && !bypass_test);
11559 tmp = gen_reg_rtx (QImode);
11560 ix86_expand_setcc (code, tmp);
11561 code = NE;
11562 ix86_compare_op0 = tmp;
11563 ix86_compare_op1 = const0_rtx;
11564 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11565 }
11566 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11567 {
11568 tmp = gen_reg_rtx (mode);
11569 emit_move_insn (tmp, operands[3]);
11570 operands[3] = tmp;
11571 }
11572 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11573 {
11574 tmp = gen_reg_rtx (mode);
11575 emit_move_insn (tmp, operands[2]);
11576 operands[2] = tmp;
11577 }
11578
11579 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11580 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11581 operands[2], operands[3])));
11582 if (bypass_test)
11583 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11584 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11585 operands[3], operands[0])));
11586 if (second_test)
11587 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11588 gen_rtx_IF_THEN_ELSE (mode, second_test,
11589 operands[2], operands[0])));
11590
11591 return 1;
11592 }
11593
11594 /* Expand a floating-point vector conditional move; a vcond operation
11595 rather than a movcc operation. */
11596
11597 bool
11598 ix86_expand_fp_vcond (rtx operands[])
11599 {
11600 enum rtx_code code = GET_CODE (operands[3]);
11601 rtx cmp;
11602
11603 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11604 &operands[4], &operands[5]);
11605 if (code == UNKNOWN)
11606 return false;
11607
11608 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11609 operands[5], operands[1], operands[2]))
11610 return true;
11611
11612 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11613 operands[1], operands[2]);
11614 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11615 return true;
11616 }
11617
11618 /* Expand a signed integral vector conditional move. */
11619
11620 bool
11621 ix86_expand_int_vcond (rtx operands[])
11622 {
11623 enum machine_mode mode = GET_MODE (operands[0]);
11624 enum rtx_code code = GET_CODE (operands[3]);
11625 bool negate = false;
11626 rtx x, cop0, cop1;
11627
11628 cop0 = operands[4];
11629 cop1 = operands[5];
11630
11631 /* Canonicalize the comparison to EQ, GT, GTU. */
11632 switch (code)
11633 {
11634 case EQ:
11635 case GT:
11636 case GTU:
11637 break;
11638
11639 case NE:
11640 case LE:
11641 case LEU:
11642 code = reverse_condition (code);
11643 negate = true;
11644 break;
11645
11646 case GE:
11647 case GEU:
11648 code = reverse_condition (code);
11649 negate = true;
11650 /* FALLTHRU */
11651
11652 case LT:
11653 case LTU:
11654 code = swap_condition (code);
11655 x = cop0, cop0 = cop1, cop1 = x;
11656 break;
11657
11658 default:
11659 gcc_unreachable ();
11660 }
11661
11662 /* Unsigned parallel compare is not supported by the hardware. Play some
11663 tricks to turn this into a signed comparison against 0. */
11664 if (code == GTU)
11665 {
11666 cop0 = force_reg (mode, cop0);
11667
11668 switch (mode)
11669 {
11670 case V4SImode:
11671 {
11672 rtx t1, t2, mask;
11673
11674 /* Perform a parallel modulo subtraction. */
11675 t1 = gen_reg_rtx (mode);
11676 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11677
11678 /* Extract the original sign bit of op0. */
11679 mask = GEN_INT (-0x80000000);
11680 mask = gen_rtx_CONST_VECTOR (mode,
11681 gen_rtvec (4, mask, mask, mask, mask));
11682 mask = force_reg (mode, mask);
11683 t2 = gen_reg_rtx (mode);
11684 emit_insn (gen_andv4si3 (t2, cop0, mask));
11685
11686 /* XOR it back into the result of the subtraction. This results
11687 in the sign bit set iff we saw unsigned underflow. */
11688 x = gen_reg_rtx (mode);
11689 emit_insn (gen_xorv4si3 (x, t1, t2));
11690
11691 code = GT;
11692 }
11693 break;
11694
11695 case V16QImode:
11696 case V8HImode:
11697 /* Perform a parallel unsigned saturating subtraction. */
11698 x = gen_reg_rtx (mode);
11699 emit_insn (gen_rtx_SET (VOIDmode, x,
11700 gen_rtx_US_MINUS (mode, cop0, cop1)));
11701
11702 code = EQ;
11703 negate = !negate;
11704 break;
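	  /* I.e. a <= b (unsigned) iff the saturating subtraction a - b is
	     zero, so compare the result against zero and let NEGATE swap
	     the two move arms.  */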
11705
11706 default:
11707 gcc_unreachable ();
11708 }
11709
11710 cop0 = x;
11711 cop1 = CONST0_RTX (mode);
11712 }
11713
11714 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11715 operands[1+negate], operands[2-negate]);
11716
11717 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11718 operands[2-negate]);
11719 return true;
11720 }
11721
11722 /* Expand conditional increment or decrement using adc/sbb instructions.
11723 The default case using setcc followed by the conditional move can be
11724 done by generic code. */
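/* E.g. dest = x + (a < b) on unsigned operands becomes roughly
   cmpl b, a; adcl $0, x -- the carry produced by the compare is folded
   directly into the addition, with sbb used for the decrement case.  */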
11725 int
11726 ix86_expand_int_addcc (rtx operands[])
11727 {
11728 enum rtx_code code = GET_CODE (operands[1]);
11729 rtx compare_op;
11730 rtx val = const0_rtx;
11731 bool fpcmp = false;
11732 enum machine_mode mode = GET_MODE (operands[0]);
11733
11734 if (operands[3] != const1_rtx
11735 && operands[3] != constm1_rtx)
11736 return 0;
11737 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11738 ix86_compare_op1, &compare_op))
11739 return 0;
11740 code = GET_CODE (compare_op);
11741
11742 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11743 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11744 {
11745 fpcmp = true;
11746 code = ix86_fp_compare_code_to_integer (code);
11747 }
11748
11749 if (code != LTU)
11750 {
11751 val = constm1_rtx;
11752 if (fpcmp)
11753 PUT_CODE (compare_op,
11754 reverse_condition_maybe_unordered
11755 (GET_CODE (compare_op)));
11756 else
11757 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11758 }
11759 PUT_MODE (compare_op, mode);
11760
11761 /* Construct either adc or sbb insn. */
11762 if ((code == LTU) == (operands[3] == constm1_rtx))
11763 {
11764 switch (GET_MODE (operands[0]))
11765 {
11766 case QImode:
11767 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11768 break;
11769 case HImode:
11770 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11771 break;
11772 case SImode:
11773 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11774 break;
11775 case DImode:
11776 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11777 break;
11778 default:
11779 gcc_unreachable ();
11780 }
11781 }
11782 else
11783 {
11784 switch (GET_MODE (operands[0]))
11785 {
11786 case QImode:
11787 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11788 break;
11789 case HImode:
11790 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11791 break;
11792 case SImode:
11793 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11794 break;
11795 case DImode:
11796 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11797 break;
11798 default:
11799 gcc_unreachable ();
11800 }
11801 }
11802 return 1; /* DONE */
11803 }
11804
11805
11806 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11807 works for floating point parameters and non-offsettable memories.
11808 For pushes, it returns just stack offsets; the values will be saved
11809 in the right order. At most three parts are generated. */
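/* E.g. on a 32-bit target a DFmode value yields two SImode parts and an
   XFmode value yields three, while on a 64-bit target XFmode and TFmode
   split into a DImode part plus an SImode or DImode upper part.  */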
11810
11811 static int
11812 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11813 {
11814 int size;
11815
11816 if (!TARGET_64BIT)
11817 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11818 else
11819 size = (GET_MODE_SIZE (mode) + 4) / 8;
11820
11821 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11822 gcc_assert (size >= 2 && size <= 3);
11823
11824 /* Optimize constant pool reference to immediates. This is used by fp
11825 moves, that force all constants to memory to allow combining. */
11826 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11827 {
11828 rtx tmp = maybe_get_pool_constant (operand);
11829 if (tmp)
11830 operand = tmp;
11831 }
11832
11833 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11834 {
11835 /* The only non-offsettable memories we handle are pushes. */
11836 int ok = push_operand (operand, VOIDmode);
11837
11838 gcc_assert (ok);
11839
11840 operand = copy_rtx (operand);
11841 PUT_MODE (operand, Pmode);
11842 parts[0] = parts[1] = parts[2] = operand;
11843 return size;
11844 }
11845
11846 if (GET_CODE (operand) == CONST_VECTOR)
11847 {
11848 enum machine_mode imode = int_mode_for_mode (mode);
11849 /* Caution: if we looked through a constant pool memory above,
11850 the operand may actually have a different mode now. That's
11851 ok, since we want to pun this all the way back to an integer. */
11852 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11853 gcc_assert (operand != NULL);
11854 mode = imode;
11855 }
11856
11857 if (!TARGET_64BIT)
11858 {
11859 if (mode == DImode)
11860 split_di (&operand, 1, &parts[0], &parts[1]);
11861 else
11862 {
11863 if (REG_P (operand))
11864 {
11865 gcc_assert (reload_completed);
11866 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11867 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11868 if (size == 3)
11869 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11870 }
11871 else if (offsettable_memref_p (operand))
11872 {
11873 operand = adjust_address (operand, SImode, 0);
11874 parts[0] = operand;
11875 parts[1] = adjust_address (operand, SImode, 4);
11876 if (size == 3)
11877 parts[2] = adjust_address (operand, SImode, 8);
11878 }
11879 else if (GET_CODE (operand) == CONST_DOUBLE)
11880 {
11881 REAL_VALUE_TYPE r;
11882 long l[4];
11883
11884 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11885 switch (mode)
11886 {
11887 case XFmode:
11888 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11889 parts[2] = gen_int_mode (l[2], SImode);
11890 break;
11891 case DFmode:
11892 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11893 break;
11894 default:
11895 gcc_unreachable ();
11896 }
11897 parts[1] = gen_int_mode (l[1], SImode);
11898 parts[0] = gen_int_mode (l[0], SImode);
11899 }
11900 else
11901 gcc_unreachable ();
11902 }
11903 }
11904 else
11905 {
11906 if (mode == TImode)
11907 split_ti (&operand, 1, &parts[0], &parts[1]);
11908 if (mode == XFmode || mode == TFmode)
11909 {
11910 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11911 if (REG_P (operand))
11912 {
11913 gcc_assert (reload_completed);
11914 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11915 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11916 }
11917 else if (offsettable_memref_p (operand))
11918 {
11919 operand = adjust_address (operand, DImode, 0);
11920 parts[0] = operand;
11921 parts[1] = adjust_address (operand, upper_mode, 8);
11922 }
11923 else if (GET_CODE (operand) == CONST_DOUBLE)
11924 {
11925 REAL_VALUE_TYPE r;
11926 long l[4];
11927
11928 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11929 real_to_target (l, &r, mode);
11930
11931 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11932 if (HOST_BITS_PER_WIDE_INT >= 64)
11933 parts[0]
11934 = gen_int_mode
11935 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11936 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11937 DImode);
11938 else
11939 parts[0] = immed_double_const (l[0], l[1], DImode);
11940
11941 if (upper_mode == SImode)
11942 parts[1] = gen_int_mode (l[2], SImode);
11943 else if (HOST_BITS_PER_WIDE_INT >= 64)
11944 parts[1]
11945 = gen_int_mode
11946 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11947 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11948 DImode);
11949 else
11950 parts[1] = immed_double_const (l[2], l[3], DImode);
11951 }
11952 else
11953 gcc_unreachable ();
11954 }
11955 }
11956
11957 return size;
11958 }
11959
11960 /* Emit insns to perform a move or push of DI, DF, and XF values.
11961 All required insns are emitted here. Operands 2-4 receive the
11962 destination parts and operands 5-7 the corresponding source parts,
11963 in the correct order. */
11964
11965 void
11966 ix86_split_long_move (rtx operands[])
11967 {
11968 rtx part[2][3];
11969 int nparts;
11970 int push = 0;
11971 int collisions = 0;
11972 enum machine_mode mode = GET_MODE (operands[0]);
11973
11974 /* The DFmode expanders may ask us to move a double.
11975 For a 64-bit target this is a single move. By hiding this fact
11976 here we simplify the i386.md splitters. */
11977 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11978 {
11979 /* Optimize constant pool reference to immediates. This is used by
11980 fp moves, that force all constants to memory to allow combining. */
11981
11982 if (GET_CODE (operands[1]) == MEM
11983 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11984 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11985 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11986 if (push_operand (operands[0], VOIDmode))
11987 {
11988 operands[0] = copy_rtx (operands[0]);
11989 PUT_MODE (operands[0], Pmode);
11990 }
11991 else
11992 operands[0] = gen_lowpart (DImode, operands[0]);
11993 operands[1] = gen_lowpart (DImode, operands[1]);
11994 emit_move_insn (operands[0], operands[1]);
11995 return;
11996 }
11997
11998 /* The only non-offsettable memory we handle is push. */
11999 if (push_operand (operands[0], VOIDmode))
12000 push = 1;
12001 else
12002 gcc_assert (GET_CODE (operands[0]) != MEM
12003 || offsettable_memref_p (operands[0]));
12004
12005 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12006 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12007
12008 /* When emitting push, take care for source operands on the stack. */
12009 if (push && GET_CODE (operands[1]) == MEM
12010 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12011 {
12012 if (nparts == 3)
12013 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12014 XEXP (part[1][2], 0));
12015 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12016 XEXP (part[1][1], 0));
12017 }
12018
12019 /* We need to do the copy in the right order in case an address register
12020 of the source overlaps the destination. */
12021 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12022 {
12023 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12024 collisions++;
12025 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12026 collisions++;
12027 if (nparts == 3
12028 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12029 collisions++;
12030
12031 /* Collision in the middle part can be handled by reordering. */
12032 if (collisions == 1 && nparts == 3
12033 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12034 {
12035 rtx tmp;
12036 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12037 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12038 }
12039
12040 /* If there are more collisions, we can't handle it by reordering.
12041 Do an lea to the last part and use only one colliding move. */
12042 else if (collisions > 1)
12043 {
12044 rtx base;
12045
12046 collisions = 1;
12047
12048 base = part[0][nparts - 1];
12049
12050 /* Handle the case when the last part isn't valid for lea.
12051 Happens in 64-bit mode storing the 12-byte XFmode. */
12052 if (GET_MODE (base) != Pmode)
12053 base = gen_rtx_REG (Pmode, REGNO (base));
12054
12055 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12056 part[1][0] = replace_equiv_address (part[1][0], base);
12057 part[1][1] = replace_equiv_address (part[1][1],
12058 plus_constant (base, UNITS_PER_WORD));
12059 if (nparts == 3)
12060 part[1][2] = replace_equiv_address (part[1][2],
12061 plus_constant (base, 8));
12062 }
12063 }
12064
12065 if (push)
12066 {
12067 if (!TARGET_64BIT)
12068 {
12069 if (nparts == 3)
12070 {
12071 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12072 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12073 emit_move_insn (part[0][2], part[1][2]);
12074 }
12075 }
12076 else
12077 {
12078 /* In 64-bit mode we don't have a 32-bit push available. If this is a
12079 register, that is OK - we will just use the larger counterpart. We
12080 also retype the memory - it comes from an attempt to avoid the REX
12081 prefix on moving the second half of a TFmode value. */
12082 if (GET_MODE (part[1][1]) == SImode)
12083 {
12084 switch (GET_CODE (part[1][1]))
12085 {
12086 case MEM:
12087 part[1][1] = adjust_address (part[1][1], DImode, 0);
12088 break;
12089
12090 case REG:
12091 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12092 break;
12093
12094 default:
12095 gcc_unreachable ();
12096 }
12097
12098 if (GET_MODE (part[1][0]) == SImode)
12099 part[1][0] = part[1][1];
12100 }
12101 }
12102 emit_move_insn (part[0][1], part[1][1]);
12103 emit_move_insn (part[0][0], part[1][0]);
12104 return;
12105 }
12106
12107 /* Choose correct order to not overwrite the source before it is copied. */
12108 if ((REG_P (part[0][0])
12109 && REG_P (part[1][1])
12110 && (REGNO (part[0][0]) == REGNO (part[1][1])
12111 || (nparts == 3
12112 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12113 || (collisions > 0
12114 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12115 {
12116 if (nparts == 3)
12117 {
12118 operands[2] = part[0][2];
12119 operands[3] = part[0][1];
12120 operands[4] = part[0][0];
12121 operands[5] = part[1][2];
12122 operands[6] = part[1][1];
12123 operands[7] = part[1][0];
12124 }
12125 else
12126 {
12127 operands[2] = part[0][1];
12128 operands[3] = part[0][0];
12129 operands[5] = part[1][1];
12130 operands[6] = part[1][0];
12131 }
12132 }
12133 else
12134 {
12135 if (nparts == 3)
12136 {
12137 operands[2] = part[0][0];
12138 operands[3] = part[0][1];
12139 operands[4] = part[0][2];
12140 operands[5] = part[1][0];
12141 operands[6] = part[1][1];
12142 operands[7] = part[1][2];
12143 }
12144 else
12145 {
12146 operands[2] = part[0][0];
12147 operands[3] = part[0][1];
12148 operands[5] = part[1][0];
12149 operands[6] = part[1][1];
12150 }
12151 }
12152
12153 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12154 if (optimize_size)
12155 {
12156 if (GET_CODE (operands[5]) == CONST_INT
12157 && operands[5] != const0_rtx
12158 && REG_P (operands[2]))
12159 {
12160 if (GET_CODE (operands[6]) == CONST_INT
12161 && INTVAL (operands[6]) == INTVAL (operands[5]))
12162 operands[6] = operands[2];
12163
12164 if (nparts == 3
12165 && GET_CODE (operands[7]) == CONST_INT
12166 && INTVAL (operands[7]) == INTVAL (operands[5]))
12167 operands[7] = operands[2];
12168 }
12169
12170 if (nparts == 3
12171 && GET_CODE (operands[6]) == CONST_INT
12172 && operands[6] != const0_rtx
12173 && REG_P (operands[3])
12174 && GET_CODE (operands[7]) == CONST_INT
12175 && INTVAL (operands[7]) == INTVAL (operands[6]))
12176 operands[7] = operands[3];
12177 }
12178
12179 emit_move_insn (operands[2], operands[5]);
12180 emit_move_insn (operands[3], operands[6]);
12181 if (nparts == 3)
12182 emit_move_insn (operands[4], operands[7]);
12183
12184 return;
12185 }
12186
12187 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12188 left shift by a constant, either using a single shift or
12189 a sequence of add instructions. */
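/* E.g. a shift left by 2 can be replaced by two "add reg, reg" doublings
   when doubling twice is no more expensive than the constant shift for the
   target (and we are not optimizing for size), which is the trade-off
   checked below.  */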
12190
12191 static void
12192 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12193 {
12194 if (count == 1)
12195 {
12196 emit_insn ((mode == DImode
12197 ? gen_addsi3
12198 : gen_adddi3) (operand, operand, operand));
12199 }
12200 else if (!optimize_size
12201 && count * ix86_cost->add <= ix86_cost->shift_const)
12202 {
12203 int i;
12204 for (i=0; i<count; i++)
12205 {
12206 emit_insn ((mode == DImode
12207 ? gen_addsi3
12208 : gen_adddi3) (operand, operand, operand));
12209 }
12210 }
12211 else
12212 emit_insn ((mode == DImode
12213 ? gen_ashlsi3
12214 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12215 }
12216
12217 void
12218 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12219 {
12220 rtx low[2], high[2];
12221 int count;
12222 const int single_width = mode == DImode ? 32 : 64;
12223
12224 if (GET_CODE (operands[2]) == CONST_INT)
12225 {
12226 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12227 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12228
12229 if (count >= single_width)
12230 {
12231 emit_move_insn (high[0], low[1]);
12232 emit_move_insn (low[0], const0_rtx);
12233
12234 if (count > single_width)
12235 ix86_expand_ashl_const (high[0], count - single_width, mode);
12236 }
12237 else
12238 {
12239 if (!rtx_equal_p (operands[0], operands[1]))
12240 emit_move_insn (operands[0], operands[1]);
12241 emit_insn ((mode == DImode
12242 ? gen_x86_shld_1
12243 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12244 ix86_expand_ashl_const (low[0], count, mode);
12245 }
12246 return;
12247 }
12248
12249 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12250
12251 if (operands[1] == const1_rtx)
12252 {
12253 /* Assuming we've chosen QImode-capable registers, then 1 << N
12254 can be done with two 32/64-bit shifts, no branches, no cmoves. */
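	  /* Roughly: clear both halves, test bit 5 (or 6) of the count, set
	     the low byte of the low or high half to 1 depending on whether
	     the count is below the word size, then shift both halves by the
	     count (the hardware shifts mask the count), so exactly one half
	     ends up holding the single set bit.  */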
12255 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12256 {
12257 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12258
12259 ix86_expand_clear (low[0]);
12260 ix86_expand_clear (high[0]);
12261 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12262
12263 d = gen_lowpart (QImode, low[0]);
12264 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12265 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12266 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12267
12268 d = gen_lowpart (QImode, high[0]);
12269 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12270 s = gen_rtx_NE (QImode, flags, const0_rtx);
12271 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12272 }
12273
12274 /* Otherwise, we can get the same results by manually performing
12275 a bit extract operation on bit 5/6, and then performing the two
12276 shifts. The two methods of getting 0/1 into low/high are exactly
12277 the same size. Avoiding the shift in the bit extract case helps
12278 pentium4 a bit; no one else seems to care much either way. */
12279 else
12280 {
12281 rtx x;
12282
12283 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12284 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12285 else
12286 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12287 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12288
12289 emit_insn ((mode == DImode
12290 ? gen_lshrsi3
12291 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12292 emit_insn ((mode == DImode
12293 ? gen_andsi3
12294 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12295 emit_move_insn (low[0], high[0]);
12296 emit_insn ((mode == DImode
12297 ? gen_xorsi3
12298 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12299 }
12300
12301 emit_insn ((mode == DImode
12302 ? gen_ashlsi3
12303 : gen_ashldi3) (low[0], low[0], operands[2]));
12304 emit_insn ((mode == DImode
12305 ? gen_ashlsi3
12306 : gen_ashldi3) (high[0], high[0], operands[2]));
12307 return;
12308 }
12309
12310 if (operands[1] == constm1_rtx)
12311 {
12312 /* For -1 << N, we can avoid the shld instruction, because we
12313 know that we're shifting 0...31/63 ones into a -1. */
12314 emit_move_insn (low[0], constm1_rtx);
12315 if (optimize_size)
12316 emit_move_insn (high[0], low[0]);
12317 else
12318 emit_move_insn (high[0], constm1_rtx);
12319 }
12320 else
12321 {
12322 if (!rtx_equal_p (operands[0], operands[1]))
12323 emit_move_insn (operands[0], operands[1]);
12324
12325 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12326 emit_insn ((mode == DImode
12327 ? gen_x86_shld_1
12328 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12329 }
12330
12331 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12332
12333 if (TARGET_CMOVE && scratch)
12334 {
12335 ix86_expand_clear (scratch);
12336 emit_insn ((mode == DImode
12337 ? gen_x86_shift_adj_1
12338 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12339 }
12340 else
12341 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12342 }
12343
12344 void
12345 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12346 {
12347 rtx low[2], high[2];
12348 int count;
12349 const int single_width = mode == DImode ? 32 : 64;
12350
12351 if (GET_CODE (operands[2]) == CONST_INT)
12352 {
12353 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12354 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12355
12356 if (count == single_width * 2 - 1)
12357 {
12358 emit_move_insn (high[0], high[1]);
12359 emit_insn ((mode == DImode
12360 ? gen_ashrsi3
12361 : gen_ashrdi3) (high[0], high[0],
12362 GEN_INT (single_width - 1)));
12363 emit_move_insn (low[0], high[0]);
12364
12365 }
12366 else if (count >= single_width)
12367 {
12368 emit_move_insn (low[0], high[1]);
12369 emit_move_insn (high[0], low[0]);
12370 emit_insn ((mode == DImode
12371 ? gen_ashrsi3
12372 : gen_ashrdi3) (high[0], high[0],
12373 GEN_INT (single_width - 1)));
12374 if (count > single_width)
12375 emit_insn ((mode == DImode
12376 ? gen_ashrsi3
12377 : gen_ashrdi3) (low[0], low[0],
12378 GEN_INT (count - single_width)));
12379 }
12380 else
12381 {
12382 if (!rtx_equal_p (operands[0], operands[1]))
12383 emit_move_insn (operands[0], operands[1]);
12384 emit_insn ((mode == DImode
12385 ? gen_x86_shrd_1
12386 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12387 emit_insn ((mode == DImode
12388 ? gen_ashrsi3
12389 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12390 }
12391 }
12392 else
12393 {
12394 if (!rtx_equal_p (operands[0], operands[1]))
12395 emit_move_insn (operands[0], operands[1]);
12396
12397 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12398
12399 emit_insn ((mode == DImode
12400 ? gen_x86_shrd_1
12401 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12402 emit_insn ((mode == DImode
12403 ? gen_ashrsi3
12404 : gen_ashrdi3) (high[0], high[0], operands[2]));
12405
12406 if (TARGET_CMOVE && scratch)
12407 {
12408 emit_move_insn (scratch, high[0]);
12409 emit_insn ((mode == DImode
12410 ? gen_ashrsi3
12411 : gen_ashrdi3) (scratch, scratch,
12412 GEN_INT (single_width - 1)));
12413 emit_insn ((mode == DImode
12414 ? gen_x86_shift_adj_1
12415 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12416 scratch));
12417 }
12418 else
12419 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12420 }
12421 }
12422
12423 void
12424 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12425 {
12426 rtx low[2], high[2];
12427 int count;
12428 const int single_width = mode == DImode ? 32 : 64;
12429
12430 if (GET_CODE (operands[2]) == CONST_INT)
12431 {
12432 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12433 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12434
12435 if (count >= single_width)
12436 {
12437 emit_move_insn (low[0], high[1]);
12438 ix86_expand_clear (high[0]);
12439
12440 if (count > single_width)
12441 emit_insn ((mode == DImode
12442 ? gen_lshrsi3
12443 : gen_lshrdi3) (low[0], low[0],
12444 GEN_INT (count - single_width)));
12445 }
12446 else
12447 {
12448 if (!rtx_equal_p (operands[0], operands[1]))
12449 emit_move_insn (operands[0], operands[1]);
12450 emit_insn ((mode == DImode
12451 ? gen_x86_shrd_1
12452 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12453 emit_insn ((mode == DImode
12454 ? gen_lshrsi3
12455 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12456 }
12457 }
12458 else
12459 {
12460 if (!rtx_equal_p (operands[0], operands[1]))
12461 emit_move_insn (operands[0], operands[1]);
12462
12463 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12464
12465 emit_insn ((mode == DImode
12466 ? gen_x86_shrd_1
12467 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12468 emit_insn ((mode == DImode
12469 ? gen_lshrsi3
12470 : gen_lshrdi3) (high[0], high[0], operands[2]));
12471
12472 /* Heh. By reversing the arguments, we can reuse this pattern. */
12473 if (TARGET_CMOVE && scratch)
12474 {
12475 ix86_expand_clear (scratch);
12476 emit_insn ((mode == DImode
12477 ? gen_x86_shift_adj_1
12478 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12479 scratch));
12480 }
12481 else
12482 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12483 }
12484 }
12485
12486 /* Helper function for the string operations below. Test whether VARIABLE
12487 is aligned to VALUE bytes. If so, jump to the returned label. */
12488 static rtx
12489 ix86_expand_aligntest (rtx variable, int value)
12490 {
12491 rtx label = gen_label_rtx ();
12492 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12493 if (GET_MODE (variable) == DImode)
12494 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12495 else
12496 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12497 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12498 1, label);
12499 return label;
12500 }
12501
12502 /* Adjust COUNTER by the VALUE. */
12503 static void
12504 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12505 {
12506 if (GET_MODE (countreg) == DImode)
12507 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12508 else
12509 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12510 }
12511
12512 /* Zero extend possibly SImode EXP to Pmode register. */
12513 rtx
12514 ix86_zero_extend_to_Pmode (rtx exp)
12515 {
12516 rtx r;
12517 if (GET_MODE (exp) == VOIDmode)
12518 return force_reg (Pmode, exp);
12519 if (GET_MODE (exp) == Pmode)
12520 return copy_to_mode_reg (Pmode, exp);
12521 r = gen_reg_rtx (Pmode);
12522 emit_insn (gen_zero_extendsidi2 (r, exp));
12523 return r;
12524 }
12525
12526 /* Expand string move (memcpy) operation. Use i386 string operations when
12527 profitable. expand_clrmem contains similar code. */
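/* Roughly three strategies are used below: when not optimizing or when
   optimizing for size, a bare rep movsb for unknown counts or counts not
   divisible by 4; for counts and alignments known at compile time, rep
   movs{l,q} followed by a short fixed tail of single moves; otherwise a
   generic sequence that aligns the destination, uses rep movsl, and copies
   the remainder.  */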
12528 int
12529 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12530 {
12531 rtx srcreg, destreg, countreg, srcexp, destexp;
12532 enum machine_mode counter_mode;
12533 HOST_WIDE_INT align = 0;
12534 unsigned HOST_WIDE_INT count = 0;
12535
12536 if (GET_CODE (align_exp) == CONST_INT)
12537 align = INTVAL (align_exp);
12538
12539 /* Can't use any of this if the user has appropriated esi or edi. */
12540 if (global_regs[4] || global_regs[5])
12541 return 0;
12542
12543 /* This simple hack avoids all inlining code and simplifies code below. */
12544 if (!TARGET_ALIGN_STRINGOPS)
12545 align = 64;
12546
12547 if (GET_CODE (count_exp) == CONST_INT)
12548 {
12549 count = INTVAL (count_exp);
12550 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12551 return 0;
12552 }
12553
12554 /* Figure out proper mode for counter. For 32bits it is always SImode,
12555 for 64bits use SImode when possible, otherwise DImode.
12556 Set count to number of bytes copied when known at compile time. */
12557 if (!TARGET_64BIT
12558 || GET_MODE (count_exp) == SImode
12559 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12560 counter_mode = SImode;
12561 else
12562 counter_mode = DImode;
12563
12564 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12565
12566 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12567 if (destreg != XEXP (dst, 0))
12568 dst = replace_equiv_address_nv (dst, destreg);
12569 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12570 if (srcreg != XEXP (src, 0))
12571 src = replace_equiv_address_nv (src, srcreg);
12572
12573 /* When optimizing for size emit simple rep ; movsb instruction for
12574 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12575 sequence is shorter than mov{b,l} $count, %{cl,ecx}; rep; movsb.
12576 Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12577 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12578 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12579 known to be zero or not. The rep; movsb sequence causes higher
12580 register pressure though, so take that into account. */
12581
12582 if ((!optimize || optimize_size)
12583 && (count == 0
12584 || ((count & 0x03)
12585 && (!optimize_size
12586 || count > 5 * 4
12587 || (count & 3) + count / 4 > 6))))
12588 {
12589 emit_insn (gen_cld ());
12590 countreg = ix86_zero_extend_to_Pmode (count_exp);
12591 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12592 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12593 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12594 destexp, srcexp));
12595 }
12596
12597 /* For constant aligned (or small unaligned) copies use rep movsl
12598 followed by code copying the rest. For PentiumPro ensure 8 byte
12599 alignment to allow rep movsl acceleration. */
12600
12601 else if (count != 0
12602 && (align >= 8
12603 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12604 || optimize_size || count < (unsigned int) 64))
12605 {
12606 unsigned HOST_WIDE_INT offset = 0;
12607 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12608 rtx srcmem, dstmem;
12609
12610 emit_insn (gen_cld ());
12611 if (count & ~(size - 1))
12612 {
12613 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12614 {
12615 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12616
12617 while (offset < (count & ~(size - 1)))
12618 {
12619 srcmem = adjust_automodify_address_nv (src, movs_mode,
12620 srcreg, offset);
12621 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12622 destreg, offset);
12623 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12624 offset += size;
12625 }
12626 }
12627 else
12628 {
12629 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12630 & (TARGET_64BIT ? -1 : 0x3fffffff));
12631 countreg = copy_to_mode_reg (counter_mode, countreg);
12632 countreg = ix86_zero_extend_to_Pmode (countreg);
12633
12634 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12635 GEN_INT (size == 4 ? 2 : 3));
12636 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12637 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12638
12639 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12640 countreg, destexp, srcexp));
12641 offset = count & ~(size - 1);
12642 }
12643 }
12644 if (size == 8 && (count & 0x04))
12645 {
12646 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12647 offset);
12648 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12649 offset);
12650 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12651 offset += 4;
12652 }
12653 if (count & 0x02)
12654 {
12655 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12656 offset);
12657 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12658 offset);
12659 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12660 offset += 2;
12661 }
12662 if (count & 0x01)
12663 {
12664 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12665 offset);
12666 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12667 offset);
12668 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12669 }
12670 }
12671 /* The generic code based on the glibc implementation:
12672 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12673 allowing accelerated copying there)
12674 - copy the data using rep movsl
12675 - copy the rest. */
12676 else
12677 {
12678 rtx countreg2;
12679 rtx label = NULL;
12680 rtx srcmem, dstmem;
12681 int desired_alignment = (TARGET_PENTIUMPRO
12682 && (count == 0 || count >= (unsigned int) 260)
12683 ? 8 : UNITS_PER_WORD);
12684 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12685 dst = change_address (dst, BLKmode, destreg);
12686 src = change_address (src, BLKmode, srcreg);
12687
12688 /* In case we don't know anything about the alignment, default to the
12689 library version, since it is usually equally fast and results in
12690 shorter code.
12691
12692 Also emit call when we know that the count is large and call overhead
12693 will not be important. */
12694 if (!TARGET_INLINE_ALL_STRINGOPS
12695 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12696 return 0;
12697
12698 if (TARGET_SINGLE_STRINGOP)
12699 emit_insn (gen_cld ());
12700
12701 countreg2 = gen_reg_rtx (Pmode);
12702 countreg = copy_to_mode_reg (counter_mode, count_exp);
12703
12704 /* We don't use loops to align destination and to copy parts smaller
12705 than 4 bytes, because gcc is able to optimize such code better (in
12706 the case the destination or the count really is aligned, gcc is often
12707 able to predict the branches) and also it is friendlier to the
12708 hardware branch prediction.
12709
12710 Using loops is beneficial for the generic case, because we can
12711 handle small counts using the loops. Many CPUs (such as Athlon)
12712 have large REP prefix setup costs.
12713
12714 This is quite costly. Maybe we can revisit this decision later or
12715 add some customizability to this code. */
12716
12717 if (count == 0 && align < desired_alignment)
12718 {
12719 label = gen_label_rtx ();
12720 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12721 LEU, 0, counter_mode, 1, label);
12722 }
12723 if (align <= 1)
12724 {
12725 rtx label = ix86_expand_aligntest (destreg, 1);
12726 srcmem = change_address (src, QImode, srcreg);
12727 dstmem = change_address (dst, QImode, destreg);
12728 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12729 ix86_adjust_counter (countreg, 1);
12730 emit_label (label);
12731 LABEL_NUSES (label) = 1;
12732 }
12733 if (align <= 2)
12734 {
12735 rtx label = ix86_expand_aligntest (destreg, 2);
12736 srcmem = change_address (src, HImode, srcreg);
12737 dstmem = change_address (dst, HImode, destreg);
12738 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12739 ix86_adjust_counter (countreg, 2);
12740 emit_label (label);
12741 LABEL_NUSES (label) = 1;
12742 }
12743 if (align <= 4 && desired_alignment > 4)
12744 {
12745 rtx label = ix86_expand_aligntest (destreg, 4);
12746 srcmem = change_address (src, SImode, srcreg);
12747 dstmem = change_address (dst, SImode, destreg);
12748 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12749 ix86_adjust_counter (countreg, 4);
12750 emit_label (label);
12751 LABEL_NUSES (label) = 1;
12752 }
12753
12754 if (label && desired_alignment > 4 && !TARGET_64BIT)
12755 {
12756 emit_label (label);
12757 LABEL_NUSES (label) = 1;
12758 label = NULL_RTX;
12759 }
12760 if (!TARGET_SINGLE_STRINGOP)
12761 emit_insn (gen_cld ());
12762 if (TARGET_64BIT)
12763 {
12764 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12765 GEN_INT (3)));
12766 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12767 }
12768 else
12769 {
12770 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12771 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12772 }
12773 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12774 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12775 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12776 countreg2, destexp, srcexp));
12777
12778 if (label)
12779 {
12780 emit_label (label);
12781 LABEL_NUSES (label) = 1;
12782 }
12783 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12784 {
12785 srcmem = change_address (src, SImode, srcreg);
12786 dstmem = change_address (dst, SImode, destreg);
12787 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12788 }
12789 if ((align <= 4 || count == 0) && TARGET_64BIT)
12790 {
12791 rtx label = ix86_expand_aligntest (countreg, 4);
12792 srcmem = change_address (src, SImode, srcreg);
12793 dstmem = change_address (dst, SImode, destreg);
12794 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12795 emit_label (label);
12796 LABEL_NUSES (label) = 1;
12797 }
12798 if (align > 2 && count != 0 && (count & 2))
12799 {
12800 srcmem = change_address (src, HImode, srcreg);
12801 dstmem = change_address (dst, HImode, destreg);
12802 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12803 }
12804 if (align <= 2 || count == 0)
12805 {
12806 rtx label = ix86_expand_aligntest (countreg, 2);
12807 srcmem = change_address (src, HImode, srcreg);
12808 dstmem = change_address (dst, HImode, destreg);
12809 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12810 emit_label (label);
12811 LABEL_NUSES (label) = 1;
12812 }
12813 if (align > 1 && count != 0 && (count & 1))
12814 {
12815 srcmem = change_address (src, QImode, srcreg);
12816 dstmem = change_address (dst, QImode, destreg);
12817 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12818 }
12819 if (align <= 1 || count == 0)
12820 {
12821 rtx label = ix86_expand_aligntest (countreg, 1);
12822 srcmem = change_address (src, QImode, srcreg);
12823 dstmem = change_address (dst, QImode, destreg);
12824 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12825 emit_label (label);
12826 LABEL_NUSES (label) = 1;
12827 }
12828 }
12829
12830 return 1;
12831 }
12832
12833 /* Expand string clear operation (bzero). Use i386 string operations when
12834 	   profitable.  ix86_expand_movmem contains similar code.  */
12835 int
12836 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12837 {
12838 rtx destreg, zeroreg, countreg, destexp;
12839 enum machine_mode counter_mode;
12840 HOST_WIDE_INT align = 0;
12841 unsigned HOST_WIDE_INT count = 0;
12842
12843 if (GET_CODE (align_exp) == CONST_INT)
12844 align = INTVAL (align_exp);
12845
12846 /* Can't use any of this if the user has appropriated esi. */
12847 if (global_regs[4])
12848 return 0;
12849
12850 /* This simple hack avoids all inlining code and simplifies code below. */
12851 if (!TARGET_ALIGN_STRINGOPS)
12852 align = 32;
12853
12854 if (GET_CODE (count_exp) == CONST_INT)
12855 {
12856 count = INTVAL (count_exp);
12857 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12858 return 0;
12859 }
12860 	  /* Figure out the proper mode for the counter.  For 32 bits it is always SImode,
12861 	     for 64 bits use SImode when possible, otherwise DImode.
12862 	     Set count to the number of bytes to be cleared when known at compile time.  */
12863 if (!TARGET_64BIT
12864 || GET_MODE (count_exp) == SImode
12865 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12866 counter_mode = SImode;
12867 else
12868 counter_mode = DImode;
12869
12870 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12871 if (destreg != XEXP (dst, 0))
12872 dst = replace_equiv_address_nv (dst, destreg);
12873
12874
12875 	  /* When optimizing for size, emit a simple rep ; stosb instruction for
12876 	     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
12877 	     sequence is 7 bytes long, so if optimizing for size and the count is
12878 	     small enough that a few stosl, stosw and stosb instructions without
12879 	     rep are shorter, fall back into the next if.  */
12880
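 	  /* For example, with -Os and count == 6 the test below sees
 	     (count & 3) + (count >> 2) == 2 + 1 == 3, which is not above 7, so we
 	     skip rep stosb and let the next branch clear the block with one 4-byte
 	     and one 2-byte store; with count == 35 the sum is 3 + 8 == 11 and the
 	     7-byte rep sequence wins.  */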
12881 if ((!optimize || optimize_size)
12882 && (count == 0
12883 || ((count & 0x03)
12884 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12885 {
12886 emit_insn (gen_cld ());
12887
12888 countreg = ix86_zero_extend_to_Pmode (count_exp);
12889 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12890 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12891 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12892 }
12893 else if (count != 0
12894 && (align >= 8
12895 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12896 || optimize_size || count < (unsigned int) 64))
12897 {
12898 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12899 unsigned HOST_WIDE_INT offset = 0;
12900
12901 emit_insn (gen_cld ());
12902
12903 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12904 if (count & ~(size - 1))
12905 {
12906 unsigned HOST_WIDE_INT repcount;
12907 unsigned int max_nonrep;
12908
12909 repcount = count >> (size == 4 ? 2 : 3);
12910 if (!TARGET_64BIT)
12911 repcount &= 0x3fffffff;
12912
12913 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12914 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12915 bytes. In both cases the latter seems to be faster for small
12916 values of N. */
12917 max_nonrep = size == 4 ? 7 : 4;
12918 if (!optimize_size)
12919 switch (ix86_tune)
12920 {
12921 case PROCESSOR_PENTIUM4:
12922 case PROCESSOR_NOCONA:
12923 max_nonrep = 3;
12924 break;
12925 default:
12926 break;
12927 }
12928
12929 if (repcount <= max_nonrep)
12930 while (repcount-- > 0)
12931 {
12932 rtx mem = adjust_automodify_address_nv (dst,
12933 GET_MODE (zeroreg),
12934 destreg, offset);
12935 emit_insn (gen_strset (destreg, mem, zeroreg));
12936 offset += size;
12937 }
12938 else
12939 {
12940 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12941 countreg = ix86_zero_extend_to_Pmode (countreg);
12942 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12943 GEN_INT (size == 4 ? 2 : 3));
12944 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12945 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12946 destexp));
12947 offset = count & ~(size - 1);
12948 }
12949 }
12950 if (size == 8 && (count & 0x04))
12951 {
12952 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12953 offset);
12954 emit_insn (gen_strset (destreg, mem,
12955 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12956 offset += 4;
12957 }
12958 if (count & 0x02)
12959 {
12960 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12961 offset);
12962 emit_insn (gen_strset (destreg, mem,
12963 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12964 offset += 2;
12965 }
12966 if (count & 0x01)
12967 {
12968 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12969 offset);
12970 emit_insn (gen_strset (destreg, mem,
12971 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12972 }
12973 }
12974 else
12975 {
12976 rtx countreg2;
12977 rtx label = NULL;
12978 /* Compute desired alignment of the string operation. */
12979 int desired_alignment = (TARGET_PENTIUMPRO
12980 && (count == 0 || count >= (unsigned int) 260)
12981 ? 8 : UNITS_PER_WORD);
12982
12983 	      /* In case we don't know anything about the alignment, default to the
12984 	         library version, since it is usually equally fast and results in
12985 	         shorter code.
12986
12987 	         Also emit a call when we know that the count is large and the call
12988 	         overhead will not be important.  */
12989 if (!TARGET_INLINE_ALL_STRINGOPS
12990 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12991 return 0;
12992
12993 if (TARGET_SINGLE_STRINGOP)
12994 emit_insn (gen_cld ());
12995
12996 countreg2 = gen_reg_rtx (Pmode);
12997 countreg = copy_to_mode_reg (counter_mode, count_exp);
12998 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12999 /* Get rid of MEM_OFFSET, it won't be accurate. */
13000 dst = change_address (dst, BLKmode, destreg);
13001
13002 if (count == 0 && align < desired_alignment)
13003 {
13004 label = gen_label_rtx ();
13005 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13006 LEU, 0, counter_mode, 1, label);
13007 }
13008 if (align <= 1)
13009 {
13010 rtx label = ix86_expand_aligntest (destreg, 1);
13011 emit_insn (gen_strset (destreg, dst,
13012 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13013 ix86_adjust_counter (countreg, 1);
13014 emit_label (label);
13015 LABEL_NUSES (label) = 1;
13016 }
13017 if (align <= 2)
13018 {
13019 rtx label = ix86_expand_aligntest (destreg, 2);
13020 emit_insn (gen_strset (destreg, dst,
13021 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13022 ix86_adjust_counter (countreg, 2);
13023 emit_label (label);
13024 LABEL_NUSES (label) = 1;
13025 }
13026 if (align <= 4 && desired_alignment > 4)
13027 {
13028 rtx label = ix86_expand_aligntest (destreg, 4);
13029 emit_insn (gen_strset (destreg, dst,
13030 (TARGET_64BIT
13031 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13032 : zeroreg)));
13033 ix86_adjust_counter (countreg, 4);
13034 emit_label (label);
13035 LABEL_NUSES (label) = 1;
13036 }
13037
13038 if (label && desired_alignment > 4 && !TARGET_64BIT)
13039 {
13040 emit_label (label);
13041 LABEL_NUSES (label) = 1;
13042 label = NULL_RTX;
13043 }
13044
13045 if (!TARGET_SINGLE_STRINGOP)
13046 emit_insn (gen_cld ());
13047 if (TARGET_64BIT)
13048 {
13049 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13050 GEN_INT (3)));
13051 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13052 }
13053 else
13054 {
13055 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13056 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13057 }
13058 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13059 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13060
13061 if (label)
13062 {
13063 emit_label (label);
13064 LABEL_NUSES (label) = 1;
13065 }
13066
13067 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13068 emit_insn (gen_strset (destreg, dst,
13069 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13070 if (TARGET_64BIT && (align <= 4 || count == 0))
13071 {
13072 rtx label = ix86_expand_aligntest (countreg, 4);
13073 emit_insn (gen_strset (destreg, dst,
13074 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13075 emit_label (label);
13076 LABEL_NUSES (label) = 1;
13077 }
13078 if (align > 2 && count != 0 && (count & 2))
13079 emit_insn (gen_strset (destreg, dst,
13080 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13081 if (align <= 2 || count == 0)
13082 {
13083 rtx label = ix86_expand_aligntest (countreg, 2);
13084 emit_insn (gen_strset (destreg, dst,
13085 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13086 emit_label (label);
13087 LABEL_NUSES (label) = 1;
13088 }
13089 if (align > 1 && count != 0 && (count & 1))
13090 emit_insn (gen_strset (destreg, dst,
13091 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13092 if (align <= 1 || count == 0)
13093 {
13094 rtx label = ix86_expand_aligntest (countreg, 1);
13095 emit_insn (gen_strset (destreg, dst,
13096 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13097 emit_label (label);
13098 LABEL_NUSES (label) = 1;
13099 }
13100 }
13101 return 1;
13102 }
13103
13104 /* Expand strlen. */
13105 int
13106 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13107 {
13108 rtx addr, scratch1, scratch2, scratch3, scratch4;
13109
13110 	  /* The generic case of the strlen expander is long.  Avoid expanding it
13111 	     unless TARGET_INLINE_ALL_STRINGOPS.  */
13112
13113 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13114 && !TARGET_INLINE_ALL_STRINGOPS
13115 && !optimize_size
13116 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13117 return 0;
13118
13119 addr = force_reg (Pmode, XEXP (src, 0));
13120 scratch1 = gen_reg_rtx (Pmode);
13121
13122 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13123 && !optimize_size)
13124 {
13125 	      /* Well it seems that some optimizer does not combine a call like
13126 	         foo(strlen(bar), strlen(bar));
13127 	         when the move and the subtraction are done here.  It does calculate
13128 	         the length just once when these instructions are done inside of
13129 	         output_strlen_unroll().  But since &bar[strlen(bar)] is often used
13130 	         and this uses one fewer register for the lifetime of
13131 	         output_strlen_unroll(), this is better.  */
13132
13133 emit_move_insn (out, addr);
13134
13135 ix86_expand_strlensi_unroll_1 (out, src, align);
13136
13137 /* strlensi_unroll_1 returns the address of the zero at the end of
13138 the string, like memchr(), so compute the length by subtracting
13139 the start address. */
13140 if (TARGET_64BIT)
13141 emit_insn (gen_subdi3 (out, out, addr));
13142 else
13143 emit_insn (gen_subsi3 (out, out, addr));
13144 }
13145 else
13146 {
13147 rtx unspec;
13148 scratch2 = gen_reg_rtx (Pmode);
13149 scratch3 = gen_reg_rtx (Pmode);
13150 scratch4 = force_reg (Pmode, constm1_rtx);
13151
13152 emit_move_insn (scratch3, addr);
13153 eoschar = force_reg (QImode, eoschar);
13154
13155 emit_insn (gen_cld ());
13156 src = replace_equiv_address_nv (src, scratch3);
13157
13158 /* If .md starts supporting :P, this can be done in .md. */
13159 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13160 scratch4), UNSPEC_SCAS);
13161 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
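 	      /* Assuming the usual repnz scasb semantics (the count register starts
 	         at -1 and is decremented for every byte examined, including the
 	         terminating zero), SCRATCH1 comes back as -(strlen + 2).  The one's
 	         complement plus the add of -1 below therefore compute
 	         ~scratch1 - 1 == strlen.  */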
13162 if (TARGET_64BIT)
13163 {
13164 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13165 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13166 }
13167 else
13168 {
13169 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13170 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13171 }
13172 }
13173 return 1;
13174 }
13175
13176 /* Expand the appropriate insns for doing strlen if not just doing
13177 repnz; scasb
13178
13179 out = result, initialized with the start address
13180 align_rtx = alignment of the address.
13181 	   scratch = scratch register, initialized with the start address when
13182 not aligned, otherwise undefined
13183
13184 This is just the body. It needs the initializations mentioned above and
13185 some address computing at the end. These things are done in i386.md. */
13186
13187 static void
13188 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13189 {
13190 int align;
13191 rtx tmp;
13192 rtx align_2_label = NULL_RTX;
13193 rtx align_3_label = NULL_RTX;
13194 rtx align_4_label = gen_label_rtx ();
13195 rtx end_0_label = gen_label_rtx ();
13196 rtx mem;
13197 rtx tmpreg = gen_reg_rtx (SImode);
13198 rtx scratch = gen_reg_rtx (SImode);
13199 rtx cmp;
13200
13201 align = 0;
13202 if (GET_CODE (align_rtx) == CONST_INT)
13203 align = INTVAL (align_rtx);
13204
13205 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13206
13207 /* Is there a known alignment and is it less than 4? */
13208 if (align < 4)
13209 {
13210 rtx scratch1 = gen_reg_rtx (Pmode);
13211 emit_move_insn (scratch1, out);
13212 /* Is there a known alignment and is it not 2? */
13213 if (align != 2)
13214 {
13215 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13216 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13217
13218 /* Leave just the 3 lower bits. */
13219 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13220 NULL_RTX, 0, OPTAB_WIDEN);
13221
13222 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13223 Pmode, 1, align_4_label);
13224 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13225 Pmode, 1, align_2_label);
13226 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13227 Pmode, 1, align_3_label);
13228 }
13229 else
13230 {
13231 	          /* Since the alignment is 2, we have to check 2 or 0 bytes;
13232 	             check whether it is aligned to a 4-byte boundary.  */
13233
13234 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13235 NULL_RTX, 0, OPTAB_WIDEN);
13236
13237 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13238 Pmode, 1, align_4_label);
13239 }
13240
13241 mem = change_address (src, QImode, out);
13242
13243 /* Now compare the bytes. */
13244
13245 	      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13246 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13247 QImode, 1, end_0_label);
13248
13249 /* Increment the address. */
13250 if (TARGET_64BIT)
13251 emit_insn (gen_adddi3 (out, out, const1_rtx));
13252 else
13253 emit_insn (gen_addsi3 (out, out, const1_rtx));
13254
13255 /* Not needed with an alignment of 2 */
13256 if (align != 2)
13257 {
13258 emit_label (align_2_label);
13259
13260 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13261 end_0_label);
13262
13263 if (TARGET_64BIT)
13264 emit_insn (gen_adddi3 (out, out, const1_rtx));
13265 else
13266 emit_insn (gen_addsi3 (out, out, const1_rtx));
13267
13268 emit_label (align_3_label);
13269 }
13270
13271 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13272 end_0_label);
13273
13274 if (TARGET_64BIT)
13275 emit_insn (gen_adddi3 (out, out, const1_rtx));
13276 else
13277 emit_insn (gen_addsi3 (out, out, const1_rtx));
13278 }
13279
13280 	  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13281 	     align this loop; it only makes the program larger and does not help to
13282 	     speed it up.  */
13283 emit_label (align_4_label);
13284
13285 mem = change_address (src, SImode, out);
13286 emit_move_insn (scratch, mem);
13287 if (TARGET_64BIT)
13288 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13289 else
13290 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13291
13292 /* This formula yields a nonzero result iff one of the bytes is zero.
13293 	     This saves three branches inside the loop and many cycles.  */
13294
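 	  /* Worked example (an arbitrary value): for scratch == 0x12005634 the
 	     addition gives 0x10ff5533, ~scratch is 0xedffa9cb, their AND is
 	     0x00ff0103, and masking with 0x80808080 leaves 0x00800000 -- the 0x80
 	     bit of the byte that was zero.  A word with no zero byte yields 0, so
 	     the jump below stays in the loop.  */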
13295 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13296 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13297 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13298 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13299 gen_int_mode (0x80808080, SImode)));
13300 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13301 align_4_label);
13302
13303 if (TARGET_CMOVE)
13304 {
13305 rtx reg = gen_reg_rtx (SImode);
13306 rtx reg2 = gen_reg_rtx (Pmode);
13307 emit_move_insn (reg, tmpreg);
13308 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13309
13310 /* If zero is not in the first two bytes, move two bytes forward. */
13311 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13312 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13313 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13314 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13315 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13316 reg,
13317 tmpreg)));
13318 /* Emit lea manually to avoid clobbering of flags. */
13319 emit_insn (gen_rtx_SET (SImode, reg2,
13320 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13321
13322 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13323 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13324 emit_insn (gen_rtx_SET (VOIDmode, out,
13325 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13326 reg2,
13327 out)));
13328
13329 }
13330 else
13331 {
13332 rtx end_2_label = gen_label_rtx ();
13333 /* Is zero in the first two bytes? */
13334
13335 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13336 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13337 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13338 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13339 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13340 pc_rtx);
13341 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13342 JUMP_LABEL (tmp) = end_2_label;
13343
13344 /* Not in the first two. Move two bytes forward. */
13345 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13346 if (TARGET_64BIT)
13347 emit_insn (gen_adddi3 (out, out, const2_rtx));
13348 else
13349 emit_insn (gen_addsi3 (out, out, const2_rtx));
13350
13351 emit_label (end_2_label);
13352
13353 }
13354
13355 /* Avoid branch in fixing the byte. */
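 	  /* Doubling the low mask byte pushes its 0x80 bit into the carry flag, so
 	     carry is set exactly when the zero was the first of the two bytes still
 	     in question.  OUT currently points 3 or 4 bytes past that zero, so the
 	     subtract-with-borrow of 3 below leaves OUT pointing at the terminating
 	     byte, matching the byte-loop exits above, without any branch.  */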
13356 tmpreg = gen_lowpart (QImode, tmpreg);
13357 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13358 	  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
13359 if (TARGET_64BIT)
13360 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13361 else
13362 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13363
13364 emit_label (end_0_label);
13365 }
13366
13367 void
13368 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13369 rtx callarg2 ATTRIBUTE_UNUSED,
13370 rtx pop, int sibcall)
13371 {
13372 rtx use = NULL, call;
13373
13374 if (pop == const0_rtx)
13375 pop = NULL;
13376 gcc_assert (!TARGET_64BIT || !pop);
13377
13378 if (TARGET_MACHO && !TARGET_64BIT)
13379 {
13380 #if TARGET_MACHO
13381 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13382 fnaddr = machopic_indirect_call_target (fnaddr);
13383 #endif
13384 }
13385 else
13386 {
13387 /* Static functions and indirect calls don't need the pic register. */
13388 if (! TARGET_64BIT && flag_pic
13389 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13390 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13391 use_reg (&use, pic_offset_table_rtx);
13392 }
13393
13394 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13395 {
13396 rtx al = gen_rtx_REG (QImode, 0);
13397 emit_move_insn (al, callarg2);
13398 use_reg (&use, al);
13399 }
13400
13401 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13402 {
13403 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13404 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13405 }
13406 if (sibcall && TARGET_64BIT
13407 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13408 {
13409 rtx addr;
13410 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13411 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13412 emit_move_insn (fnaddr, addr);
13413 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13414 }
13415
13416 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13417 if (retval)
13418 call = gen_rtx_SET (VOIDmode, retval, call);
13419 if (pop)
13420 {
13421 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13422 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13423 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13424 }
13425
13426 call = emit_call_insn (call);
13427 if (use)
13428 CALL_INSN_FUNCTION_USAGE (call) = use;
13429 }
13430
13431 \f
13432 /* Clear stack slot assignments remembered from previous functions.
13433 This is called from INIT_EXPANDERS once before RTL is emitted for each
13434 function. */
13435
13436 static struct machine_function *
13437 ix86_init_machine_status (void)
13438 {
13439 struct machine_function *f;
13440
13441 f = ggc_alloc_cleared (sizeof (struct machine_function));
13442 f->use_fast_prologue_epilogue_nregs = -1;
13443 f->tls_descriptor_call_expanded_p = 0;
13444
13445 return f;
13446 }
13447
13448 /* Return a MEM corresponding to a stack slot with mode MODE.
13449 Allocate a new slot if necessary.
13450
13451 The RTL for a function can have several slots available: N is
13452 which slot to use. */
13453
13454 rtx
13455 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13456 {
13457 struct stack_local_entry *s;
13458
13459 gcc_assert (n < MAX_386_STACK_LOCALS);
13460
13461 for (s = ix86_stack_locals; s; s = s->next)
13462 if (s->mode == mode && s->n == n)
13463 return s->rtl;
13464
13465 s = (struct stack_local_entry *)
13466 ggc_alloc (sizeof (struct stack_local_entry));
13467 s->n = n;
13468 s->mode = mode;
13469 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13470
13471 s->next = ix86_stack_locals;
13472 ix86_stack_locals = s;
13473 return s->rtl;
13474 }
13475
13476 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13477
13478 static GTY(()) rtx ix86_tls_symbol;
13479 rtx
13480 ix86_tls_get_addr (void)
13481 {
13482
13483 if (!ix86_tls_symbol)
13484 {
13485 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13486 (TARGET_ANY_GNU_TLS
13487 && !TARGET_64BIT)
13488 ? "___tls_get_addr"
13489 : "__tls_get_addr");
13490 }
13491
13492 return ix86_tls_symbol;
13493 }
13494
13495 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13496
13497 static GTY(()) rtx ix86_tls_module_base_symbol;
13498 rtx
13499 ix86_tls_module_base (void)
13500 {
13501
13502 if (!ix86_tls_module_base_symbol)
13503 {
13504 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13505 "_TLS_MODULE_BASE_");
13506 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13507 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13508 }
13509
13510 return ix86_tls_module_base_symbol;
13511 }
13512 \f
13513 /* Calculate the length of the memory address in the instruction
13514 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13515
13516 int
13517 memory_address_length (rtx addr)
13518 {
13519 struct ix86_address parts;
13520 rtx base, index, disp;
13521 int len;
13522 int ok;
13523
13524 if (GET_CODE (addr) == PRE_DEC
13525 || GET_CODE (addr) == POST_INC
13526 || GET_CODE (addr) == PRE_MODIFY
13527 || GET_CODE (addr) == POST_MODIFY)
13528 return 0;
13529
13530 ok = ix86_decompose_address (addr, &parts);
13531 gcc_assert (ok);
13532
13533 if (parts.base && GET_CODE (parts.base) == SUBREG)
13534 parts.base = SUBREG_REG (parts.base);
13535 if (parts.index && GET_CODE (parts.index) == SUBREG)
13536 parts.index = SUBREG_REG (parts.index);
13537
13538 base = parts.base;
13539 index = parts.index;
13540 disp = parts.disp;
13541 len = 0;
13542
13543 /* Rule of thumb:
13544 - esp as the base always wants an index,
13545 - ebp as the base always wants a displacement. */
13546
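 	  /* Concretely, on 32-bit: "(%eax)" needs no extra bytes, "(%esp)" needs a
 	     SIB byte and "(%ebp)" a disp8, "8(%eax)" needs one displacement byte,
 	     "foo" or "foo(%eax)" need a 4-byte displacement, and any index register
 	     costs the extra SIB byte.  */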
13547 /* Register Indirect. */
13548 if (base && !index && !disp)
13549 {
13550 /* esp (for its index) and ebp (for its displacement) need
13551 the two-byte modrm form. */
13552 if (addr == stack_pointer_rtx
13553 || addr == arg_pointer_rtx
13554 || addr == frame_pointer_rtx
13555 || addr == hard_frame_pointer_rtx)
13556 len = 1;
13557 }
13558
13559 /* Direct Addressing. */
13560 else if (disp && !base && !index)
13561 len = 4;
13562
13563 else
13564 {
13565 /* Find the length of the displacement constant. */
13566 if (disp)
13567 {
13568 if (base && satisfies_constraint_K (disp))
13569 len = 1;
13570 else
13571 len = 4;
13572 }
13573 /* ebp always wants a displacement. */
13574 else if (base == hard_frame_pointer_rtx)
13575 len = 1;
13576
13577 /* An index requires the two-byte modrm form.... */
13578 if (index
13579 /* ...like esp, which always wants an index. */
13580 || base == stack_pointer_rtx
13581 || base == arg_pointer_rtx
13582 || base == frame_pointer_rtx)
13583 len += 1;
13584 }
13585
13586 return len;
13587 }
13588
13589 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13590 	   is set, expect that the insn has an 8-bit immediate alternative.  */
13591 int
13592 ix86_attr_length_immediate_default (rtx insn, int shortform)
13593 {
13594 int len = 0;
13595 int i;
13596 extract_insn_cached (insn);
13597 for (i = recog_data.n_operands - 1; i >= 0; --i)
13598 if (CONSTANT_P (recog_data.operand[i]))
13599 {
13600 gcc_assert (!len);
13601 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13602 len = 1;
13603 else
13604 {
13605 switch (get_attr_mode (insn))
13606 {
13607 case MODE_QI:
13608 len+=1;
13609 break;
13610 case MODE_HI:
13611 len+=2;
13612 break;
13613 case MODE_SI:
13614 len+=4;
13615 break;
13616 	          /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13617 case MODE_DI:
13618 len+=4;
13619 break;
13620 default:
13621 fatal_insn ("unknown insn mode", insn);
13622 }
13623 }
13624 }
13625 return len;
13626 }
13627 /* Compute default value for "length_address" attribute. */
13628 int
13629 ix86_attr_length_address_default (rtx insn)
13630 {
13631 int i;
13632
13633 if (get_attr_type (insn) == TYPE_LEA)
13634 {
13635 rtx set = PATTERN (insn);
13636
13637 if (GET_CODE (set) == PARALLEL)
13638 set = XVECEXP (set, 0, 0);
13639
13640 gcc_assert (GET_CODE (set) == SET);
13641
13642 return memory_address_length (SET_SRC (set));
13643 }
13644
13645 extract_insn_cached (insn);
13646 for (i = recog_data.n_operands - 1; i >= 0; --i)
13647 if (GET_CODE (recog_data.operand[i]) == MEM)
13648 {
13649 return memory_address_length (XEXP (recog_data.operand[i], 0));
13651 }
13652 return 0;
13653 }
13654 \f
13655 /* Return the maximum number of instructions a cpu can issue. */
13656
13657 static int
13658 ix86_issue_rate (void)
13659 {
13660 switch (ix86_tune)
13661 {
13662 case PROCESSOR_PENTIUM:
13663 case PROCESSOR_K6:
13664 return 2;
13665
13666 case PROCESSOR_PENTIUMPRO:
13667 case PROCESSOR_PENTIUM4:
13668 case PROCESSOR_ATHLON:
13669 case PROCESSOR_K8:
13670 case PROCESSOR_NOCONA:
13671 case PROCESSOR_GENERIC32:
13672 case PROCESSOR_GENERIC64:
13673 return 3;
13674
13675 default:
13676 return 1;
13677 }
13678 }
13679
13680 	/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
13681 	   set by DEP_INSN and reads nothing else that DEP_INSN sets.  */
13682
13683 static int
13684 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13685 {
13686 rtx set, set2;
13687
13688 /* Simplify the test for uninteresting insns. */
13689 if (insn_type != TYPE_SETCC
13690 && insn_type != TYPE_ICMOV
13691 && insn_type != TYPE_FCMOV
13692 && insn_type != TYPE_IBR)
13693 return 0;
13694
13695 if ((set = single_set (dep_insn)) != 0)
13696 {
13697 set = SET_DEST (set);
13698 set2 = NULL_RTX;
13699 }
13700 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13701 && XVECLEN (PATTERN (dep_insn), 0) == 2
13702 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13703 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13704 {
13705 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13706 	      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13707 }
13708 else
13709 return 0;
13710
13711 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13712 return 0;
13713
13714 /* This test is true if the dependent insn reads the flags but
13715 not any other potentially set register. */
13716 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13717 return 0;
13718
13719 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13720 return 0;
13721
13722 return 1;
13723 }
13724
13725 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13726 address with operands set by DEP_INSN. */
13727
13728 static int
13729 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13730 {
13731 rtx addr;
13732
13733 if (insn_type == TYPE_LEA
13734 && TARGET_PENTIUM)
13735 {
13736 addr = PATTERN (insn);
13737
13738 if (GET_CODE (addr) == PARALLEL)
13739 addr = XVECEXP (addr, 0, 0);
13740
13741 gcc_assert (GET_CODE (addr) == SET);
13742
13743 addr = SET_SRC (addr);
13744 }
13745 else
13746 {
13747 int i;
13748 extract_insn_cached (insn);
13749 for (i = recog_data.n_operands - 1; i >= 0; --i)
13750 if (GET_CODE (recog_data.operand[i]) == MEM)
13751 {
13752 addr = XEXP (recog_data.operand[i], 0);
13753 goto found;
13754 }
13755 return 0;
13756 found:;
13757 }
13758
13759 return modified_in_p (addr, dep_insn);
13760 }
13761
13762 static int
13763 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13764 {
13765 enum attr_type insn_type, dep_insn_type;
13766 enum attr_memory memory;
13767 rtx set, set2;
13768 int dep_insn_code_number;
13769
13770 /* Anti and output dependencies have zero cost on all CPUs. */
13771 if (REG_NOTE_KIND (link) != 0)
13772 return 0;
13773
13774 dep_insn_code_number = recog_memoized (dep_insn);
13775
13776 /* If we can't recognize the insns, we can't really do anything. */
13777 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13778 return cost;
13779
13780 insn_type = get_attr_type (insn);
13781 dep_insn_type = get_attr_type (dep_insn);
13782
13783 switch (ix86_tune)
13784 {
13785 case PROCESSOR_PENTIUM:
13786 /* Address Generation Interlock adds a cycle of latency. */
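 	      /* E.g. an add that writes %eax immediately followed by a load from
 	         (%eax) pays this extra cycle on the original Pentium.  */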
13787 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13788 cost += 1;
13789
13790 /* ??? Compares pair with jump/setcc. */
13791 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13792 cost = 0;
13793
13794 /* Floating point stores require value to be ready one cycle earlier. */
13795 if (insn_type == TYPE_FMOV
13796 && get_attr_memory (insn) == MEMORY_STORE
13797 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13798 cost += 1;
13799 break;
13800
13801 case PROCESSOR_PENTIUMPRO:
13802 memory = get_attr_memory (insn);
13803
13804 /* INT->FP conversion is expensive. */
13805 if (get_attr_fp_int_src (dep_insn))
13806 cost += 5;
13807
13808 /* There is one cycle extra latency between an FP op and a store. */
13809 if (insn_type == TYPE_FMOV
13810 && (set = single_set (dep_insn)) != NULL_RTX
13811 && (set2 = single_set (insn)) != NULL_RTX
13812 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13813 && GET_CODE (SET_DEST (set2)) == MEM)
13814 cost += 1;
13815
13816 	      /* Model the reorder buffer's ability to hide the latency of a load by
13817 	         executing it in parallel with the previous instruction, in case the
13818 	         previous instruction is not needed to compute the address.  */
13819 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13820 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13821 {
13822 	          /* Claim moves to take one cycle, as the core can issue one load
13823 	             at a time and the next load can start a cycle later.  */
13824 if (dep_insn_type == TYPE_IMOV
13825 || dep_insn_type == TYPE_FMOV)
13826 cost = 1;
13827 else if (cost > 1)
13828 cost--;
13829 }
13830 break;
13831
13832 case PROCESSOR_K6:
13833 memory = get_attr_memory (insn);
13834
13835 /* The esp dependency is resolved before the instruction is really
13836 finished. */
13837 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13838 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13839 return 1;
13840
13841 /* INT->FP conversion is expensive. */
13842 if (get_attr_fp_int_src (dep_insn))
13843 cost += 5;
13844
13845 	      /* Model the reorder buffer's ability to hide the latency of a load by
13846 	         executing it in parallel with the previous instruction, in case the
13847 	         previous instruction is not needed to compute the address.  */
13848 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13849 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13850 {
13851 	          /* Claim moves to take one cycle, as the core can issue one load
13852 	             at a time and the next load can start a cycle later.  */
13853 if (dep_insn_type == TYPE_IMOV
13854 || dep_insn_type == TYPE_FMOV)
13855 cost = 1;
13856 else if (cost > 2)
13857 cost -= 2;
13858 else
13859 cost = 1;
13860 }
13861 break;
13862
13863 case PROCESSOR_ATHLON:
13864 case PROCESSOR_K8:
13865 case PROCESSOR_GENERIC32:
13866 case PROCESSOR_GENERIC64:
13867 memory = get_attr_memory (insn);
13868
13869 	      /* Model the reorder buffer's ability to hide the latency of a load by
13870 	         executing it in parallel with the previous instruction, in case the
13871 	         previous instruction is not needed to compute the address.  */
13872 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13873 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13874 {
13875 enum attr_unit unit = get_attr_unit (insn);
13876 int loadcost = 3;
13877
13878 /* Because of the difference between the length of integer and
13879 floating unit pipeline preparation stages, the memory operands
13880 for floating point are cheaper.
13881
13882 	         ??? For Athlon the difference is most probably 2.  */
13883 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13884 loadcost = 3;
13885 else
13886 loadcost = TARGET_ATHLON ? 2 : 0;
13887
13888 if (cost >= loadcost)
13889 cost -= loadcost;
13890 else
13891 cost = 0;
13892 }
13893
13894 default:
13895 break;
13896 }
13897
13898 return cost;
13899 }
13900
13901 /* How many alternative schedules to try. This should be as wide as the
13902 scheduling freedom in the DFA, but no wider. Making this value too
13903 	   large results in extra work for the scheduler.  */
13904
13905 static int
13906 ia32_multipass_dfa_lookahead (void)
13907 {
13908 if (ix86_tune == PROCESSOR_PENTIUM)
13909 return 2;
13910
13911 if (ix86_tune == PROCESSOR_PENTIUMPRO
13912 || ix86_tune == PROCESSOR_K6)
13913 return 1;
13914
13915 else
13916 return 0;
13917 }
13918
13919 \f
13920 /* Compute the alignment given to a constant that is being placed in memory.
13921 EXP is the constant and ALIGN is the alignment that the object would
13922 ordinarily have.
13923 The value of this function is used instead of that alignment to align
13924 the object. */
13925
13926 int
13927 ix86_constant_alignment (tree exp, int align)
13928 {
13929 if (TREE_CODE (exp) == REAL_CST)
13930 {
13931 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13932 return 64;
13933 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13934 return 128;
13935 }
13936 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13937 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13938 return BITS_PER_WORD;
13939
13940 return align;
13941 }
13942
13943 /* Compute the alignment for a static variable.
13944 TYPE is the data type, and ALIGN is the alignment that
13945 the object would ordinarily have. The value of this function is used
13946 instead of that alignment to align the object. */
13947
13948 int
13949 ix86_data_alignment (tree type, int align)
13950 {
13951 int max_align = optimize_size ? BITS_PER_WORD : 256;
13952
13953 if (AGGREGATE_TYPE_P (type)
13954 && TYPE_SIZE (type)
13955 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13956 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13957 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13958 && align < max_align)
13959 align = max_align;
13960
13961 	  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
13962 	     to a 16-byte boundary.  */
13963 if (TARGET_64BIT)
13964 {
13965 if (AGGREGATE_TYPE_P (type)
13966 && TYPE_SIZE (type)
13967 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13968 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13969 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13970 return 128;
13971 }
13972
13973 if (TREE_CODE (type) == ARRAY_TYPE)
13974 {
13975 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13976 return 64;
13977 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13978 return 128;
13979 }
13980 else if (TREE_CODE (type) == COMPLEX_TYPE)
13981 {
13982
13983 if (TYPE_MODE (type) == DCmode && align < 64)
13984 return 64;
13985 if (TYPE_MODE (type) == XCmode && align < 128)
13986 return 128;
13987 }
13988 else if ((TREE_CODE (type) == RECORD_TYPE
13989 || TREE_CODE (type) == UNION_TYPE
13990 || TREE_CODE (type) == QUAL_UNION_TYPE)
13991 && TYPE_FIELDS (type))
13992 {
13993 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13994 return 64;
13995 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13996 return 128;
13997 }
13998 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13999 || TREE_CODE (type) == INTEGER_TYPE)
14000 {
14001 if (TYPE_MODE (type) == DFmode && align < 64)
14002 return 64;
14003 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14004 return 128;
14005 }
14006
14007 return align;
14008 }
14009
14010 /* Compute the alignment for a local variable.
14011 TYPE is the data type, and ALIGN is the alignment that
14012 the object would ordinarily have. The value of this macro is used
14013 instead of that alignment to align the object. */
14014
14015 int
14016 ix86_local_alignment (tree type, int align)
14017 {
14018 	  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14019 	     to a 16-byte boundary.  */
14020 if (TARGET_64BIT)
14021 {
14022 if (AGGREGATE_TYPE_P (type)
14023 && TYPE_SIZE (type)
14024 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14025 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14026 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14027 return 128;
14028 }
14029 if (TREE_CODE (type) == ARRAY_TYPE)
14030 {
14031 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14032 return 64;
14033 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14034 return 128;
14035 }
14036 else if (TREE_CODE (type) == COMPLEX_TYPE)
14037 {
14038 if (TYPE_MODE (type) == DCmode && align < 64)
14039 return 64;
14040 if (TYPE_MODE (type) == XCmode && align < 128)
14041 return 128;
14042 }
14043 else if ((TREE_CODE (type) == RECORD_TYPE
14044 || TREE_CODE (type) == UNION_TYPE
14045 || TREE_CODE (type) == QUAL_UNION_TYPE)
14046 && TYPE_FIELDS (type))
14047 {
14048 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14049 return 64;
14050 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14051 return 128;
14052 }
14053 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14054 || TREE_CODE (type) == INTEGER_TYPE)
14055 {
14056
14057 if (TYPE_MODE (type) == DFmode && align < 64)
14058 return 64;
14059 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14060 return 128;
14061 }
14062 return align;
14063 }
14064 \f
14065 /* Emit RTL insns to initialize the variable parts of a trampoline.
14066 FNADDR is an RTX for the address of the function's pure code.
14067 CXT is an RTX for the static chain value for the function. */
14068 void
14069 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14070 {
14071 if (!TARGET_64BIT)
14072 {
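 	      /* The ten bytes written below decode as
 	             b9 <cxt32>   movl $CXT, %ecx
 	             e9 <rel32>   jmp  FNADDR
 	         i.e. the static chain is passed in %ecx and control reaches the
 	         target through a relative jump.  */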
14073 /* Compute offset from the end of the jmp to the target function. */
14074 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14075 plus_constant (tramp, 10),
14076 NULL_RTX, 1, OPTAB_DIRECT);
14077 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14078 gen_int_mode (0xb9, QImode));
14079 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14080 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14081 gen_int_mode (0xe9, QImode));
14082 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14083 }
14084 else
14085 {
14086 int offset = 0;
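 	      /* The bytes stored below decode roughly as
 	             49 bb <imm64>   movabs $FNADDR, %r11   (or 41 bb <imm32> movl)
 	             49 ba <imm64>   movabs $CXT, %r10
 	             49 ff e3        jmp    *%r11
 	         so the static chain travels in %r10 and the target address in %r11.  */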
14087 	      /* Try to load the address using a shorter movl instead of movabs.
14088 	         We may want to support movq for kernel mode, but the kernel does not
14089 	         use trampolines at the moment.  */
14090 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14091 {
14092 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14093 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14094 gen_int_mode (0xbb41, HImode));
14095 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14096 gen_lowpart (SImode, fnaddr));
14097 offset += 6;
14098 }
14099 else
14100 {
14101 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14102 gen_int_mode (0xbb49, HImode));
14103 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14104 fnaddr);
14105 offset += 10;
14106 }
14107 /* Load static chain using movabs to r10. */
14108 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14109 gen_int_mode (0xba49, HImode));
14110 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14111 cxt);
14112 offset += 10;
14113 	      /* Jump to r11.  */
14114 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14115 gen_int_mode (0xff49, HImode));
14116 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14117 gen_int_mode (0xe3, QImode));
14118 offset += 3;
14119 gcc_assert (offset <= TRAMPOLINE_SIZE);
14120 }
14121
14122 #ifdef ENABLE_EXECUTE_STACK
14123 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14124 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14125 #endif
14126 }
14127 \f
14128 /* Codes for all the SSE/MMX builtins. */
14129 enum ix86_builtins
14130 {
14131 IX86_BUILTIN_ADDPS,
14132 IX86_BUILTIN_ADDSS,
14133 IX86_BUILTIN_DIVPS,
14134 IX86_BUILTIN_DIVSS,
14135 IX86_BUILTIN_MULPS,
14136 IX86_BUILTIN_MULSS,
14137 IX86_BUILTIN_SUBPS,
14138 IX86_BUILTIN_SUBSS,
14139
14140 IX86_BUILTIN_CMPEQPS,
14141 IX86_BUILTIN_CMPLTPS,
14142 IX86_BUILTIN_CMPLEPS,
14143 IX86_BUILTIN_CMPGTPS,
14144 IX86_BUILTIN_CMPGEPS,
14145 IX86_BUILTIN_CMPNEQPS,
14146 IX86_BUILTIN_CMPNLTPS,
14147 IX86_BUILTIN_CMPNLEPS,
14148 IX86_BUILTIN_CMPNGTPS,
14149 IX86_BUILTIN_CMPNGEPS,
14150 IX86_BUILTIN_CMPORDPS,
14151 IX86_BUILTIN_CMPUNORDPS,
14152 IX86_BUILTIN_CMPEQSS,
14153 IX86_BUILTIN_CMPLTSS,
14154 IX86_BUILTIN_CMPLESS,
14155 IX86_BUILTIN_CMPNEQSS,
14156 IX86_BUILTIN_CMPNLTSS,
14157 IX86_BUILTIN_CMPNLESS,
14158 IX86_BUILTIN_CMPNGTSS,
14159 IX86_BUILTIN_CMPNGESS,
14160 IX86_BUILTIN_CMPORDSS,
14161 IX86_BUILTIN_CMPUNORDSS,
14162
14163 IX86_BUILTIN_COMIEQSS,
14164 IX86_BUILTIN_COMILTSS,
14165 IX86_BUILTIN_COMILESS,
14166 IX86_BUILTIN_COMIGTSS,
14167 IX86_BUILTIN_COMIGESS,
14168 IX86_BUILTIN_COMINEQSS,
14169 IX86_BUILTIN_UCOMIEQSS,
14170 IX86_BUILTIN_UCOMILTSS,
14171 IX86_BUILTIN_UCOMILESS,
14172 IX86_BUILTIN_UCOMIGTSS,
14173 IX86_BUILTIN_UCOMIGESS,
14174 IX86_BUILTIN_UCOMINEQSS,
14175
14176 IX86_BUILTIN_CVTPI2PS,
14177 IX86_BUILTIN_CVTPS2PI,
14178 IX86_BUILTIN_CVTSI2SS,
14179 IX86_BUILTIN_CVTSI642SS,
14180 IX86_BUILTIN_CVTSS2SI,
14181 IX86_BUILTIN_CVTSS2SI64,
14182 IX86_BUILTIN_CVTTPS2PI,
14183 IX86_BUILTIN_CVTTSS2SI,
14184 IX86_BUILTIN_CVTTSS2SI64,
14185
14186 IX86_BUILTIN_MAXPS,
14187 IX86_BUILTIN_MAXSS,
14188 IX86_BUILTIN_MINPS,
14189 IX86_BUILTIN_MINSS,
14190
14191 IX86_BUILTIN_LOADUPS,
14192 IX86_BUILTIN_STOREUPS,
14193 IX86_BUILTIN_MOVSS,
14194
14195 IX86_BUILTIN_MOVHLPS,
14196 IX86_BUILTIN_MOVLHPS,
14197 IX86_BUILTIN_LOADHPS,
14198 IX86_BUILTIN_LOADLPS,
14199 IX86_BUILTIN_STOREHPS,
14200 IX86_BUILTIN_STORELPS,
14201
14202 IX86_BUILTIN_MASKMOVQ,
14203 IX86_BUILTIN_MOVMSKPS,
14204 IX86_BUILTIN_PMOVMSKB,
14205
14206 IX86_BUILTIN_MOVNTPS,
14207 IX86_BUILTIN_MOVNTQ,
14208
14209 IX86_BUILTIN_LOADDQU,
14210 IX86_BUILTIN_STOREDQU,
14211
14212 IX86_BUILTIN_PACKSSWB,
14213 IX86_BUILTIN_PACKSSDW,
14214 IX86_BUILTIN_PACKUSWB,
14215
14216 IX86_BUILTIN_PADDB,
14217 IX86_BUILTIN_PADDW,
14218 IX86_BUILTIN_PADDD,
14219 IX86_BUILTIN_PADDQ,
14220 IX86_BUILTIN_PADDSB,
14221 IX86_BUILTIN_PADDSW,
14222 IX86_BUILTIN_PADDUSB,
14223 IX86_BUILTIN_PADDUSW,
14224 IX86_BUILTIN_PSUBB,
14225 IX86_BUILTIN_PSUBW,
14226 IX86_BUILTIN_PSUBD,
14227 IX86_BUILTIN_PSUBQ,
14228 IX86_BUILTIN_PSUBSB,
14229 IX86_BUILTIN_PSUBSW,
14230 IX86_BUILTIN_PSUBUSB,
14231 IX86_BUILTIN_PSUBUSW,
14232
14233 IX86_BUILTIN_PAND,
14234 IX86_BUILTIN_PANDN,
14235 IX86_BUILTIN_POR,
14236 IX86_BUILTIN_PXOR,
14237
14238 IX86_BUILTIN_PAVGB,
14239 IX86_BUILTIN_PAVGW,
14240
14241 IX86_BUILTIN_PCMPEQB,
14242 IX86_BUILTIN_PCMPEQW,
14243 IX86_BUILTIN_PCMPEQD,
14244 IX86_BUILTIN_PCMPGTB,
14245 IX86_BUILTIN_PCMPGTW,
14246 IX86_BUILTIN_PCMPGTD,
14247
14248 IX86_BUILTIN_PMADDWD,
14249
14250 IX86_BUILTIN_PMAXSW,
14251 IX86_BUILTIN_PMAXUB,
14252 IX86_BUILTIN_PMINSW,
14253 IX86_BUILTIN_PMINUB,
14254
14255 IX86_BUILTIN_PMULHUW,
14256 IX86_BUILTIN_PMULHW,
14257 IX86_BUILTIN_PMULLW,
14258
14259 IX86_BUILTIN_PSADBW,
14260 IX86_BUILTIN_PSHUFW,
14261
14262 IX86_BUILTIN_PSLLW,
14263 IX86_BUILTIN_PSLLD,
14264 IX86_BUILTIN_PSLLQ,
14265 IX86_BUILTIN_PSRAW,
14266 IX86_BUILTIN_PSRAD,
14267 IX86_BUILTIN_PSRLW,
14268 IX86_BUILTIN_PSRLD,
14269 IX86_BUILTIN_PSRLQ,
14270 IX86_BUILTIN_PSLLWI,
14271 IX86_BUILTIN_PSLLDI,
14272 IX86_BUILTIN_PSLLQI,
14273 IX86_BUILTIN_PSRAWI,
14274 IX86_BUILTIN_PSRADI,
14275 IX86_BUILTIN_PSRLWI,
14276 IX86_BUILTIN_PSRLDI,
14277 IX86_BUILTIN_PSRLQI,
14278
14279 IX86_BUILTIN_PUNPCKHBW,
14280 IX86_BUILTIN_PUNPCKHWD,
14281 IX86_BUILTIN_PUNPCKHDQ,
14282 IX86_BUILTIN_PUNPCKLBW,
14283 IX86_BUILTIN_PUNPCKLWD,
14284 IX86_BUILTIN_PUNPCKLDQ,
14285
14286 IX86_BUILTIN_SHUFPS,
14287
14288 IX86_BUILTIN_RCPPS,
14289 IX86_BUILTIN_RCPSS,
14290 IX86_BUILTIN_RSQRTPS,
14291 IX86_BUILTIN_RSQRTSS,
14292 IX86_BUILTIN_SQRTPS,
14293 IX86_BUILTIN_SQRTSS,
14294
14295 IX86_BUILTIN_UNPCKHPS,
14296 IX86_BUILTIN_UNPCKLPS,
14297
14298 IX86_BUILTIN_ANDPS,
14299 IX86_BUILTIN_ANDNPS,
14300 IX86_BUILTIN_ORPS,
14301 IX86_BUILTIN_XORPS,
14302
14303 IX86_BUILTIN_EMMS,
14304 IX86_BUILTIN_LDMXCSR,
14305 IX86_BUILTIN_STMXCSR,
14306 IX86_BUILTIN_SFENCE,
14307
14308 /* 3DNow! Original */
14309 IX86_BUILTIN_FEMMS,
14310 IX86_BUILTIN_PAVGUSB,
14311 IX86_BUILTIN_PF2ID,
14312 IX86_BUILTIN_PFACC,
14313 IX86_BUILTIN_PFADD,
14314 IX86_BUILTIN_PFCMPEQ,
14315 IX86_BUILTIN_PFCMPGE,
14316 IX86_BUILTIN_PFCMPGT,
14317 IX86_BUILTIN_PFMAX,
14318 IX86_BUILTIN_PFMIN,
14319 IX86_BUILTIN_PFMUL,
14320 IX86_BUILTIN_PFRCP,
14321 IX86_BUILTIN_PFRCPIT1,
14322 IX86_BUILTIN_PFRCPIT2,
14323 IX86_BUILTIN_PFRSQIT1,
14324 IX86_BUILTIN_PFRSQRT,
14325 IX86_BUILTIN_PFSUB,
14326 IX86_BUILTIN_PFSUBR,
14327 IX86_BUILTIN_PI2FD,
14328 IX86_BUILTIN_PMULHRW,
14329
14330 /* 3DNow! Athlon Extensions */
14331 IX86_BUILTIN_PF2IW,
14332 IX86_BUILTIN_PFNACC,
14333 IX86_BUILTIN_PFPNACC,
14334 IX86_BUILTIN_PI2FW,
14335 IX86_BUILTIN_PSWAPDSI,
14336 IX86_BUILTIN_PSWAPDSF,
14337
14338 /* SSE2 */
14339 IX86_BUILTIN_ADDPD,
14340 IX86_BUILTIN_ADDSD,
14341 IX86_BUILTIN_DIVPD,
14342 IX86_BUILTIN_DIVSD,
14343 IX86_BUILTIN_MULPD,
14344 IX86_BUILTIN_MULSD,
14345 IX86_BUILTIN_SUBPD,
14346 IX86_BUILTIN_SUBSD,
14347
14348 IX86_BUILTIN_CMPEQPD,
14349 IX86_BUILTIN_CMPLTPD,
14350 IX86_BUILTIN_CMPLEPD,
14351 IX86_BUILTIN_CMPGTPD,
14352 IX86_BUILTIN_CMPGEPD,
14353 IX86_BUILTIN_CMPNEQPD,
14354 IX86_BUILTIN_CMPNLTPD,
14355 IX86_BUILTIN_CMPNLEPD,
14356 IX86_BUILTIN_CMPNGTPD,
14357 IX86_BUILTIN_CMPNGEPD,
14358 IX86_BUILTIN_CMPORDPD,
14359 IX86_BUILTIN_CMPUNORDPD,
14360 IX86_BUILTIN_CMPNEPD,
14361 IX86_BUILTIN_CMPEQSD,
14362 IX86_BUILTIN_CMPLTSD,
14363 IX86_BUILTIN_CMPLESD,
14364 IX86_BUILTIN_CMPNEQSD,
14365 IX86_BUILTIN_CMPNLTSD,
14366 IX86_BUILTIN_CMPNLESD,
14367 IX86_BUILTIN_CMPORDSD,
14368 IX86_BUILTIN_CMPUNORDSD,
14369 IX86_BUILTIN_CMPNESD,
14370
14371 IX86_BUILTIN_COMIEQSD,
14372 IX86_BUILTIN_COMILTSD,
14373 IX86_BUILTIN_COMILESD,
14374 IX86_BUILTIN_COMIGTSD,
14375 IX86_BUILTIN_COMIGESD,
14376 IX86_BUILTIN_COMINEQSD,
14377 IX86_BUILTIN_UCOMIEQSD,
14378 IX86_BUILTIN_UCOMILTSD,
14379 IX86_BUILTIN_UCOMILESD,
14380 IX86_BUILTIN_UCOMIGTSD,
14381 IX86_BUILTIN_UCOMIGESD,
14382 IX86_BUILTIN_UCOMINEQSD,
14383
14384 IX86_BUILTIN_MAXPD,
14385 IX86_BUILTIN_MAXSD,
14386 IX86_BUILTIN_MINPD,
14387 IX86_BUILTIN_MINSD,
14388
14389 IX86_BUILTIN_ANDPD,
14390 IX86_BUILTIN_ANDNPD,
14391 IX86_BUILTIN_ORPD,
14392 IX86_BUILTIN_XORPD,
14393
14394 IX86_BUILTIN_SQRTPD,
14395 IX86_BUILTIN_SQRTSD,
14396
14397 IX86_BUILTIN_UNPCKHPD,
14398 IX86_BUILTIN_UNPCKLPD,
14399
14400 IX86_BUILTIN_SHUFPD,
14401
14402 IX86_BUILTIN_LOADUPD,
14403 IX86_BUILTIN_STOREUPD,
14404 IX86_BUILTIN_MOVSD,
14405
14406 IX86_BUILTIN_LOADHPD,
14407 IX86_BUILTIN_LOADLPD,
14408
14409 IX86_BUILTIN_CVTDQ2PD,
14410 IX86_BUILTIN_CVTDQ2PS,
14411
14412 IX86_BUILTIN_CVTPD2DQ,
14413 IX86_BUILTIN_CVTPD2PI,
14414 IX86_BUILTIN_CVTPD2PS,
14415 IX86_BUILTIN_CVTTPD2DQ,
14416 IX86_BUILTIN_CVTTPD2PI,
14417
14418 IX86_BUILTIN_CVTPI2PD,
14419 IX86_BUILTIN_CVTSI2SD,
14420 IX86_BUILTIN_CVTSI642SD,
14421
14422 IX86_BUILTIN_CVTSD2SI,
14423 IX86_BUILTIN_CVTSD2SI64,
14424 IX86_BUILTIN_CVTSD2SS,
14425 IX86_BUILTIN_CVTSS2SD,
14426 IX86_BUILTIN_CVTTSD2SI,
14427 IX86_BUILTIN_CVTTSD2SI64,
14428
14429 IX86_BUILTIN_CVTPS2DQ,
14430 IX86_BUILTIN_CVTPS2PD,
14431 IX86_BUILTIN_CVTTPS2DQ,
14432
14433 IX86_BUILTIN_MOVNTI,
14434 IX86_BUILTIN_MOVNTPD,
14435 IX86_BUILTIN_MOVNTDQ,
14436
14437 /* SSE2 MMX */
14438 IX86_BUILTIN_MASKMOVDQU,
14439 IX86_BUILTIN_MOVMSKPD,
14440 IX86_BUILTIN_PMOVMSKB128,
14441
14442 IX86_BUILTIN_PACKSSWB128,
14443 IX86_BUILTIN_PACKSSDW128,
14444 IX86_BUILTIN_PACKUSWB128,
14445
14446 IX86_BUILTIN_PADDB128,
14447 IX86_BUILTIN_PADDW128,
14448 IX86_BUILTIN_PADDD128,
14449 IX86_BUILTIN_PADDQ128,
14450 IX86_BUILTIN_PADDSB128,
14451 IX86_BUILTIN_PADDSW128,
14452 IX86_BUILTIN_PADDUSB128,
14453 IX86_BUILTIN_PADDUSW128,
14454 IX86_BUILTIN_PSUBB128,
14455 IX86_BUILTIN_PSUBW128,
14456 IX86_BUILTIN_PSUBD128,
14457 IX86_BUILTIN_PSUBQ128,
14458 IX86_BUILTIN_PSUBSB128,
14459 IX86_BUILTIN_PSUBSW128,
14460 IX86_BUILTIN_PSUBUSB128,
14461 IX86_BUILTIN_PSUBUSW128,
14462
14463 IX86_BUILTIN_PAND128,
14464 IX86_BUILTIN_PANDN128,
14465 IX86_BUILTIN_POR128,
14466 IX86_BUILTIN_PXOR128,
14467
14468 IX86_BUILTIN_PAVGB128,
14469 IX86_BUILTIN_PAVGW128,
14470
14471 IX86_BUILTIN_PCMPEQB128,
14472 IX86_BUILTIN_PCMPEQW128,
14473 IX86_BUILTIN_PCMPEQD128,
14474 IX86_BUILTIN_PCMPGTB128,
14475 IX86_BUILTIN_PCMPGTW128,
14476 IX86_BUILTIN_PCMPGTD128,
14477
14478 IX86_BUILTIN_PMADDWD128,
14479
14480 IX86_BUILTIN_PMAXSW128,
14481 IX86_BUILTIN_PMAXUB128,
14482 IX86_BUILTIN_PMINSW128,
14483 IX86_BUILTIN_PMINUB128,
14484
14485 IX86_BUILTIN_PMULUDQ,
14486 IX86_BUILTIN_PMULUDQ128,
14487 IX86_BUILTIN_PMULHUW128,
14488 IX86_BUILTIN_PMULHW128,
14489 IX86_BUILTIN_PMULLW128,
14490
14491 IX86_BUILTIN_PSADBW128,
14492 IX86_BUILTIN_PSHUFHW,
14493 IX86_BUILTIN_PSHUFLW,
14494 IX86_BUILTIN_PSHUFD,
14495
14496 IX86_BUILTIN_PSLLW128,
14497 IX86_BUILTIN_PSLLD128,
14498 IX86_BUILTIN_PSLLQ128,
14499 IX86_BUILTIN_PSRAW128,
14500 IX86_BUILTIN_PSRAD128,
14501 IX86_BUILTIN_PSRLW128,
14502 IX86_BUILTIN_PSRLD128,
14503 IX86_BUILTIN_PSRLQ128,
14504 IX86_BUILTIN_PSLLDQI128,
14505 IX86_BUILTIN_PSLLWI128,
14506 IX86_BUILTIN_PSLLDI128,
14507 IX86_BUILTIN_PSLLQI128,
14508 IX86_BUILTIN_PSRAWI128,
14509 IX86_BUILTIN_PSRADI128,
14510 IX86_BUILTIN_PSRLDQI128,
14511 IX86_BUILTIN_PSRLWI128,
14512 IX86_BUILTIN_PSRLDI128,
14513 IX86_BUILTIN_PSRLQI128,
14514
14515 IX86_BUILTIN_PUNPCKHBW128,
14516 IX86_BUILTIN_PUNPCKHWD128,
14517 IX86_BUILTIN_PUNPCKHDQ128,
14518 IX86_BUILTIN_PUNPCKHQDQ128,
14519 IX86_BUILTIN_PUNPCKLBW128,
14520 IX86_BUILTIN_PUNPCKLWD128,
14521 IX86_BUILTIN_PUNPCKLDQ128,
14522 IX86_BUILTIN_PUNPCKLQDQ128,
14523
14524 IX86_BUILTIN_CLFLUSH,
14525 IX86_BUILTIN_MFENCE,
14526 IX86_BUILTIN_LFENCE,
14527
14528 /* Prescott New Instructions. */
14529 IX86_BUILTIN_ADDSUBPS,
14530 IX86_BUILTIN_HADDPS,
14531 IX86_BUILTIN_HSUBPS,
14532 IX86_BUILTIN_MOVSHDUP,
14533 IX86_BUILTIN_MOVSLDUP,
14534 IX86_BUILTIN_ADDSUBPD,
14535 IX86_BUILTIN_HADDPD,
14536 IX86_BUILTIN_HSUBPD,
14537 IX86_BUILTIN_LDDQU,
14538
14539 IX86_BUILTIN_MONITOR,
14540 IX86_BUILTIN_MWAIT,
14541
14542 /* SSSE3. */
14543 IX86_BUILTIN_PHADDW,
14544 IX86_BUILTIN_PHADDD,
14545 IX86_BUILTIN_PHADDSW,
14546 IX86_BUILTIN_PHSUBW,
14547 IX86_BUILTIN_PHSUBD,
14548 IX86_BUILTIN_PHSUBSW,
14549 IX86_BUILTIN_PMADDUBSW,
14550 IX86_BUILTIN_PMULHRSW,
14551 IX86_BUILTIN_PSHUFB,
14552 IX86_BUILTIN_PSIGNB,
14553 IX86_BUILTIN_PSIGNW,
14554 IX86_BUILTIN_PSIGND,
14555 IX86_BUILTIN_PALIGNR,
14556 IX86_BUILTIN_PABSB,
14557 IX86_BUILTIN_PABSW,
14558 IX86_BUILTIN_PABSD,
14559
14560 IX86_BUILTIN_PHADDW128,
14561 IX86_BUILTIN_PHADDD128,
14562 IX86_BUILTIN_PHADDSW128,
14563 IX86_BUILTIN_PHSUBW128,
14564 IX86_BUILTIN_PHSUBD128,
14565 IX86_BUILTIN_PHSUBSW128,
14566 IX86_BUILTIN_PMADDUBSW128,
14567 IX86_BUILTIN_PMULHRSW128,
14568 IX86_BUILTIN_PSHUFB128,
14569 IX86_BUILTIN_PSIGNB128,
14570 IX86_BUILTIN_PSIGNW128,
14571 IX86_BUILTIN_PSIGND128,
14572 IX86_BUILTIN_PALIGNR128,
14573 IX86_BUILTIN_PABSB128,
14574 IX86_BUILTIN_PABSW128,
14575 IX86_BUILTIN_PABSD128,
14576
14577 IX86_BUILTIN_VEC_INIT_V2SI,
14578 IX86_BUILTIN_VEC_INIT_V4HI,
14579 IX86_BUILTIN_VEC_INIT_V8QI,
14580 IX86_BUILTIN_VEC_EXT_V2DF,
14581 IX86_BUILTIN_VEC_EXT_V2DI,
14582 IX86_BUILTIN_VEC_EXT_V4SF,
14583 IX86_BUILTIN_VEC_EXT_V4SI,
14584 IX86_BUILTIN_VEC_EXT_V8HI,
14585 IX86_BUILTIN_VEC_EXT_V2SI,
14586 IX86_BUILTIN_VEC_EXT_V4HI,
14587 IX86_BUILTIN_VEC_SET_V8HI,
14588 IX86_BUILTIN_VEC_SET_V4HI,
14589
14590 IX86_BUILTIN_MAX
14591 };
14592
14593 #define def_builtin(MASK, NAME, TYPE, CODE) \
14594 do { \
14595 if ((MASK) & target_flags \
14596 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14597 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14598 NULL, NULL_TREE); \
14599 } while (0)
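 /* A typical use elsewhere in this file looks roughly like
        def_builtin (MASK_SSE, "__builtin_ia32_loadups",
                     v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
    (the type-node variable name here is illustrative); the mask test keeps,
    for instance, SSE2 builtins from being registered when SSE2 is not
    enabled.  */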
14600
14601 /* Bits for builtin_description.flag. */
14602
14603 /* Set when we don't support the comparison natively, and should
14604 swap_comparison in order to support it. */
14605 #define BUILTIN_DESC_SWAP_OPERANDS 1
14606
14607 struct builtin_description
14608 {
14609 const unsigned int mask;
14610 const enum insn_code icode;
14611 const char *const name;
14612 const enum ix86_builtins code;
14613 const enum rtx_code comparison;
14614 const unsigned int flag;
14615 };
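/* For example, the first bdesc_comi entry below,

     { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 },

   ties the user-visible name to the insn pattern sse_comi, the internal
   code IX86_BUILTIN_COMIEQSS, the comparison code UNEQ and an empty
   flag word.  */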
14616
14617 static const struct builtin_description bdesc_comi[] =
14618 {
14619 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14620 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14621 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14622 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14623 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14624 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14625 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14626 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14627 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14628 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14629 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14630 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14631 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14632 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14633 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14634 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14635 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14636 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14637 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14638 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14639 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14640 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14641 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14642 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14643 };
14644
14645 static const struct builtin_description bdesc_2arg[] =
14646 {
14647 /* SSE */
14648 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14649 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14650 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14651 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14652 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14653 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14654 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14655 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14656
14657 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14658 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14659 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14660 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14661 BUILTIN_DESC_SWAP_OPERANDS },
14662 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14663 BUILTIN_DESC_SWAP_OPERANDS },
14664 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14665 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14666 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14667 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14668 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14669 BUILTIN_DESC_SWAP_OPERANDS },
14670 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14671 BUILTIN_DESC_SWAP_OPERANDS },
14672 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14673 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14674 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14675 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14676 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14677 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14678 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14679 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14680 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14681 BUILTIN_DESC_SWAP_OPERANDS },
14682 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14683 BUILTIN_DESC_SWAP_OPERANDS },
14684 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14685
14686 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14687 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14688 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14689 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14690
14691 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14692 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14693 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14694 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14695
14696 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14697 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14698 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14699 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14700 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14701
14702 /* MMX */
14703 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14704 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14705 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14706 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14707 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14708 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14709 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14710 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14711
14712 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14713 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14714 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14715 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14716 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14717 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14718 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14719 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14720
14721 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14722 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14723 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14724
14725 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14726 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14727 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14728 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14729
14730 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14731 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14732
14733 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14734 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14735 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14736 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14737 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14738 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14739
14740 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14741 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14742 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14743 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14744
14745 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14746 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14747 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14748 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14749 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14750 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14751
14752 /* Special. */
14753 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14754 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14755 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14756
14757 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14758 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14759 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14760
14761 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14762 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14763 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14764 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14765 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14766 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14767
14768 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14769 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14770 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14771 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14772 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14773 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14774
14775 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14776 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14777 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14778 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14779
14780 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14781 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14782
14783 /* SSE2 */
14784 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14785 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14786 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14787 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14788 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14789 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14790 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14791 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14792
14793 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14794 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14795 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14796 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14797 BUILTIN_DESC_SWAP_OPERANDS },
14798 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14799 BUILTIN_DESC_SWAP_OPERANDS },
14800 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14801 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14805 BUILTIN_DESC_SWAP_OPERANDS },
14806 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14807 BUILTIN_DESC_SWAP_OPERANDS },
14808 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14809 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14810 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14811 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14812 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14813 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14814 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14815 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14817
14818 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14819 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14820 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14822
14823 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14824 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14825 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14826 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14827
14828 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14829 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14830 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14831
14832 /* SSE2 MMX */
14833 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14834 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14835 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14836 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14837 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14838 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14839 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14840 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14841
14842 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14843 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14844 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14845 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14846 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14847 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14848 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14849 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14850
14851 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14852 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14853
14854 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14855 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14856 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14857 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14858
14859 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14860 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14861
14862 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14863 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14864 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14865 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14866 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14867 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14868
14869 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14870 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14871 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14872 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14873
14874 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14875 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14878 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14879 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14880 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14881 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14882
14883 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14884 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14885 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14886
14887 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14888 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14889
14890 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14891 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14892
14893 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14894 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14895 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14896
14897 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14898 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14899 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14900
14901 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14902 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14903
14904 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14905
14906 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14907 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14908 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14909 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14910
14911 /* SSE3 MMX */
14912 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14913 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14914 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14915 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14916 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14917 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
14918
14919 /* SSSE3 */
14920 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
14921 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
14922 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
14923 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
14924 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
14925 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
14926 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
14927 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
14928 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
14929 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
14930 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
14931 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
14932 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
14933 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
14934 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
14935 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
14936 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
14937 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
14938 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
14939 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
14940 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
14941 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
14942 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
14943 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
14944 };
14945
14946 static const struct builtin_description bdesc_1arg[] =
14947 {
14948 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14949 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14950
14951 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14952 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14953 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14954
14955 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14956 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14957 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14958 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14959 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14960 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14961
14962 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14963 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14964
14965 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14966
14967 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14968 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14969
14970 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14971 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14972 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14973 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14974 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14975
14976 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14977
14978 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14979 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14980 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14981 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14982
14983 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14984 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14985 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14986
14987 /* SSE3 */
14988 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14989 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14990
14991 /* SSSE3 */
14992 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
14993 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
14994 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
14995 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
14996 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
14997 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
14998 };
14999
15000 static void
15001 ix86_init_builtins (void)
15002 {
15003 if (TARGET_MMX)
15004 ix86_init_mmx_sse_builtins ();
15005 }
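/* Note: ix86_init_builtins is installed elsewhere in this file as the
   TARGET_INIT_BUILTINS hook, so nothing below is registered unless the
   target actually supports MMX.  */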
15006
15007 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15008 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
15009 builtins are defined. */
15010 static void
15011 ix86_init_mmx_sse_builtins (void)
15012 {
15013 const struct builtin_description * d;
15014 size_t i;
15015
15016 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15017 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15018 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15019 tree V2DI_type_node
15020 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15021 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15022 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15023 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15024 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15025 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15026 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15027
15028 tree pchar_type_node = build_pointer_type (char_type_node);
15029 tree pcchar_type_node = build_pointer_type (
15030 build_type_variant (char_type_node, 1, 0));
15031 tree pfloat_type_node = build_pointer_type (float_type_node);
15032 tree pcfloat_type_node = build_pointer_type (
15033 build_type_variant (float_type_node, 1, 0));
15034 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15035 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15036 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15037
15038 /* Comparisons. */
15039 tree int_ftype_v4sf_v4sf
15040 = build_function_type_list (integer_type_node,
15041 V4SF_type_node, V4SF_type_node, NULL_TREE);
15042 tree v4si_ftype_v4sf_v4sf
15043 = build_function_type_list (V4SI_type_node,
15044 V4SF_type_node, V4SF_type_node, NULL_TREE);
15045 /* MMX/SSE/integer conversions. */
15046 tree int_ftype_v4sf
15047 = build_function_type_list (integer_type_node,
15048 V4SF_type_node, NULL_TREE);
15049 tree int64_ftype_v4sf
15050 = build_function_type_list (long_long_integer_type_node,
15051 V4SF_type_node, NULL_TREE);
15052 tree int_ftype_v8qi
15053 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15054 tree v4sf_ftype_v4sf_int
15055 = build_function_type_list (V4SF_type_node,
15056 V4SF_type_node, integer_type_node, NULL_TREE);
15057 tree v4sf_ftype_v4sf_int64
15058 = build_function_type_list (V4SF_type_node,
15059 V4SF_type_node, long_long_integer_type_node,
15060 NULL_TREE);
15061 tree v4sf_ftype_v4sf_v2si
15062 = build_function_type_list (V4SF_type_node,
15063 V4SF_type_node, V2SI_type_node, NULL_TREE);
15064
15065 /* Miscellaneous. */
15066 tree v8qi_ftype_v4hi_v4hi
15067 = build_function_type_list (V8QI_type_node,
15068 V4HI_type_node, V4HI_type_node, NULL_TREE);
15069 tree v4hi_ftype_v2si_v2si
15070 = build_function_type_list (V4HI_type_node,
15071 V2SI_type_node, V2SI_type_node, NULL_TREE);
15072 tree v4sf_ftype_v4sf_v4sf_int
15073 = build_function_type_list (V4SF_type_node,
15074 V4SF_type_node, V4SF_type_node,
15075 integer_type_node, NULL_TREE);
15076 tree v2si_ftype_v4hi_v4hi
15077 = build_function_type_list (V2SI_type_node,
15078 V4HI_type_node, V4HI_type_node, NULL_TREE);
15079 tree v4hi_ftype_v4hi_int
15080 = build_function_type_list (V4HI_type_node,
15081 V4HI_type_node, integer_type_node, NULL_TREE);
15082 tree v4hi_ftype_v4hi_di
15083 = build_function_type_list (V4HI_type_node,
15084 V4HI_type_node, long_long_unsigned_type_node,
15085 NULL_TREE);
15086 tree v2si_ftype_v2si_di
15087 = build_function_type_list (V2SI_type_node,
15088 V2SI_type_node, long_long_unsigned_type_node,
15089 NULL_TREE);
15090 tree void_ftype_void
15091 = build_function_type (void_type_node, void_list_node);
15092 tree void_ftype_unsigned
15093 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15094 tree void_ftype_unsigned_unsigned
15095 = build_function_type_list (void_type_node, unsigned_type_node,
15096 unsigned_type_node, NULL_TREE);
15097 tree void_ftype_pcvoid_unsigned_unsigned
15098 = build_function_type_list (void_type_node, const_ptr_type_node,
15099 unsigned_type_node, unsigned_type_node,
15100 NULL_TREE);
15101 tree unsigned_ftype_void
15102 = build_function_type (unsigned_type_node, void_list_node);
15103 tree v2si_ftype_v4sf
15104 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15105 /* Loads/stores. */
15106 tree void_ftype_v8qi_v8qi_pchar
15107 = build_function_type_list (void_type_node,
15108 V8QI_type_node, V8QI_type_node,
15109 pchar_type_node, NULL_TREE);
15110 tree v4sf_ftype_pcfloat
15111 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15112 /* @@@ the type is bogus */
15113 tree v4sf_ftype_v4sf_pv2si
15114 = build_function_type_list (V4SF_type_node,
15115 V4SF_type_node, pv2si_type_node, NULL_TREE);
15116 tree void_ftype_pv2si_v4sf
15117 = build_function_type_list (void_type_node,
15118 pv2si_type_node, V4SF_type_node, NULL_TREE);
15119 tree void_ftype_pfloat_v4sf
15120 = build_function_type_list (void_type_node,
15121 pfloat_type_node, V4SF_type_node, NULL_TREE);
15122 tree void_ftype_pdi_di
15123 = build_function_type_list (void_type_node,
15124 pdi_type_node, long_long_unsigned_type_node,
15125 NULL_TREE);
15126 tree void_ftype_pv2di_v2di
15127 = build_function_type_list (void_type_node,
15128 pv2di_type_node, V2DI_type_node, NULL_TREE);
15129 /* Normal vector unops. */
15130 tree v4sf_ftype_v4sf
15131 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15132 tree v16qi_ftype_v16qi
15133 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15134 tree v8hi_ftype_v8hi
15135 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15136 tree v4si_ftype_v4si
15137 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15138 tree v8qi_ftype_v8qi
15139 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15140 tree v4hi_ftype_v4hi
15141 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15142
15143 /* Normal vector binops. */
15144 tree v4sf_ftype_v4sf_v4sf
15145 = build_function_type_list (V4SF_type_node,
15146 V4SF_type_node, V4SF_type_node, NULL_TREE);
15147 tree v8qi_ftype_v8qi_v8qi
15148 = build_function_type_list (V8QI_type_node,
15149 V8QI_type_node, V8QI_type_node, NULL_TREE);
15150 tree v4hi_ftype_v4hi_v4hi
15151 = build_function_type_list (V4HI_type_node,
15152 V4HI_type_node, V4HI_type_node, NULL_TREE);
15153 tree v2si_ftype_v2si_v2si
15154 = build_function_type_list (V2SI_type_node,
15155 V2SI_type_node, V2SI_type_node, NULL_TREE);
15156 tree di_ftype_di_di
15157 = build_function_type_list (long_long_unsigned_type_node,
15158 long_long_unsigned_type_node,
15159 long_long_unsigned_type_node, NULL_TREE);
15160
15161 tree di_ftype_di_di_int
15162 = build_function_type_list (long_long_unsigned_type_node,
15163 long_long_unsigned_type_node,
15164 long_long_unsigned_type_node,
15165 integer_type_node, NULL_TREE);
15166
15167 tree v2si_ftype_v2sf
15168 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15169 tree v2sf_ftype_v2si
15170 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15171 tree v2si_ftype_v2si
15172 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15173 tree v2sf_ftype_v2sf
15174 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15175 tree v2sf_ftype_v2sf_v2sf
15176 = build_function_type_list (V2SF_type_node,
15177 V2SF_type_node, V2SF_type_node, NULL_TREE);
15178 tree v2si_ftype_v2sf_v2sf
15179 = build_function_type_list (V2SI_type_node,
15180 V2SF_type_node, V2SF_type_node, NULL_TREE);
15181 tree pint_type_node = build_pointer_type (integer_type_node);
15182 tree pdouble_type_node = build_pointer_type (double_type_node);
15183 tree pcdouble_type_node = build_pointer_type (
15184 build_type_variant (double_type_node, 1, 0));
15185 tree int_ftype_v2df_v2df
15186 = build_function_type_list (integer_type_node,
15187 V2DF_type_node, V2DF_type_node, NULL_TREE);
15188
15189 tree void_ftype_pcvoid
15190 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15191 tree v4sf_ftype_v4si
15192 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15193 tree v4si_ftype_v4sf
15194 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15195 tree v2df_ftype_v4si
15196 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15197 tree v4si_ftype_v2df
15198 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15199 tree v2si_ftype_v2df
15200 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15201 tree v4sf_ftype_v2df
15202 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15203 tree v2df_ftype_v2si
15204 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15205 tree v2df_ftype_v4sf
15206 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15207 tree int_ftype_v2df
15208 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15209 tree int64_ftype_v2df
15210 = build_function_type_list (long_long_integer_type_node,
15211 V2DF_type_node, NULL_TREE);
15212 tree v2df_ftype_v2df_int
15213 = build_function_type_list (V2DF_type_node,
15214 V2DF_type_node, integer_type_node, NULL_TREE);
15215 tree v2df_ftype_v2df_int64
15216 = build_function_type_list (V2DF_type_node,
15217 V2DF_type_node, long_long_integer_type_node,
15218 NULL_TREE);
15219 tree v4sf_ftype_v4sf_v2df
15220 = build_function_type_list (V4SF_type_node,
15221 V4SF_type_node, V2DF_type_node, NULL_TREE);
15222 tree v2df_ftype_v2df_v4sf
15223 = build_function_type_list (V2DF_type_node,
15224 V2DF_type_node, V4SF_type_node, NULL_TREE);
15225 tree v2df_ftype_v2df_v2df_int
15226 = build_function_type_list (V2DF_type_node,
15227 V2DF_type_node, V2DF_type_node,
15228 integer_type_node,
15229 NULL_TREE);
15230 tree v2df_ftype_v2df_pcdouble
15231 = build_function_type_list (V2DF_type_node,
15232 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15233 tree void_ftype_pdouble_v2df
15234 = build_function_type_list (void_type_node,
15235 pdouble_type_node, V2DF_type_node, NULL_TREE);
15236 tree void_ftype_pint_int
15237 = build_function_type_list (void_type_node,
15238 pint_type_node, integer_type_node, NULL_TREE);
15239 tree void_ftype_v16qi_v16qi_pchar
15240 = build_function_type_list (void_type_node,
15241 V16QI_type_node, V16QI_type_node,
15242 pchar_type_node, NULL_TREE);
15243 tree v2df_ftype_pcdouble
15244 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15245 tree v2df_ftype_v2df_v2df
15246 = build_function_type_list (V2DF_type_node,
15247 V2DF_type_node, V2DF_type_node, NULL_TREE);
15248 tree v16qi_ftype_v16qi_v16qi
15249 = build_function_type_list (V16QI_type_node,
15250 V16QI_type_node, V16QI_type_node, NULL_TREE);
15251 tree v8hi_ftype_v8hi_v8hi
15252 = build_function_type_list (V8HI_type_node,
15253 V8HI_type_node, V8HI_type_node, NULL_TREE);
15254 tree v4si_ftype_v4si_v4si
15255 = build_function_type_list (V4SI_type_node,
15256 V4SI_type_node, V4SI_type_node, NULL_TREE);
15257 tree v2di_ftype_v2di_v2di
15258 = build_function_type_list (V2DI_type_node,
15259 V2DI_type_node, V2DI_type_node, NULL_TREE);
15260 tree v2di_ftype_v2df_v2df
15261 = build_function_type_list (V2DI_type_node,
15262 V2DF_type_node, V2DF_type_node, NULL_TREE);
15263 tree v2df_ftype_v2df
15264 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15265 tree v2di_ftype_v2di_int
15266 = build_function_type_list (V2DI_type_node,
15267 V2DI_type_node, integer_type_node, NULL_TREE);
15268 tree v2di_ftype_v2di_v2di_int
15269 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15270 V2DI_type_node, integer_type_node, NULL_TREE);
15271 tree v4si_ftype_v4si_int
15272 = build_function_type_list (V4SI_type_node,
15273 V4SI_type_node, integer_type_node, NULL_TREE);
15274 tree v8hi_ftype_v8hi_int
15275 = build_function_type_list (V8HI_type_node,
15276 V8HI_type_node, integer_type_node, NULL_TREE);
15277 tree v8hi_ftype_v8hi_v2di
15278 = build_function_type_list (V8HI_type_node,
15279 V8HI_type_node, V2DI_type_node, NULL_TREE);
15280 tree v4si_ftype_v4si_v2di
15281 = build_function_type_list (V4SI_type_node,
15282 V4SI_type_node, V2DI_type_node, NULL_TREE);
15283 tree v4si_ftype_v8hi_v8hi
15284 = build_function_type_list (V4SI_type_node,
15285 V8HI_type_node, V8HI_type_node, NULL_TREE);
15286 tree di_ftype_v8qi_v8qi
15287 = build_function_type_list (long_long_unsigned_type_node,
15288 V8QI_type_node, V8QI_type_node, NULL_TREE);
15289 tree di_ftype_v2si_v2si
15290 = build_function_type_list (long_long_unsigned_type_node,
15291 V2SI_type_node, V2SI_type_node, NULL_TREE);
15292 tree v2di_ftype_v16qi_v16qi
15293 = build_function_type_list (V2DI_type_node,
15294 V16QI_type_node, V16QI_type_node, NULL_TREE);
15295 tree v2di_ftype_v4si_v4si
15296 = build_function_type_list (V2DI_type_node,
15297 V4SI_type_node, V4SI_type_node, NULL_TREE);
15298 tree int_ftype_v16qi
15299 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15300 tree v16qi_ftype_pcchar
15301 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15302 tree void_ftype_pchar_v16qi
15303 = build_function_type_list (void_type_node,
15304 pchar_type_node, V16QI_type_node, NULL_TREE);
15305
15306 tree float80_type;
15307 tree float128_type;
15308 tree ftype;
15309
15310 /* The __float80 type. */
15311 if (TYPE_MODE (long_double_type_node) == XFmode)
15312 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15313 "__float80");
15314 else
15315 {
15316 /* The __float80 type. */
15317 float80_type = make_node (REAL_TYPE);
15318 TYPE_PRECISION (float80_type) = 80;
15319 layout_type (float80_type);
15320 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15321 }
15322
15323 if (TARGET_64BIT)
15324 {
15325 float128_type = make_node (REAL_TYPE);
15326 TYPE_PRECISION (float128_type) = 128;
15327 layout_type (float128_type);
15328 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15329 }
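/* For illustration (a sketch of user code, not part of this file): once
   registered, these type names are directly usable, e.g.

     __float80 x = 1.0L;          /@ 80-bit extended precision @/
     #ifdef __x86_64__
     __float128 y = 2.0;          /@ only registered on 64-bit targets @/
     #endif

   (inner comment delimiters written as /@ @/ to keep this block a single
   C comment).  */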
15330
15331 /* Add all builtins that are more or less simple operations on two
15332 operands. */
15333 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15334 {
15335 /* Use one of the operands; the target can have a different mode for
15336 mask-generating compares. */
15337 enum machine_mode mode;
15338 tree type;
15339
15340 if (d->name == 0)
15341 continue;
15342 mode = insn_data[d->icode].operand[1].mode;
15343
15344 switch (mode)
15345 {
15346 case V16QImode:
15347 type = v16qi_ftype_v16qi_v16qi;
15348 break;
15349 case V8HImode:
15350 type = v8hi_ftype_v8hi_v8hi;
15351 break;
15352 case V4SImode:
15353 type = v4si_ftype_v4si_v4si;
15354 break;
15355 case V2DImode:
15356 type = v2di_ftype_v2di_v2di;
15357 break;
15358 case V2DFmode:
15359 type = v2df_ftype_v2df_v2df;
15360 break;
15361 case V4SFmode:
15362 type = v4sf_ftype_v4sf_v4sf;
15363 break;
15364 case V8QImode:
15365 type = v8qi_ftype_v8qi_v8qi;
15366 break;
15367 case V4HImode:
15368 type = v4hi_ftype_v4hi_v4hi;
15369 break;
15370 case V2SImode:
15371 type = v2si_ftype_v2si_v2si;
15372 break;
15373 case DImode:
15374 type = di_ftype_di_di;
15375 break;
15376
15377 default:
15378 gcc_unreachable ();
15379 }
15380
15381 /* Override for comparisons. */
15382 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15383 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15384 type = v4si_ftype_v4sf_v4sf;
15385
15386 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15387 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15388 type = v2di_ftype_v2df_v2df;
15389
15390 def_builtin (d->mask, d->name, type, d->code);
15391 }
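/* As an example of what the loop above produces: the bdesc_2arg entry
   using CODE_FOR_addv4sf3 has operand mode V4SFmode, so
   __builtin_ia32_addps is registered with type v4sf_ftype_v4sf_v4sf,
   while the sse_maskcmpv4sf3 comparison entries are overridden to
   return a V4SI mask instead.  */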
15392
15393 /* Add all builtins that are more or less simple operations on 1 operand. */
15394 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15395 {
15396 enum machine_mode mode;
15397 tree type;
15398
15399 if (d->name == 0)
15400 continue;
15401 mode = insn_data[d->icode].operand[1].mode;
15402
15403 switch (mode)
15404 {
15405 case V16QImode:
15406 type = v16qi_ftype_v16qi;
15407 break;
15408 case V8HImode:
15409 type = v8hi_ftype_v8hi;
15410 break;
15411 case V4SImode:
15412 type = v4si_ftype_v4si;
15413 break;
15414 case V2DFmode:
15415 type = v2df_ftype_v2df;
15416 break;
15417 case V4SFmode:
15418 type = v4sf_ftype_v4sf;
15419 break;
15420 case V8QImode:
15421 type = v8qi_ftype_v8qi;
15422 break;
15423 case V4HImode:
15424 type = v4hi_ftype_v4hi;
15425 break;
15426 case V2SImode:
15427 type = v2si_ftype_v2si;
15428 break;
15429
15430 default:
15431 gcc_unreachable ();
15432 }
15433
15434 def_builtin (d->mask, d->name, type, d->code);
15435 }
15436
15437 /* Add the remaining MMX insns with somewhat more complicated types. */
15438 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15439 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15440 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15441 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15442
15443 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15444 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15445 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15446
15447 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15448 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15449
15450 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15451 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15452
15453 /* comi/ucomi insns. */
15454 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15455 if (d->mask == MASK_SSE2)
15456 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15457 else
15458 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
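/* The loop above gives every COMI/UCOMI builtin an int return value;
   the SSE2 (double-precision) entries take two V2DF arguments and the
   SSE (single-precision) entries two V4SF arguments.  */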
15459
15460 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15461 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15462 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15463
15464 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15465 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15466 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15467 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15468 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15469 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15470 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15471 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15472 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15473 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15474 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15475
15476 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15477
15478 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15479 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15480
15481 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15482 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15483 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15484 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15485
15486 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15487 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15488 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15489 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15490
15491 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15492
15493 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15494
15495 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15496 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15497 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15498 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15499 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15500 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15501
15502 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15503
15504 /* Original 3DNow! */
15505 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15506 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15507 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15512 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15513 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15514 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15515 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15516 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15517 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15518 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15519 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15520 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15521 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15522 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15523 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15524 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15525
15526 /* 3DNow! extension as used in the Athlon CPU. */
15527 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15528 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15529 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15530 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15531 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15532 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15533
15534 /* SSE2 */
15535 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15536
15537 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15538 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15539
15540 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15541 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15542
15543 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15544 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15545 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15546 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15547 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15548
15549 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15550 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15551 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15552 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15553
15554 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15555 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15556
15557 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15558
15559 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15560 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15561
15562 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15563 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15564 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15565 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15566 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15567
15568 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15569
15570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15571 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15572 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15573 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15574
15575 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15576 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15577 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15578
15579 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15580 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15581 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15582 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15583
15584 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15585 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15586 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15587
15588 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15589 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15590
15591 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15592 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15593
15594 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15595 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15596 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15597
15598 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15599 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15600 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15601
15602 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15603 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15604
15605 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15606 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15607 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15608 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15609
15610 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15611 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15612 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15613 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15614
15615 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15616 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15617
15618 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15619
15620 /* Prescott New Instructions. */
15621 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15622 void_ftype_pcvoid_unsigned_unsigned,
15623 IX86_BUILTIN_MONITOR);
15624 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15625 void_ftype_unsigned_unsigned,
15626 IX86_BUILTIN_MWAIT);
15627 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15628 v4sf_ftype_v4sf,
15629 IX86_BUILTIN_MOVSHDUP);
15630 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15631 v4sf_ftype_v4sf,
15632 IX86_BUILTIN_MOVSLDUP);
15633 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15634 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15635
15636 /* SSSE3. */
15637 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15638 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15639 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15640 IX86_BUILTIN_PALIGNR);
15641
15642 /* Access to the vec_init patterns. */
15643 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15644 integer_type_node, NULL_TREE);
15645 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15646 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15647
15648 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15649 short_integer_type_node,
15650 short_integer_type_node,
15651 short_integer_type_node, NULL_TREE);
15652 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15653 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15654
15655 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15656 char_type_node, char_type_node,
15657 char_type_node, char_type_node,
15658 char_type_node, char_type_node,
15659 char_type_node, NULL_TREE);
15660 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15661 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15662
15663 /* Access to the vec_extract patterns. */
15664 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15665 integer_type_node, NULL_TREE);
15666 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15667 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15668
15669 ftype = build_function_type_list (long_long_integer_type_node,
15670 V2DI_type_node, integer_type_node,
15671 NULL_TREE);
15672 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15673 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15674
15675 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15676 integer_type_node, NULL_TREE);
15677 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15678 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15679
15680 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15681 integer_type_node, NULL_TREE);
15682 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15683 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15684
15685 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15686 integer_type_node, NULL_TREE);
15687 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15688 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15689
15690 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15691 integer_type_node, NULL_TREE);
15692 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15693 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15694
15695 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15696 integer_type_node, NULL_TREE);
15697 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15698 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15699
15700 /* Access to the vec_set patterns. */
15701 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15702 intHI_type_node,
15703 integer_type_node, NULL_TREE);
15704 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15705 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15706
15707 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15708 intHI_type_node,
15709 integer_type_node, NULL_TREE);
15710 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15711 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15712 }
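
/* Note: user code normally reaches the builtins defined above through the
   *mmintrin.h intrinsic headers rather than by calling them directly.  A
   minimal, purely illustrative sketch, assuming the usual emmintrin.h
   wrapper that expands to __builtin_ia32_loadupd (the function name here
   is only an example):

       #include <emmintrin.h>
       __m128d
       load_unaligned (const double *p)
       {
         return _mm_loadu_pd (p);
       }
*/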
15713
15714 /* Errors in the source file can cause expand_expr to return const0_rtx
15715 where we expect a vector. To avoid crashing, use one of the vector
15716 clear instructions. */
15717 static rtx
15718 safe_vector_operand (rtx x, enum machine_mode mode)
15719 {
15720 if (x == const0_rtx)
15721 x = CONST0_RTX (mode);
15722 return x;
15723 }
15724
15725 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15726
15727 static rtx
15728 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15729 {
15730 rtx pat, xops[3];
15731 tree arg0 = TREE_VALUE (arglist);
15732 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15733 rtx op0 = expand_normal (arg0);
15734 rtx op1 = expand_normal (arg1);
15735 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15736 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15737 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15738
15739 if (VECTOR_MODE_P (mode0))
15740 op0 = safe_vector_operand (op0, mode0);
15741 if (VECTOR_MODE_P (mode1))
15742 op1 = safe_vector_operand (op1, mode1);
15743
15744 if (optimize || !target
15745 || GET_MODE (target) != tmode
15746 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15747 target = gen_reg_rtx (tmode);
15748
15749 if (GET_MODE (op1) == SImode && mode1 == TImode)
15750 {
15751 rtx x = gen_reg_rtx (V4SImode);
15752 emit_insn (gen_sse2_loadd (x, op1));
15753 op1 = gen_lowpart (TImode, x);
15754 }
15755
15756 /* The insn must want input operands in the same modes as the
15757 result. */
15758 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15759 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15760
15761 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15762 op0 = copy_to_mode_reg (mode0, op0);
15763 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15764 op1 = copy_to_mode_reg (mode1, op1);
15765
15766 /* ??? Using ix86_fixup_binary_operands is problematic when
15767 we've got mismatched modes. Fake it. */
15768
15769 xops[0] = target;
15770 xops[1] = op0;
15771 xops[2] = op1;
15772
15773 if (tmode == mode0 && tmode == mode1)
15774 {
15775 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15776 op0 = xops[1];
15777 op1 = xops[2];
15778 }
15779 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15780 {
15781 op0 = force_reg (mode0, op0);
15782 op1 = force_reg (mode1, op1);
15783 target = gen_reg_rtx (tmode);
15784 }
15785
15786 pat = GEN_FCN (icode) (target, op0, op1);
15787 if (! pat)
15788 return 0;
15789 emit_insn (pat);
15790 return target;
15791 }
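
/* Example of the path above: ix86_expand_builtin below maps
   IX86_BUILTIN_PFSUB to this routine with CODE_FOR_mmx_subv2sf3, so the
   two arguments of __builtin_ia32_pfsub (declared earlier as
   v2sf_ftype_v2sf_v2sf) are copied into registers as the pattern's
   predicates require and a single mmx_subv2sf3 insn is emitted.  */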
15792
15793 /* Subroutine of ix86_expand_builtin to take care of stores. */
15794
15795 static rtx
15796 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15797 {
15798 rtx pat;
15799 tree arg0 = TREE_VALUE (arglist);
15800 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15801 rtx op0 = expand_normal (arg0);
15802 rtx op1 = expand_normal (arg1);
15803 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15804 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15805
15806 if (VECTOR_MODE_P (mode1))
15807 op1 = safe_vector_operand (op1, mode1);
15808
15809 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15810 op1 = copy_to_mode_reg (mode1, op1);
15811
15812 pat = GEN_FCN (icode) (op0, op1);
15813 if (pat)
15814 emit_insn (pat);
15815 return 0;
15816 }
15817
15818 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15819
15820 static rtx
15821 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15822 rtx target, int do_load)
15823 {
15824 rtx pat;
15825 tree arg0 = TREE_VALUE (arglist);
15826 rtx op0 = expand_normal (arg0);
15827 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15828 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15829
15830 if (optimize || !target
15831 || GET_MODE (target) != tmode
15832 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15833 target = gen_reg_rtx (tmode);
15834 if (do_load)
15835 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15836 else
15837 {
15838 if (VECTOR_MODE_P (mode0))
15839 op0 = safe_vector_operand (op0, mode0);
15840
15841 if ((optimize && !register_operand (op0, mode0))
15842 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15843 op0 = copy_to_mode_reg (mode0, op0);
15844 }
15845
15846 pat = GEN_FCN (icode) (target, op0);
15847 if (! pat)
15848 return 0;
15849 emit_insn (pat);
15850 return target;
15851 }
15852
15853 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15854 sqrtss, rsqrtss, rcpss. */
15855
15856 static rtx
15857 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15858 {
15859 rtx pat;
15860 tree arg0 = TREE_VALUE (arglist);
15861 rtx op1, op0 = expand_normal (arg0);
15862 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15863 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15864
15865 if (optimize || !target
15866 || GET_MODE (target) != tmode
15867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15868 target = gen_reg_rtx (tmode);
15869
15870 if (VECTOR_MODE_P (mode0))
15871 op0 = safe_vector_operand (op0, mode0);
15872
15873 if ((optimize && !register_operand (op0, mode0))
15874 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15875 op0 = copy_to_mode_reg (mode0, op0);
15876
15877 op1 = op0;
15878 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15879 op1 = copy_to_mode_reg (mode0, op1);
15880
15881 pat = GEN_FCN (icode) (target, op0, op1);
15882 if (! pat)
15883 return 0;
15884 emit_insn (pat);
15885 return target;
15886 }
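
/* Example of the path above: the dispatch in ix86_expand_builtin below maps
   IX86_BUILTIN_SQRTSS to this routine with CODE_FOR_sse_vmsqrtv4sf2.  The
   single user operand is passed as both inputs of the pattern, presumably
   so the scalar insn has a source from which to copy the untouched upper
   elements.  */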
15887
15888 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15889
15890 static rtx
15891 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15892 rtx target)
15893 {
15894 rtx pat;
15895 tree arg0 = TREE_VALUE (arglist);
15896 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15897 rtx op0 = expand_normal (arg0);
15898 rtx op1 = expand_normal (arg1);
15899 rtx op2;
15900 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15901 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15902 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15903 enum rtx_code comparison = d->comparison;
15904
15905 if (VECTOR_MODE_P (mode0))
15906 op0 = safe_vector_operand (op0, mode0);
15907 if (VECTOR_MODE_P (mode1))
15908 op1 = safe_vector_operand (op1, mode1);
15909
15910 /* Swap operands if we have a comparison that isn't available in
15911 hardware. */
15912 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15913 {
15914 rtx tmp = gen_reg_rtx (mode1);
15915 emit_move_insn (tmp, op1);
15916 op1 = op0;
15917 op0 = tmp;
15918 }
15919
15920 if (optimize || !target
15921 || GET_MODE (target) != tmode
15922 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15923 target = gen_reg_rtx (tmode);
15924
15925 if ((optimize && !register_operand (op0, mode0))
15926 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15927 op0 = copy_to_mode_reg (mode0, op0);
15928 if ((optimize && !register_operand (op1, mode1))
15929 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15930 op1 = copy_to_mode_reg (mode1, op1);
15931
15932 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15933 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15934 if (! pat)
15935 return 0;
15936 emit_insn (pat);
15937 return target;
15938 }
15939
15940 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15941
15942 static rtx
15943 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15944 rtx target)
15945 {
15946 rtx pat;
15947 tree arg0 = TREE_VALUE (arglist);
15948 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15949 rtx op0 = expand_normal (arg0);
15950 rtx op1 = expand_normal (arg1);
15951 rtx op2;
15952 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15953 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15954 enum rtx_code comparison = d->comparison;
15955
15956 if (VECTOR_MODE_P (mode0))
15957 op0 = safe_vector_operand (op0, mode0);
15958 if (VECTOR_MODE_P (mode1))
15959 op1 = safe_vector_operand (op1, mode1);
15960
15961 /* Swap operands if we have a comparison that isn't available in
15962 hardware. */
15963 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15964 {
15965 rtx tmp = op1;
15966 op1 = op0;
15967 op0 = tmp;
15968 }
15969
15970 target = gen_reg_rtx (SImode);
15971 emit_move_insn (target, const0_rtx);
15972 target = gen_rtx_SUBREG (QImode, target, 0);
15973
15974 if ((optimize && !register_operand (op0, mode0))
15975 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15976 op0 = copy_to_mode_reg (mode0, op0);
15977 if ((optimize && !register_operand (op1, mode1))
15978 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15979 op1 = copy_to_mode_reg (mode1, op1);
15980
15981 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15982 pat = GEN_FCN (d->icode) (op0, op1);
15983 if (! pat)
15984 return 0;
15985 emit_insn (pat);
15986 emit_insn (gen_rtx_SET (VOIDmode,
15987 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15988 gen_rtx_fmt_ee (comparison, QImode,
15989 SET_DEST (pat),
15990 const0_rtx)));
15991
15992 return SUBREG_REG (target);
15993 }
15994
15995 /* Return the integer constant in ARG. Constrain it to be in the range
15996 of the subparts of VEC_TYPE; issue an error if not. */
15997
15998 static int
15999 get_element_number (tree vec_type, tree arg)
16000 {
16001 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16002
16003 if (!host_integerp (arg, 1)
16004 || (elt = tree_low_cst (arg, 1), elt > max))
16005 {
16006 error ("selector must be an integer constant in the range 0..%wi", max);
16007 return 0;
16008 }
16009
16010 return elt;
16011 }
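
/* Illustration: for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so MAX is 3;
   a call such as __builtin_ia32_vec_ext_v4sf (x, 5) is diagnosed with the
   error above, and 0 is returned so that expansion can continue.  */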
16012
16013 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16014 ix86_expand_vector_init. We DO have language-level syntax for this, in
16015 the form of (type){ init-list }. Except that since we can't place emms
16016 instructions from inside the compiler, we can't allow the use of MMX
16017 registers unless the user explicitly asks for it. So we do *not* define
16018 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16019 we have builtins invoked by mmintrin.h that give us license to emit
16020 these sorts of instructions. */
16021
16022 static rtx
16023 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16024 {
16025 enum machine_mode tmode = TYPE_MODE (type);
16026 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16027 int i, n_elt = GET_MODE_NUNITS (tmode);
16028 rtvec v = rtvec_alloc (n_elt);
16029
16030 gcc_assert (VECTOR_MODE_P (tmode));
16031
16032 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16033 {
16034 rtx x = expand_normal (TREE_VALUE (arglist));
16035 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16036 }
16037
16038 gcc_assert (arglist == NULL);
16039
16040 if (!target || !register_operand (target, tmode))
16041 target = gen_reg_rtx (tmode);
16042
16043 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16044 return target;
16045 }
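
/* Example: __builtin_ia32_vec_init_v2si (1, 2), declared above with two
   integer_type_node arguments, expands both operands, collects the SImode
   values into a PARALLEL and hands it to ix86_expand_vector_init.  */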
16046
16047 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16048 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16049 had a language-level syntax for referencing vector elements. */
16050
16051 static rtx
16052 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16053 {
16054 enum machine_mode tmode, mode0;
16055 tree arg0, arg1;
16056 int elt;
16057 rtx op0;
16058
16059 arg0 = TREE_VALUE (arglist);
16060 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16061
16062 op0 = expand_normal (arg0);
16063 elt = get_element_number (TREE_TYPE (arg0), arg1);
16064
16065 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16066 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16067 gcc_assert (VECTOR_MODE_P (mode0));
16068
16069 op0 = force_reg (mode0, op0);
16070
16071 if (optimize || !target || !register_operand (target, tmode))
16072 target = gen_reg_rtx (tmode);
16073
16074 ix86_expand_vector_extract (true, target, op0, elt);
16075
16076 return target;
16077 }
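
/* Example: __builtin_ia32_vec_ext_v4sf (v, 2) arrives here with
   mode0 == V4SFmode and tmode == SFmode, and is expanded through
   ix86_expand_vector_extract of element 2.  */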
16078
16079 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16080 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16081 a language-level syntax for referencing vector elements. */
16082
16083 static rtx
16084 ix86_expand_vec_set_builtin (tree arglist)
16085 {
16086 enum machine_mode tmode, mode1;
16087 tree arg0, arg1, arg2;
16088 int elt;
16089 rtx op0, op1;
16090
16091 arg0 = TREE_VALUE (arglist);
16092 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16093 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16094
16095 tmode = TYPE_MODE (TREE_TYPE (arg0));
16096 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16097 gcc_assert (VECTOR_MODE_P (tmode));
16098
16099 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16100 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16101 elt = get_element_number (TREE_TYPE (arg0), arg2);
16102
16103 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16104 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16105
16106 op0 = force_reg (tmode, op0);
16107 op1 = force_reg (mode1, op1);
16108
16109 ix86_expand_vector_set (true, op0, op1, elt);
16110
16111 return op0;
16112 }
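
/* Example: __builtin_ia32_vec_set_v8hi (v, x, 3), declared above with V8HI
   and intHI argument types, forces V and X into registers, replaces element
   3 via ix86_expand_vector_set and returns the updated vector value.  */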
16113
16114 /* Expand an expression EXP that calls a built-in function,
16115 with result going to TARGET if that's convenient
16116 (and in mode MODE if that's convenient).
16117 SUBTARGET may be used as the target for computing one of EXP's operands.
16118 IGNORE is nonzero if the value is to be ignored. */
16119
16120 static rtx
16121 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16122 enum machine_mode mode ATTRIBUTE_UNUSED,
16123 int ignore ATTRIBUTE_UNUSED)
16124 {
16125 const struct builtin_description *d;
16126 size_t i;
16127 enum insn_code icode;
16128 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16129 tree arglist = TREE_OPERAND (exp, 1);
16130 tree arg0, arg1, arg2;
16131 rtx op0, op1, op2, pat;
16132 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16133 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16134
16135 switch (fcode)
16136 {
16137 case IX86_BUILTIN_EMMS:
16138 emit_insn (gen_mmx_emms ());
16139 return 0;
16140
16141 case IX86_BUILTIN_SFENCE:
16142 emit_insn (gen_sse_sfence ());
16143 return 0;
16144
16145 case IX86_BUILTIN_MASKMOVQ:
16146 case IX86_BUILTIN_MASKMOVDQU:
16147 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16148 ? CODE_FOR_mmx_maskmovq
16149 : CODE_FOR_sse2_maskmovdqu);
16150 /* Note the arg order is different from the operand order. */
16151 arg1 = TREE_VALUE (arglist);
16152 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16153 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16154 op0 = expand_normal (arg0);
16155 op1 = expand_normal (arg1);
16156 op2 = expand_normal (arg2);
16157 mode0 = insn_data[icode].operand[0].mode;
16158 mode1 = insn_data[icode].operand[1].mode;
16159 mode2 = insn_data[icode].operand[2].mode;
16160
16161 op0 = force_reg (Pmode, op0);
16162 op0 = gen_rtx_MEM (mode1, op0);
16163
16164 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16165 op0 = copy_to_mode_reg (mode0, op0);
16166 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16167 op1 = copy_to_mode_reg (mode1, op1);
16168 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16169 op2 = copy_to_mode_reg (mode2, op2);
16170 pat = GEN_FCN (icode) (op0, op1, op2);
16171 if (! pat)
16172 return 0;
16173 emit_insn (pat);
16174 return 0;
16175
16176 case IX86_BUILTIN_SQRTSS:
16177 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16178 case IX86_BUILTIN_RSQRTSS:
16179 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16180 case IX86_BUILTIN_RCPSS:
16181 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16182
16183 case IX86_BUILTIN_LOADUPS:
16184 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16185
16186 case IX86_BUILTIN_STOREUPS:
16187 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16188
16189 case IX86_BUILTIN_LOADHPS:
16190 case IX86_BUILTIN_LOADLPS:
16191 case IX86_BUILTIN_LOADHPD:
16192 case IX86_BUILTIN_LOADLPD:
16193 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16194 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16195 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16196 : CODE_FOR_sse2_loadlpd);
16197 arg0 = TREE_VALUE (arglist);
16198 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16199 op0 = expand_normal (arg0);
16200 op1 = expand_normal (arg1);
16201 tmode = insn_data[icode].operand[0].mode;
16202 mode0 = insn_data[icode].operand[1].mode;
16203 mode1 = insn_data[icode].operand[2].mode;
16204
16205 op0 = force_reg (mode0, op0);
16206 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16207 if (optimize || target == 0
16208 || GET_MODE (target) != tmode
16209 || !register_operand (target, tmode))
16210 target = gen_reg_rtx (tmode);
16211 pat = GEN_FCN (icode) (target, op0, op1);
16212 if (! pat)
16213 return 0;
16214 emit_insn (pat);
16215 return target;
16216
16217 case IX86_BUILTIN_STOREHPS:
16218 case IX86_BUILTIN_STORELPS:
16219 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16220 : CODE_FOR_sse_storelps);
16221 arg0 = TREE_VALUE (arglist);
16222 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16223 op0 = expand_normal (arg0);
16224 op1 = expand_normal (arg1);
16225 mode0 = insn_data[icode].operand[0].mode;
16226 mode1 = insn_data[icode].operand[1].mode;
16227
16228 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16229 op1 = force_reg (mode1, op1);
16230
16231 pat = GEN_FCN (icode) (op0, op1);
16232 if (! pat)
16233 return 0;
16234 emit_insn (pat);
16235 return const0_rtx;
16236
16237 case IX86_BUILTIN_MOVNTPS:
16238 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16239 case IX86_BUILTIN_MOVNTQ:
16240 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16241
16242 case IX86_BUILTIN_LDMXCSR:
16243 op0 = expand_normal (TREE_VALUE (arglist));
16244 target = assign_386_stack_local (SImode, SLOT_TEMP);
16245 emit_move_insn (target, op0);
16246 emit_insn (gen_sse_ldmxcsr (target));
16247 return 0;
16248
16249 case IX86_BUILTIN_STMXCSR:
16250 target = assign_386_stack_local (SImode, SLOT_TEMP);
16251 emit_insn (gen_sse_stmxcsr (target));
16252 return copy_to_mode_reg (SImode, target);
16253
16254 case IX86_BUILTIN_SHUFPS:
16255 case IX86_BUILTIN_SHUFPD:
16256 icode = (fcode == IX86_BUILTIN_SHUFPS
16257 ? CODE_FOR_sse_shufps
16258 : CODE_FOR_sse2_shufpd);
16259 arg0 = TREE_VALUE (arglist);
16260 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16261 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16262 op0 = expand_normal (arg0);
16263 op1 = expand_normal (arg1);
16264 op2 = expand_normal (arg2);
16265 tmode = insn_data[icode].operand[0].mode;
16266 mode0 = insn_data[icode].operand[1].mode;
16267 mode1 = insn_data[icode].operand[2].mode;
16268 mode2 = insn_data[icode].operand[3].mode;
16269
16270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16271 op0 = copy_to_mode_reg (mode0, op0);
16272 if ((optimize && !register_operand (op1, mode1))
16273 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16274 op1 = copy_to_mode_reg (mode1, op1);
16275 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16276 {
16277 /* @@@ better error message */
16278 error ("mask must be an immediate");
16279 return gen_reg_rtx (tmode);
16280 }
16281 if (optimize || target == 0
16282 || GET_MODE (target) != tmode
16283 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16284 target = gen_reg_rtx (tmode);
16285 pat = GEN_FCN (icode) (target, op0, op1, op2);
16286 if (! pat)
16287 return 0;
16288 emit_insn (pat);
16289 return target;
16290
16291 case IX86_BUILTIN_PSHUFW:
16292 case IX86_BUILTIN_PSHUFD:
16293 case IX86_BUILTIN_PSHUFHW:
16294 case IX86_BUILTIN_PSHUFLW:
16295 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16296 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16297 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16298 : CODE_FOR_mmx_pshufw);
16299 arg0 = TREE_VALUE (arglist);
16300 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16301 op0 = expand_normal (arg0);
16302 op1 = expand_normal (arg1);
16303 tmode = insn_data[icode].operand[0].mode;
16304 mode1 = insn_data[icode].operand[1].mode;
16305 mode2 = insn_data[icode].operand[2].mode;
16306
16307 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16308 op0 = copy_to_mode_reg (mode1, op0);
16309 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16310 {
16311 /* @@@ better error message */
16312 error ("mask must be an immediate");
16313 return const0_rtx;
16314 }
16315 if (target == 0
16316 || GET_MODE (target) != tmode
16317 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16318 target = gen_reg_rtx (tmode);
16319 pat = GEN_FCN (icode) (target, op0, op1);
16320 if (! pat)
16321 return 0;
16322 emit_insn (pat);
16323 return target;
16324
16325 case IX86_BUILTIN_PSLLDQI128:
16326 case IX86_BUILTIN_PSRLDQI128:
16327 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16328 : CODE_FOR_sse2_lshrti3);
16329 arg0 = TREE_VALUE (arglist);
16330 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16331 op0 = expand_normal (arg0);
16332 op1 = expand_normal (arg1);
16333 tmode = insn_data[icode].operand[0].mode;
16334 mode1 = insn_data[icode].operand[1].mode;
16335 mode2 = insn_data[icode].operand[2].mode;
16336
16337 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16338 {
16339 op0 = copy_to_reg (op0);
16340 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16341 }
16342 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16343 {
16344 error ("shift must be an immediate");
16345 return const0_rtx;
16346 }
16347 target = gen_reg_rtx (V2DImode);
16348 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16349 if (! pat)
16350 return 0;
16351 emit_insn (pat);
16352 return target;
16353
16354 case IX86_BUILTIN_FEMMS:
16355 emit_insn (gen_mmx_femms ());
16356 return NULL_RTX;
16357
16358 case IX86_BUILTIN_PAVGUSB:
16359 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16360
16361 case IX86_BUILTIN_PF2ID:
16362 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16363
16364 case IX86_BUILTIN_PFACC:
16365 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16366
16367 case IX86_BUILTIN_PFADD:
16368 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16369
16370 case IX86_BUILTIN_PFCMPEQ:
16371 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16372
16373 case IX86_BUILTIN_PFCMPGE:
16374 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16375
16376 case IX86_BUILTIN_PFCMPGT:
16377 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16378
16379 case IX86_BUILTIN_PFMAX:
16380 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16381
16382 case IX86_BUILTIN_PFMIN:
16383 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16384
16385 case IX86_BUILTIN_PFMUL:
16386 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16387
16388 case IX86_BUILTIN_PFRCP:
16389 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16390
16391 case IX86_BUILTIN_PFRCPIT1:
16392 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16393
16394 case IX86_BUILTIN_PFRCPIT2:
16395 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16396
16397 case IX86_BUILTIN_PFRSQIT1:
16398 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16399
16400 case IX86_BUILTIN_PFRSQRT:
16401 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16402
16403 case IX86_BUILTIN_PFSUB:
16404 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16405
16406 case IX86_BUILTIN_PFSUBR:
16407 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16408
16409 case IX86_BUILTIN_PI2FD:
16410 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16411
16412 case IX86_BUILTIN_PMULHRW:
16413 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16414
16415 case IX86_BUILTIN_PF2IW:
16416 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16417
16418 case IX86_BUILTIN_PFNACC:
16419 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16420
16421 case IX86_BUILTIN_PFPNACC:
16422 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16423
16424 case IX86_BUILTIN_PI2FW:
16425 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16426
16427 case IX86_BUILTIN_PSWAPDSI:
16428 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16429
16430 case IX86_BUILTIN_PSWAPDSF:
16431 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16432
16433 case IX86_BUILTIN_SQRTSD:
16434 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16435 case IX86_BUILTIN_LOADUPD:
16436 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16437 case IX86_BUILTIN_STOREUPD:
16438 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16439
16440 case IX86_BUILTIN_MFENCE:
16441 emit_insn (gen_sse2_mfence ());
16442 return 0;
16443 case IX86_BUILTIN_LFENCE:
16444 emit_insn (gen_sse2_lfence ());
16445 return 0;
16446
16447 case IX86_BUILTIN_CLFLUSH:
16448 arg0 = TREE_VALUE (arglist);
16449 op0 = expand_normal (arg0);
16450 icode = CODE_FOR_sse2_clflush;
16451 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16452 op0 = copy_to_mode_reg (Pmode, op0);
16453
16454 emit_insn (gen_sse2_clflush (op0));
16455 return 0;
16456
16457 case IX86_BUILTIN_MOVNTPD:
16458 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16459 case IX86_BUILTIN_MOVNTDQ:
16460 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16461 case IX86_BUILTIN_MOVNTI:
16462 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16463
16464 case IX86_BUILTIN_LOADDQU:
16465 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16466 case IX86_BUILTIN_STOREDQU:
16467 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16468
16469 case IX86_BUILTIN_MONITOR:
16470 arg0 = TREE_VALUE (arglist);
16471 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16472 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16473 op0 = expand_normal (arg0);
16474 op1 = expand_normal (arg1);
16475 op2 = expand_normal (arg2);
16476 if (!REG_P (op0))
16477 op0 = copy_to_mode_reg (Pmode, op0);
16478 if (!REG_P (op1))
16479 op1 = copy_to_mode_reg (SImode, op1);
16480 if (!REG_P (op2))
16481 op2 = copy_to_mode_reg (SImode, op2);
16482 if (!TARGET_64BIT)
16483 emit_insn (gen_sse3_monitor (op0, op1, op2));
16484 else
16485 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16486 return 0;
16487
16488 case IX86_BUILTIN_MWAIT:
16489 arg0 = TREE_VALUE (arglist);
16490 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16491 op0 = expand_normal (arg0);
16492 op1 = expand_normal (arg1);
16493 if (!REG_P (op0))
16494 op0 = copy_to_mode_reg (SImode, op0);
16495 if (!REG_P (op1))
16496 op1 = copy_to_mode_reg (SImode, op1);
16497 emit_insn (gen_sse3_mwait (op0, op1));
16498 return 0;
16499
16500 case IX86_BUILTIN_LDDQU:
16501 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16502 target, 1);
16503
16504 case IX86_BUILTIN_PALIGNR:
16505 case IX86_BUILTIN_PALIGNR128:
16506 if (fcode == IX86_BUILTIN_PALIGNR)
16507 {
16508 icode = CODE_FOR_ssse3_palignrdi;
16509 mode = DImode;
16510 }
16511 else
16512 {
16513 icode = CODE_FOR_ssse3_palignrti;
16514 mode = V2DImode;
16515 }
16516 arg0 = TREE_VALUE (arglist);
16517 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16518 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16519 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16520 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16521 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16522 tmode = insn_data[icode].operand[0].mode;
16523 mode1 = insn_data[icode].operand[1].mode;
16524 mode2 = insn_data[icode].operand[2].mode;
16525 mode3 = insn_data[icode].operand[3].mode;
16526
16527 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16528 {
16529 op0 = copy_to_reg (op0);
16530 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16531 }
16532 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16533 {
16534 op1 = copy_to_reg (op1);
16535 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16536 }
16537 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16538 {
16539 error ("shift must be an immediate");
16540 return const0_rtx;
16541 }
16542 target = gen_reg_rtx (mode);
16543 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16544 op0, op1, op2);
16545 if (! pat)
16546 return 0;
16547 emit_insn (pat);
16548 return target;
16549
16550 case IX86_BUILTIN_VEC_INIT_V2SI:
16551 case IX86_BUILTIN_VEC_INIT_V4HI:
16552 case IX86_BUILTIN_VEC_INIT_V8QI:
16553 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16554
16555 case IX86_BUILTIN_VEC_EXT_V2DF:
16556 case IX86_BUILTIN_VEC_EXT_V2DI:
16557 case IX86_BUILTIN_VEC_EXT_V4SF:
16558 case IX86_BUILTIN_VEC_EXT_V4SI:
16559 case IX86_BUILTIN_VEC_EXT_V8HI:
16560 case IX86_BUILTIN_VEC_EXT_V2SI:
16561 case IX86_BUILTIN_VEC_EXT_V4HI:
16562 return ix86_expand_vec_ext_builtin (arglist, target);
16563
16564 case IX86_BUILTIN_VEC_SET_V8HI:
16565 case IX86_BUILTIN_VEC_SET_V4HI:
16566 return ix86_expand_vec_set_builtin (arglist);
16567
16568 default:
16569 break;
16570 }
16571
16572 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16573 if (d->code == fcode)
16574 {
16575 /* Compares are treated specially. */
16576 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16577 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16578 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16579 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16580 return ix86_expand_sse_compare (d, arglist, target);
16581
16582 return ix86_expand_binop_builtin (d->icode, arglist, target);
16583 }
16584
16585 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16586 if (d->code == fcode)
16587 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16588
16589 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16590 if (d->code == fcode)
16591 return ix86_expand_sse_comi (d, arglist, target);
16592
16593 gcc_unreachable ();
16594 }
16595
16596 /* Store OPERAND to memory after reload is completed. This means
16597 that we can't easily use assign_stack_local. */
16598 rtx
16599 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16600 {
16601 rtx result;
16602
16603 gcc_assert (reload_completed);
16604 if (TARGET_RED_ZONE)
16605 {
16606 result = gen_rtx_MEM (mode,
16607 gen_rtx_PLUS (Pmode,
16608 stack_pointer_rtx,
16609 GEN_INT (-RED_ZONE_SIZE)));
16610 emit_move_insn (result, operand);
16611 }
16612 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16613 {
16614 switch (mode)
16615 {
16616 case HImode:
16617 case SImode:
16618 operand = gen_lowpart (DImode, operand);
16619 /* FALLTHRU */
16620 case DImode:
16621 emit_insn (
16622 gen_rtx_SET (VOIDmode,
16623 gen_rtx_MEM (DImode,
16624 gen_rtx_PRE_DEC (DImode,
16625 stack_pointer_rtx)),
16626 operand));
16627 break;
16628 default:
16629 gcc_unreachable ();
16630 }
16631 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16632 }
16633 else
16634 {
16635 switch (mode)
16636 {
16637 case DImode:
16638 {
16639 rtx operands[2];
16640 split_di (&operand, 1, operands, operands + 1);
16641 emit_insn (
16642 gen_rtx_SET (VOIDmode,
16643 gen_rtx_MEM (SImode,
16644 gen_rtx_PRE_DEC (Pmode,
16645 stack_pointer_rtx)),
16646 operands[1]));
16647 emit_insn (
16648 gen_rtx_SET (VOIDmode,
16649 gen_rtx_MEM (SImode,
16650 gen_rtx_PRE_DEC (Pmode,
16651 stack_pointer_rtx)),
16652 operands[0]));
16653 }
16654 break;
16655 case HImode:
16656 /* Store HImodes as SImodes. */
16657 operand = gen_lowpart (SImode, operand);
16658 /* FALLTHRU */
16659 case SImode:
16660 emit_insn (
16661 gen_rtx_SET (VOIDmode,
16662 gen_rtx_MEM (GET_MODE (operand),
16663 gen_rtx_PRE_DEC (SImode,
16664 stack_pointer_rtx)),
16665 operand));
16666 break;
16667 default:
16668 gcc_unreachable ();
16669 }
16670 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16671 }
16672 return result;
16673 }
16674
16675 /* Free the operand from memory. */
16676 void
16677 ix86_free_from_memory (enum machine_mode mode)
16678 {
16679 if (!TARGET_RED_ZONE)
16680 {
16681 int size;
16682
16683 if (mode == DImode || TARGET_64BIT)
16684 size = 8;
16685 else
16686 size = 4;
16687 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16688 to a pop or add instruction if registers are available. */
16689 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16690 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16691 GEN_INT (size))));
16692 }
16693 }
16694
16695 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16696 QImode must go into class Q_REGS.
16697 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
16698 movdf to do mem-to-mem moves through integer regs. */
16699 enum reg_class
16700 ix86_preferred_reload_class (rtx x, enum reg_class class)
16701 {
16702 enum machine_mode mode = GET_MODE (x);
16703
16704 /* We're only allowed to return a subclass of CLASS. Many of the
16705 following checks fail for NO_REGS, so eliminate that early. */
16706 if (class == NO_REGS)
16707 return NO_REGS;
16708
16709 /* All classes can load zeros. */
16710 if (x == CONST0_RTX (mode))
16711 return class;
16712
16713 /* Force constants into memory if we are loading a (nonzero) constant into
16714 an MMX or SSE register. This is because there are no MMX/SSE instructions
16715 to load from a constant. */
16716 if (CONSTANT_P (x)
16717 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16718 return NO_REGS;
16719
16720 /* Prefer SSE regs only, if we can use them for math. */
16721 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16722 return SSE_CLASS_P (class) ? class : NO_REGS;
16723
16724 /* Floating-point constants need more complex checks. */
16725 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16726 {
16727 /* General regs can load everything. */
16728 if (reg_class_subset_p (class, GENERAL_REGS))
16729 return class;
16730
16731 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16732 zero above. We only want to wind up preferring 80387 registers if
16733 we plan on doing computation with them. */
16734 if (TARGET_80387
16735 && standard_80387_constant_p (x))
16736 {
16737 /* Limit class to non-sse. */
16738 if (class == FLOAT_SSE_REGS)
16739 return FLOAT_REGS;
16740 if (class == FP_TOP_SSE_REGS)
16741 return FP_TOP_REG;
16742 if (class == FP_SECOND_SSE_REGS)
16743 return FP_SECOND_REG;
16744 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16745 return class;
16746 }
16747
16748 return NO_REGS;
16749 }
16750
16751 /* Generally when we see PLUS here, it's the function invariant
16752 (plus soft-fp const_int), which can only be computed into general
16753 regs. */
16754 if (GET_CODE (x) == PLUS)
16755 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16756
16757 /* QImode constants are easy to load, but non-constant QImode data
16758 must go into Q_REGS. */
16759 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16760 {
16761 if (reg_class_subset_p (class, Q_REGS))
16762 return class;
16763 if (reg_class_subset_p (Q_REGS, class))
16764 return Q_REGS;
16765 return NO_REGS;
16766 }
16767
16768 return class;
16769 }
16770
16771 /* Discourage putting floating-point values in SSE registers unless
16772 SSE math is being used, and likewise for the 387 registers. */
16773 enum reg_class
16774 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16775 {
16776 enum machine_mode mode = GET_MODE (x);
16777
16778 /* Restrict the output reload class to the register bank that we are doing
16779 math on. If we would like not to return a subset of CLASS, reject this
16780 alternative: if reload cannot do this, it will still use its choice. */
16781 mode = GET_MODE (x);
16782 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16783 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16784
16785 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16786 {
16787 if (class == FP_TOP_SSE_REGS)
16788 return FP_TOP_REG;
16789 else if (class == FP_SECOND_SSE_REGS)
16790 return FP_SECOND_REG;
16791 else
16792 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16793 }
16794
16795 return class;
16796 }
16797
16798 /* If we are copying between general and FP registers, we need a memory
16799 location. The same is true for SSE and MMX registers.
16800
16801 The macro can't work reliably when one of the CLASSES is a class containing
16802 registers from multiple units (SSE, MMX, integer). We avoid this by never
16803 combining those units in a single alternative in the machine description.
16804 Ensure that this constraint holds to avoid unexpected surprises.
16805
16806 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16807 enforce these sanity checks. */
16808
16809 int
16810 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16811 enum machine_mode mode, int strict)
16812 {
16813 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16814 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16815 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16816 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16817 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16818 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16819 {
16820 gcc_assert (!strict);
16821 return true;
16822 }
16823
16824 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16825 return true;
16826
16827 /* ??? This is a lie. We do have moves between mmx/general, and between
16828 mmx/sse2. But by saying we need secondary memory we discourage the
16829 register allocator from using the mmx registers unless needed. */
16830 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16831 return true;
16832
16833 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16834 {
16835 /* SSE1 doesn't have any direct moves from other classes. */
16836 if (!TARGET_SSE2)
16837 return true;
16838
16839 /* If the target says that inter-unit moves are more expensive
16840 than moving through memory, then don't generate them. */
16841 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16842 return true;
16843
16844 /* Between SSE and general, we have moves no larger than word size. */
16845 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16846 return true;
16847
16848 /* ??? For the cost of one register reformat penalty, we could use
16849 the same instructions to move SFmode and DFmode data, but the
16850 relevant move patterns don't support those alternatives. */
16851 if (mode == SFmode || mode == DFmode)
16852 return true;
16853 }
16854
16855 return false;
16856 }
16857
16858 /* Return true if the registers in CLASS cannot represent the change from
16859 modes FROM to TO. */
16860
16861 bool
16862 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16863 enum reg_class class)
16864 {
16865 if (from == to)
16866 return false;
16867
16868 /* x87 registers can't do subreg at all, as all values are reformatted
16869 to extended precision. */
16870 if (MAYBE_FLOAT_CLASS_P (class))
16871 return true;
16872
16873 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16874 {
16875 /* Vector registers do not support QI or HImode loads. If we don't
16876 disallow a change to these modes, reload will assume it's ok to
16877 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16878 the vec_dupv4hi pattern. */
16879 if (GET_MODE_SIZE (from) < 4)
16880 return true;
16881
16882 /* Vector registers do not support subreg with nonzero offsets, which
16883 are otherwise valid for integer registers. Since we can't see
16884 whether we have a nonzero offset from here, prohibit all
16885 nonparadoxical subregs changing size. */
16886 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16887 return true;
16888 }
16889
16890 return false;
16891 }
16892
16893 /* Return the cost of moving data from a register in class CLASS1 to
16894 one in class CLASS2.
16895
16896 It is not required that the cost always equal 2 when FROM is the same as TO;
16897 on some machines it is expensive to move between registers if they are not
16898 general registers. */
16899
16900 int
16901 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16902 enum reg_class class2)
16903 {
16904 /* In case we require secondary memory, compute the cost of the store
16905 followed by a load. In order to avoid bad register allocation choices,
16906 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16907
16908 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16909 {
16910 int cost = 1;
16911
16912 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16913 MEMORY_MOVE_COST (mode, class1, 1));
16914 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16915 MEMORY_MOVE_COST (mode, class2, 1));
16916
16917 /* When copying from a general purpose register we may emit multiple
16918 stores followed by a single load, causing a memory size mismatch stall.
16919 Count this as an arbitrarily high cost of 20. */
16920 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16921 cost += 20;
16922
16923 /* In the case of FP/MMX moves, the registers actually overlap, and we
16924 have to switch modes in order to treat them differently. */
16925 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16926 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16927 cost += 20;
16928
16929 return cost;
16930 }
16931
16932 /* Moves between SSE/MMX and integer unit are expensive. */
16933 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16934 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16935 return ix86_cost->mmxsse_to_integer;
16936 if (MAYBE_FLOAT_CLASS_P (class1))
16937 return ix86_cost->fp_move;
16938 if (MAYBE_SSE_CLASS_P (class1))
16939 return ix86_cost->sse_move;
16940 if (MAYBE_MMX_CLASS_P (class1))
16941 return ix86_cost->mmx_move;
16942 return 2;
16943 }
16944
16945 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16946
16947 bool
16948 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16949 {
16950 /* The flags register, and only the flags register, can hold CCmode values. */
16951 if (CC_REGNO_P (regno))
16952 return GET_MODE_CLASS (mode) == MODE_CC;
16953 if (GET_MODE_CLASS (mode) == MODE_CC
16954 || GET_MODE_CLASS (mode) == MODE_RANDOM
16955 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16956 return 0;
16957 if (FP_REGNO_P (regno))
16958 return VALID_FP_MODE_P (mode);
16959 if (SSE_REGNO_P (regno))
16960 {
16961 /* We implement the move patterns for all vector modes into and
16962 out of SSE registers, even when no operation instructions
16963 are available. */
16964 return (VALID_SSE_REG_MODE (mode)
16965 || VALID_SSE2_REG_MODE (mode)
16966 || VALID_MMX_REG_MODE (mode)
16967 || VALID_MMX_REG_MODE_3DNOW (mode));
16968 }
16969 if (MMX_REGNO_P (regno))
16970 {
16971 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16972 so if the register is available at all, then we can move data of
16973 the given mode into or out of it. */
16974 return (VALID_MMX_REG_MODE (mode)
16975 || VALID_MMX_REG_MODE_3DNOW (mode));
16976 }
16977
16978 if (mode == QImode)
16979 {
16980 /* Take care with QImode values - they can be in non-QI regs,
16981 but then they do cause partial register stalls. */
16982 if (regno < 4 || TARGET_64BIT)
16983 return 1;
16984 if (!TARGET_PARTIAL_REG_STALL)
16985 return 1;
16986 return reload_in_progress || reload_completed;
16987 }
16988 /* We handle both integers and floats in the general purpose registers. */
16989 else if (VALID_INT_MODE_P (mode))
16990 return 1;
16991 else if (VALID_FP_MODE_P (mode))
16992 return 1;
16993 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16994 on to use that value in smaller contexts, this can easily force a
16995 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16996 supporting DImode, allow it. */
16997 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16998 return 1;
16999
17000 return 0;
17001 }
17002
17003 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17004 tieable integer mode. */
17005
17006 static bool
17007 ix86_tieable_integer_mode_p (enum machine_mode mode)
17008 {
17009 switch (mode)
17010 {
17011 case HImode:
17012 case SImode:
17013 return true;
17014
17015 case QImode:
17016 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17017
17018 case DImode:
17019 return TARGET_64BIT;
17020
17021 default:
17022 return false;
17023 }
17024 }
17025
17026 /* Return true if MODE1 is accessible in a register that can hold MODE2
17027 without copying. That is, all register classes that can hold MODE2
17028 can also hold MODE1. */
17029
17030 bool
17031 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17032 {
17033 if (mode1 == mode2)
17034 return true;
17035
17036 if (ix86_tieable_integer_mode_p (mode1)
17037 && ix86_tieable_integer_mode_p (mode2))
17038 return true;
17039
17040 /* MODE2 being XFmode implies fp stack or general regs, which means we
17041 can tie any smaller floating point modes to it. Note that we do not
17042 tie this with TFmode. */
17043 if (mode2 == XFmode)
17044 return mode1 == SFmode || mode1 == DFmode;
17045
17046 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17047 that we can tie it with SFmode. */
17048 if (mode2 == DFmode)
17049 return mode1 == SFmode;
17050
17051 /* If MODE2 is only appropriate for an SSE register, then tie with
17052 any other mode acceptable to SSE registers. */
17053 if (GET_MODE_SIZE (mode2) >= 8
17054 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17055 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17056
17057 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17058 with any other mode acceptable to MMX registers. */
17059 if (GET_MODE_SIZE (mode2) == 8
17060 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17061 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17062
17063 return false;
17064 }
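
/* For instance, by the checks above ix86_modes_tieable_p (SFmode, XFmode)
   and ix86_modes_tieable_p (SFmode, DFmode) both hold, while QImode ties
   with SImode only when ix86_tieable_integer_mode_p accepts QImode.  */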
17065
17066 /* Return the cost of moving data of mode M between a
17067 register and memory. A value of 2 is the default; this cost is
17068 relative to those in `REGISTER_MOVE_COST'.
17069
17070 If moving between registers and memory is more expensive than
17071 between two registers, you should define this macro to express the
17072 relative cost.
17073
17074 Also model the increased cost of moving QImode values in registers
17075 outside the Q_REGS classes.
17076 */
17077 int
17078 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17079 {
17080 if (FLOAT_CLASS_P (class))
17081 {
17082 int index;
17083 switch (mode)
17084 {
17085 case SFmode:
17086 index = 0;
17087 break;
17088 case DFmode:
17089 index = 1;
17090 break;
17091 case XFmode:
17092 index = 2;
17093 break;
17094 default:
17095 return 100;
17096 }
17097 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17098 }
17099 if (SSE_CLASS_P (class))
17100 {
17101 int index;
17102 switch (GET_MODE_SIZE (mode))
17103 {
17104 case 4:
17105 index = 0;
17106 break;
17107 case 8:
17108 index = 1;
17109 break;
17110 case 16:
17111 index = 2;
17112 break;
17113 default:
17114 return 100;
17115 }
17116 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17117 }
17118 if (MMX_CLASS_P (class))
17119 {
17120 int index;
17121 switch (GET_MODE_SIZE (mode))
17122 {
17123 case 4:
17124 index = 0;
17125 break;
17126 case 8:
17127 index = 1;
17128 break;
17129 default:
17130 return 100;
17131 }
17132 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17133 }
17134 switch (GET_MODE_SIZE (mode))
17135 {
17136 case 1:
17137 if (in)
17138 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17139 : ix86_cost->movzbl_load);
17140 else
17141 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17142 : ix86_cost->int_store[0] + 4);
17143 break;
17144 case 2:
17145 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17146 default:
17147 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17148 if (mode == TFmode)
17149 mode = XFmode;
17150 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17151 * (((int) GET_MODE_SIZE (mode)
17152 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17153 }
17154 }
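
/* A minimal sketch of how the hook above consults the cost tables
   (illustrative only; the numbers returned depend on the active
   ix86_cost table):  */
#if 0
  /* DFmode load into an x87 register: FLOAT_CLASS_P, index 1.  */
  int c1 = ix86_memory_move_cost (DFmode, FLOAT_REGS, 1);   /* fp_load[1]   */
  /* V4SFmode store from an SSE register: size 16, index 2.  */
  int c2 = ix86_memory_move_cost (V4SFmode, SSE_REGS, 0);   /* sse_store[2] */
  /* QImode load into a non-Q class uses the movzbl cost.  */
  int c3 = ix86_memory_move_cost (QImode, NON_Q_REGS, 1);   /* movzbl_load  */
#endif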
17155
17156 /* Compute a (partial) cost for rtx X. Return true if the complete
17157 cost has been computed, and false if subexpressions should be
17158 scanned. In either case, *TOTAL contains the cost result. */
17159
17160 static bool
17161 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17162 {
17163 enum machine_mode mode = GET_MODE (x);
17164
17165 switch (code)
17166 {
17167 case CONST_INT:
17168 case CONST:
17169 case LABEL_REF:
17170 case SYMBOL_REF:
17171 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17172 *total = 3;
17173 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17174 *total = 2;
17175 else if (flag_pic && SYMBOLIC_CONST (x)
17176 && (!TARGET_64BIT
17177 || (GET_CODE (x) != LABEL_REF
17178 && (GET_CODE (x) != SYMBOL_REF
17179 || !SYMBOL_REF_LOCAL_P (x)))))
17180 *total = 1;
17181 else
17182 *total = 0;
17183 return true;
17184
17185 case CONST_DOUBLE:
17186 if (mode == VOIDmode)
17187 *total = 0;
17188 else
17189 switch (standard_80387_constant_p (x))
17190 {
17191 case 1: /* 0.0 */
17192 *total = 1;
17193 break;
17194 default: /* Other constants */
17195 *total = 2;
17196 break;
17197 case 0:
17198 case -1:
17199 /* Start with (MEM (SYMBOL_REF)), since that's where
17200 it'll probably end up. Add a penalty for size. */
17201 *total = (COSTS_N_INSNS (1)
17202 + (flag_pic != 0 && !TARGET_64BIT)
17203 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17204 break;
17205 }
17206 return true;
17207
17208 case ZERO_EXTEND:
17209 /* The zero extension is often completely free on x86_64, so make
17210 it as cheap as possible. */
17211 if (TARGET_64BIT && mode == DImode
17212 && GET_MODE (XEXP (x, 0)) == SImode)
17213 *total = 1;
17214 else if (TARGET_ZERO_EXTEND_WITH_AND)
17215 *total = ix86_cost->add;
17216 else
17217 *total = ix86_cost->movzx;
17218 return false;
17219
17220 case SIGN_EXTEND:
17221 *total = ix86_cost->movsx;
17222 return false;
17223
17224 case ASHIFT:
17225 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17226 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17227 {
17228 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17229 if (value == 1)
17230 {
17231 *total = ix86_cost->add;
17232 return false;
17233 }
17234 if ((value == 2 || value == 3)
17235 && ix86_cost->lea <= ix86_cost->shift_const)
17236 {
17237 *total = ix86_cost->lea;
17238 return false;
17239 }
17240 }
17241 /* FALLTHRU */
17242
17243 case ROTATE:
17244 case ASHIFTRT:
17245 case LSHIFTRT:
17246 case ROTATERT:
17247 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17248 {
17249 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17250 {
17251 if (INTVAL (XEXP (x, 1)) > 32)
17252 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17253 else
17254 *total = ix86_cost->shift_const * 2;
17255 }
17256 else
17257 {
17258 if (GET_CODE (XEXP (x, 1)) == AND)
17259 *total = ix86_cost->shift_var * 2;
17260 else
17261 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17262 }
17263 }
17264 else
17265 {
17266 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17267 *total = ix86_cost->shift_const;
17268 else
17269 *total = ix86_cost->shift_var;
17270 }
17271 return false;
17272
17273 case MULT:
17274 if (FLOAT_MODE_P (mode))
17275 {
17276 *total = ix86_cost->fmul;
17277 return false;
17278 }
17279 else
17280 {
17281 rtx op0 = XEXP (x, 0);
17282 rtx op1 = XEXP (x, 1);
17283 int nbits;
17284 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17285 {
17286 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17287 for (nbits = 0; value != 0; value &= value - 1)
17288 nbits++;
17289 }
17290 else
17291 /* This is arbitrary. */
17292 nbits = 7;
17293
17294 /* Compute costs correctly for widening multiplication. */
17295 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17296 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17297 == GET_MODE_SIZE (mode))
17298 {
17299 int is_mulwiden = 0;
17300 enum machine_mode inner_mode = GET_MODE (op0);
17301
17302 if (GET_CODE (op0) == GET_CODE (op1))
17303 is_mulwiden = 1, op1 = XEXP (op1, 0);
17304 else if (GET_CODE (op1) == CONST_INT)
17305 {
17306 if (GET_CODE (op0) == SIGN_EXTEND)
17307 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17308 == INTVAL (op1);
17309 else
17310 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17311 }
17312
17313 if (is_mulwiden)
17314 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17315 }
17316
17317 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17318 + nbits * ix86_cost->mult_bit
17319 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17320
17321 return true;
17322 }
17323
17324 case DIV:
17325 case UDIV:
17326 case MOD:
17327 case UMOD:
17328 if (FLOAT_MODE_P (mode))
17329 *total = ix86_cost->fdiv;
17330 else
17331 *total = ix86_cost->divide[MODE_INDEX (mode)];
17332 return false;
17333
17334 case PLUS:
17335 if (FLOAT_MODE_P (mode))
17336 *total = ix86_cost->fadd;
17337 else if (GET_MODE_CLASS (mode) == MODE_INT
17338 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17339 {
17340 if (GET_CODE (XEXP (x, 0)) == PLUS
17341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17342 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17343 && CONSTANT_P (XEXP (x, 1)))
17344 {
17345 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17346 if (val == 2 || val == 4 || val == 8)
17347 {
17348 *total = ix86_cost->lea;
17349 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17350 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17351 outer_code);
17352 *total += rtx_cost (XEXP (x, 1), outer_code);
17353 return true;
17354 }
17355 }
17356 else if (GET_CODE (XEXP (x, 0)) == MULT
17357 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17358 {
17359 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17360 if (val == 2 || val == 4 || val == 8)
17361 {
17362 *total = ix86_cost->lea;
17363 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17364 *total += rtx_cost (XEXP (x, 1), outer_code);
17365 return true;
17366 }
17367 }
17368 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17369 {
17370 *total = ix86_cost->lea;
17371 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17372 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17373 *total += rtx_cost (XEXP (x, 1), outer_code);
17374 return true;
17375 }
17376 }
17377 /* FALLTHRU */
17378
17379 case MINUS:
17380 if (FLOAT_MODE_P (mode))
17381 {
17382 *total = ix86_cost->fadd;
17383 return false;
17384 }
17385 /* FALLTHRU */
17386
17387 case AND:
17388 case IOR:
17389 case XOR:
17390 if (!TARGET_64BIT && mode == DImode)
17391 {
17392 *total = (ix86_cost->add * 2
17393 + (rtx_cost (XEXP (x, 0), outer_code)
17394 << (GET_MODE (XEXP (x, 0)) != DImode))
17395 + (rtx_cost (XEXP (x, 1), outer_code)
17396 << (GET_MODE (XEXP (x, 1)) != DImode)));
17397 return true;
17398 }
17399 /* FALLTHRU */
17400
17401 case NEG:
17402 if (FLOAT_MODE_P (mode))
17403 {
17404 *total = ix86_cost->fchs;
17405 return false;
17406 }
17407 /* FALLTHRU */
17408
17409 case NOT:
17410 if (!TARGET_64BIT && mode == DImode)
17411 *total = ix86_cost->add * 2;
17412 else
17413 *total = ix86_cost->add;
17414 return false;
17415
17416 case COMPARE:
17417 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17418 && XEXP (XEXP (x, 0), 1) == const1_rtx
17419 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17420 && XEXP (x, 1) == const0_rtx)
17421 {
17422 /* This kind of construct is implemented using test[bwl].
17423 Treat it as if we had an AND. */
17424 *total = (ix86_cost->add
17425 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17426 + rtx_cost (const1_rtx, outer_code));
17427 return true;
17428 }
17429 return false;
17430
17431 case FLOAT_EXTEND:
17432 if (!TARGET_SSE_MATH
17433 || mode == XFmode
17434 || (mode == DFmode && !TARGET_SSE2))
17435 /* For standard 80387 constants, raise the cost to keep
17436 compress_float_constant() from generating a load from memory. */
17437 switch (standard_80387_constant_p (XEXP (x, 0)))
17438 {
17439 case -1:
17440 case 0:
17441 *total = 0;
17442 break;
17443 case 1: /* 0.0 */
17444 *total = 1;
17445 break;
17446 default:
17447 *total = (x86_ext_80387_constants & TUNEMASK
17448 || optimize_size
17449 ? 1 : 0);
17450 }
17451 return false;
17452
17453 case ABS:
17454 if (FLOAT_MODE_P (mode))
17455 *total = ix86_cost->fabs;
17456 return false;
17457
17458 case SQRT:
17459 if (FLOAT_MODE_P (mode))
17460 *total = ix86_cost->fsqrt;
17461 return false;
17462
17463 case UNSPEC:
17464 if (XINT (x, 1) == UNSPEC_TP)
17465 *total = 0;
17466 return false;
17467
17468 default:
17469 return false;
17470 }
17471 }
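
/* Worked example for the MULT case above (a sketch; the exact numbers
   come from the processor cost table in use): for
   (mult:SI (reg:SI a) (const_int 10)) the constant 10 = 0b1010 has two
   bits set, so nbits == 2 and the reported cost is
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the costs of the
   two operands; a non-constant multiplier uses the arbitrary
   nbits == 7 instead.  */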
17472
17473 #if TARGET_MACHO
17474
17475 static int current_machopic_label_num;
17476
17477 /* Given a symbol name and its associated stub, write out the
17478 definition of the stub. */
17479
17480 void
17481 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17482 {
17483 unsigned int length;
17484 char *binder_name, *symbol_name, lazy_ptr_name[32];
17485 int label = ++current_machopic_label_num;
17486
17487 /* For 64-bit we shouldn't get here. */
17488 gcc_assert (!TARGET_64BIT);
17489
17490 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17491 symb = (*targetm.strip_name_encoding) (symb);
17492
17493 length = strlen (stub);
17494 binder_name = alloca (length + 32);
17495 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17496
17497 length = strlen (symb);
17498 symbol_name = alloca (length + 32);
17499 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17500
17501 sprintf (lazy_ptr_name, "L%d$lz", label);
17502
17503 if (MACHOPIC_PURE)
17504 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17505 else
17506 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17507
17508 fprintf (file, "%s:\n", stub);
17509 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17510
17511 if (MACHOPIC_PURE)
17512 {
17513 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17514 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17515 fprintf (file, "\tjmp\t*%%edx\n");
17516 }
17517 else
17518 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17519
17520 fprintf (file, "%s:\n", binder_name);
17521
17522 if (MACHOPIC_PURE)
17523 {
17524 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17525 fprintf (file, "\tpushl\t%%eax\n");
17526 }
17527 else
17528 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17529
17530 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17531
17532 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17533 fprintf (file, "%s:\n", lazy_ptr_name);
17534 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17535 fprintf (file, "\t.long %s\n", binder_name);
17536 }
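
/* For reference, the MACHOPIC_PURE path above emits a stub of roughly
   this shape (the symbol "_foo", the stub and binder label names and
   the label number 1 are hypothetical; only the instruction sequence
   follows from the fprintf calls above):

	L_foo$stub:
		.indirect_symbol _foo
		call	LPC$1
	LPC$1:	popl	%eax
		movl	L1$lz-LPC$1(%eax),%edx
		jmp	*%edx
	L_foo$stub_binder:
		lea	L1$lz-LPC$1(%eax),%eax
		pushl	%eax
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long	L_foo$stub_binder
   */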
17537
17538 void
17539 darwin_x86_file_end (void)
17540 {
17541 darwin_file_end ();
17542 ix86_file_end ();
17543 }
17544 #endif /* TARGET_MACHO */
17545
17546 /* Order the registers for register allocator. */
17547
17548 void
17549 x86_order_regs_for_local_alloc (void)
17550 {
17551 int pos = 0;
17552 int i;
17553
17554 /* First allocate the local general purpose registers. */
17555 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17556 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17557 reg_alloc_order [pos++] = i;
17558
17559 /* Global general purpose registers. */
17560 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17561 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17562 reg_alloc_order [pos++] = i;
17563
17564 /* x87 registers come first in case we are doing FP math
17565 using them. */
17566 if (!TARGET_SSE_MATH)
17567 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17568 reg_alloc_order [pos++] = i;
17569
17570 /* SSE registers. */
17571 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17572 reg_alloc_order [pos++] = i;
17573 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17574 reg_alloc_order [pos++] = i;
17575
17576 /* x87 registers. */
17577 if (TARGET_SSE_MATH)
17578 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17579 reg_alloc_order [pos++] = i;
17580
17581 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17582 reg_alloc_order [pos++] = i;
17583
17584 /* Initialize the rest of the array, as we do not allocate some registers
17585 at all. */
17586 while (pos < FIRST_PSEUDO_REGISTER)
17587 reg_alloc_order [pos++] = 0;
17588 }
17589
17590 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17591 struct attribute_spec.handler. */
17592 static tree
17593 ix86_handle_struct_attribute (tree *node, tree name,
17594 tree args ATTRIBUTE_UNUSED,
17595 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17596 {
17597 tree *type = NULL;
17598 if (DECL_P (*node))
17599 {
17600 if (TREE_CODE (*node) == TYPE_DECL)
17601 type = &TREE_TYPE (*node);
17602 }
17603 else
17604 type = node;
17605
17606 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17607 || TREE_CODE (*type) == UNION_TYPE)))
17608 {
17609 warning (OPT_Wattributes, "%qs attribute ignored",
17610 IDENTIFIER_POINTER (name));
17611 *no_add_attrs = true;
17612 }
17613
17614 else if ((is_attribute_p ("ms_struct", name)
17615 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17616 || ((is_attribute_p ("gcc_struct", name)
17617 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17618 {
17619 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17620 IDENTIFIER_POINTER (name));
17621 *no_add_attrs = true;
17622 }
17623
17624 return NULL_TREE;
17625 }
17626
17627 static bool
17628 ix86_ms_bitfield_layout_p (tree record_type)
17629 {
17630 return (TARGET_MS_BITFIELD_LAYOUT &&
17631 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17632 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17633 }
17634
17635 /* Returns an expression indicating where the this parameter is
17636 located on entry to the FUNCTION. */
17637
17638 static rtx
17639 x86_this_parameter (tree function)
17640 {
17641 tree type = TREE_TYPE (function);
17642
17643 if (TARGET_64BIT)
17644 {
17645 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17646 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17647 }
17648
17649 if (ix86_function_regparm (type, function) > 0)
17650 {
17651 tree parm;
17652
17653 parm = TYPE_ARG_TYPES (type);
17654 /* Figure out whether or not the function has a variable number of
17655 arguments. */
17656 for (; parm; parm = TREE_CHAIN (parm))
17657 if (TREE_VALUE (parm) == void_type_node)
17658 break;
17659 /* If not, the this parameter is in the first argument. */
17660 if (parm)
17661 {
17662 int regno = 0;
17663 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17664 regno = 2;
17665 return gen_rtx_REG (SImode, regno);
17666 }
17667 }
17668
17669 if (aggregate_value_p (TREE_TYPE (type), type))
17670 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17671 else
17672 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17673 }
17674
17675 /* Determine whether x86_output_mi_thunk can succeed. */
17676
17677 static bool
17678 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17679 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17680 HOST_WIDE_INT vcall_offset, tree function)
17681 {
17682 /* 64-bit can handle anything. */
17683 if (TARGET_64BIT)
17684 return true;
17685
17686 /* For 32-bit, everything's fine if we have one free register. */
17687 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17688 return true;
17689
17690 /* Need a free register for vcall_offset. */
17691 if (vcall_offset)
17692 return false;
17693
17694 /* Need a free register for GOT references. */
17695 if (flag_pic && !(*targetm.binds_local_p) (function))
17696 return false;
17697
17698 /* Otherwise ok. */
17699 return true;
17700 }
17701
17702 /* Output the assembler code for a thunk function. THUNK_DECL is the
17703 declaration for the thunk function itself, FUNCTION is the decl for
17704 the target function. DELTA is an immediate constant offset to be
17705 added to THIS. If VCALL_OFFSET is nonzero, the word at
17706 *(*this + vcall_offset) should be added to THIS. */
17707
17708 static void
17709 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17710 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17711 HOST_WIDE_INT vcall_offset, tree function)
17712 {
17713 rtx xops[3];
17714 rtx this = x86_this_parameter (function);
17715 rtx this_reg, tmp;
17716
17717 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17718 pull it in now and let DELTA benefit. */
17719 if (REG_P (this))
17720 this_reg = this;
17721 else if (vcall_offset)
17722 {
17723 /* Put the this parameter into %eax. */
17724 xops[0] = this;
17725 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17726 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17727 }
17728 else
17729 this_reg = NULL_RTX;
17730
17731 /* Adjust the this parameter by a fixed constant. */
17732 if (delta)
17733 {
17734 xops[0] = GEN_INT (delta);
17735 xops[1] = this_reg ? this_reg : this;
17736 if (TARGET_64BIT)
17737 {
17738 if (!x86_64_general_operand (xops[0], DImode))
17739 {
17740 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17741 xops[1] = tmp;
17742 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17743 xops[0] = tmp;
17744 xops[1] = this;
17745 }
17746 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17747 }
17748 else
17749 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17750 }
17751
17752 /* Adjust the this parameter by a value stored in the vtable. */
17753 if (vcall_offset)
17754 {
17755 if (TARGET_64BIT)
17756 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17757 else
17758 {
17759 int tmp_regno = 2 /* ECX */;
17760 if (lookup_attribute ("fastcall",
17761 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17762 tmp_regno = 0 /* EAX */;
17763 tmp = gen_rtx_REG (SImode, tmp_regno);
17764 }
17765
17766 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17767 xops[1] = tmp;
17768 if (TARGET_64BIT)
17769 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17770 else
17771 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17772
17773 /* Adjust the this parameter. */
17774 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17775 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17776 {
17777 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17778 xops[0] = GEN_INT (vcall_offset);
17779 xops[1] = tmp2;
17780 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17781 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17782 }
17783 xops[1] = this_reg;
17784 if (TARGET_64BIT)
17785 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17786 else
17787 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17788 }
17789
17790 /* If necessary, drop THIS back to its stack slot. */
17791 if (this_reg && this_reg != this)
17792 {
17793 xops[0] = this_reg;
17794 xops[1] = this;
17795 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17796 }
17797
17798 xops[0] = XEXP (DECL_RTL (function), 0);
17799 if (TARGET_64BIT)
17800 {
17801 if (!flag_pic || (*targetm.binds_local_p) (function))
17802 output_asm_insn ("jmp\t%P0", xops);
17803 else
17804 {
17805 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17806 tmp = gen_rtx_CONST (Pmode, tmp);
17807 tmp = gen_rtx_MEM (QImode, tmp);
17808 xops[0] = tmp;
17809 output_asm_insn ("jmp\t%A0", xops);
17810 }
17811 }
17812 else
17813 {
17814 if (!flag_pic || (*targetm.binds_local_p) (function))
17815 output_asm_insn ("jmp\t%P0", xops);
17816 else
17817 #if TARGET_MACHO
17818 if (TARGET_MACHO)
17819 {
17820 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17821 tmp = (gen_rtx_SYMBOL_REF
17822 (Pmode,
17823 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17824 tmp = gen_rtx_MEM (QImode, tmp);
17825 xops[0] = tmp;
17826 output_asm_insn ("jmp\t%0", xops);
17827 }
17828 else
17829 #endif /* TARGET_MACHO */
17830 {
17831 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17832 output_set_got (tmp, NULL_RTX);
17833
17834 xops[1] = tmp;
17835 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17836 output_asm_insn ("jmp\t{*}%1", xops);
17837 }
17838 }
17839 }
17840
17841 static void
17842 x86_file_start (void)
17843 {
17844 default_file_start ();
17845 #if TARGET_MACHO
17846 darwin_file_start ();
17847 #endif
17848 if (X86_FILE_START_VERSION_DIRECTIVE)
17849 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17850 if (X86_FILE_START_FLTUSED)
17851 fputs ("\t.global\t__fltused\n", asm_out_file);
17852 if (ix86_asm_dialect == ASM_INTEL)
17853 fputs ("\t.intel_syntax\n", asm_out_file);
17854 }
17855
17856 int
17857 x86_field_alignment (tree field, int computed)
17858 {
17859 enum machine_mode mode;
17860 tree type = TREE_TYPE (field);
17861
17862 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17863 return computed;
17864 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17865 ? get_inner_array_type (type) : type);
17866 if (mode == DFmode || mode == DCmode
17867 || GET_MODE_CLASS (mode) == MODE_INT
17868 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17869 return MIN (32, computed);
17870 return computed;
17871 }
17872
17873 /* Output assembler code to FILE to increment profiler label # LABELNO
17874 for profiling a function entry. */
17875 void
17876 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17877 {
17878 if (TARGET_64BIT)
17879 if (flag_pic)
17880 {
17881 #ifndef NO_PROFILE_COUNTERS
17882 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17883 #endif
17884 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17885 }
17886 else
17887 {
17888 #ifndef NO_PROFILE_COUNTERS
17889 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17890 #endif
17891 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17892 }
17893 else if (flag_pic)
17894 {
17895 #ifndef NO_PROFILE_COUNTERS
17896 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17897 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17898 #endif
17899 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17900 }
17901 else
17902 {
17903 #ifndef NO_PROFILE_COUNTERS
17904 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17905 PROFILE_COUNT_REGISTER);
17906 #endif
17907 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17908 }
17909 }
17910
17911 /* We don't have exact information about the insn sizes, but we may quite
17912 safely assume that we know about all 1 byte insns and memory
17913 address sizes. This is enough to eliminate unnecessary padding in
17914 99% of cases. */
17915
17916 static int
17917 min_insn_size (rtx insn)
17918 {
17919 int l = 0;
17920
17921 if (!INSN_P (insn) || !active_insn_p (insn))
17922 return 0;
17923
17924 /* Discard alignments we've emitted and jump instructions. */
17925 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17926 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17927 return 0;
17928 if (GET_CODE (insn) == JUMP_INSN
17929 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17930 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17931 return 0;
17932
17933 /* Important case - calls are always 5 bytes.
17934 It is common to have many calls in a row. */
17935 if (GET_CODE (insn) == CALL_INSN
17936 && symbolic_reference_mentioned_p (PATTERN (insn))
17937 && !SIBLING_CALL_P (insn))
17938 return 5;
17939 if (get_attr_length (insn) <= 1)
17940 return 1;
17941
17942 /* For normal instructions we may rely on the sizes of addresses
17943 and the presence of a symbol to require 4 bytes of encoding.
17944 This is not the case for jumps where references are PC relative. */
17945 if (GET_CODE (insn) != JUMP_INSN)
17946 {
17947 l = get_attr_length_address (insn);
17948 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17949 l = 4;
17950 }
17951 if (l)
17952 return 1+l;
17953 else
17954 return 2;
17955 }
17956
17957 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
17958 16 byte window. */
17959
17960 static void
17961 ix86_avoid_jump_misspredicts (void)
17962 {
17963 rtx insn, start = get_insns ();
17964 int nbytes = 0, njumps = 0;
17965 int isjump = 0;
17966
17967 /* Look for all minimal intervals of instructions containing 4 jumps.
17968 The intervals are bounded by START and INSN. NBYTES is the total
17969 size of the instructions in the interval, including INSN and not
17970 including START. When NBYTES is smaller than 16, it is possible that
17971 the ends of START and INSN fall into the same 16 byte window.
17972
17973 The smallest offset in the window at which INSN can start is the case
17974 where START ends at offset 0. The offset of INSN is then
17975 NBYTES - sizeof (INSN). We add a p2align to the 16 byte window with
17976 maxskip 17 - NBYTES + sizeof (INSN). */
17977 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17978 {
17979
17980 nbytes += min_insn_size (insn);
17981 if (dump_file)
17982 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17983 INSN_UID (insn), min_insn_size (insn));
17984 if ((GET_CODE (insn) == JUMP_INSN
17985 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17986 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17987 || GET_CODE (insn) == CALL_INSN)
17988 njumps++;
17989 else
17990 continue;
17991
17992 while (njumps > 3)
17993 {
17994 start = NEXT_INSN (start);
17995 if ((GET_CODE (start) == JUMP_INSN
17996 && GET_CODE (PATTERN (start)) != ADDR_VEC
17997 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17998 || GET_CODE (start) == CALL_INSN)
17999 njumps--, isjump = 1;
18000 else
18001 isjump = 0;
18002 nbytes -= min_insn_size (start);
18003 }
18004 gcc_assert (njumps >= 0);
18005 if (dump_file)
18006 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18007 INSN_UID (start), INSN_UID (insn), nbytes);
18008
18009 if (njumps == 3 && isjump && nbytes < 16)
18010 {
18011 int padsize = 15 - nbytes + min_insn_size (insn);
18012
18013 if (dump_file)
18014 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18015 INSN_UID (insn), padsize);
18016 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18017 }
18018 }
18019 }
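
/* Worked example of the padding above (illustrative): three 5-byte
   calls followed by a 1-byte jump put four jump/call insns into 16
   bytes.  The while loop shrinks the interval by one call, leaving
   njumps == 3, isjump == 1 and nbytes == 11, so the jump is padded by
   15 - 11 + 1 == 5 bytes, pushing it out of the 16 byte window.  */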
18020
18021 /* AMD Athlon works faster
18022 when RET is not the destination of a conditional jump or directly
18023 preceded by another jump instruction. We avoid the penalty by inserting
18024 a NOP just before the RET instruction in such cases. */
18025 static void
18026 ix86_pad_returns (void)
18027 {
18028 edge e;
18029 edge_iterator ei;
18030
18031 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18032 {
18033 basic_block bb = e->src;
18034 rtx ret = BB_END (bb);
18035 rtx prev;
18036 bool replace = false;
18037
18038 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18039 || !maybe_hot_bb_p (bb))
18040 continue;
18041 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18042 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18043 break;
18044 if (prev && GET_CODE (prev) == CODE_LABEL)
18045 {
18046 edge e;
18047 edge_iterator ei;
18048
18049 FOR_EACH_EDGE (e, ei, bb->preds)
18050 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18051 && !(e->flags & EDGE_FALLTHRU))
18052 replace = true;
18053 }
18054 if (!replace)
18055 {
18056 prev = prev_active_insn (ret);
18057 if (prev
18058 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18059 || GET_CODE (prev) == CALL_INSN))
18060 replace = true;
18061 /* Empty functions get a branch mispredict even when the jump destination
18062 is not visible to us. */
18063 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18064 replace = true;
18065 }
18066 if (replace)
18067 {
18068 emit_insn_before (gen_return_internal_long (), ret);
18069 delete_insn (ret);
18070 }
18071 }
18072 }
18073
18074 /* Implement machine specific optimizations. We implement padding of returns
18075 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
18076 static void
18077 ix86_reorg (void)
18078 {
18079 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18080 ix86_pad_returns ();
18081 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18082 ix86_avoid_jump_misspredicts ();
18083 }
18084
18085 /* Return nonzero when a QImode register that must be represented via a REX
18086 prefix is used. */
18087 bool
18088 x86_extended_QIreg_mentioned_p (rtx insn)
18089 {
18090 int i;
18091 extract_insn_cached (insn);
18092 for (i = 0; i < recog_data.n_operands; i++)
18093 if (REG_P (recog_data.operand[i])
18094 && REGNO (recog_data.operand[i]) >= 4)
18095 return true;
18096 return false;
18097 }
18098
18099 /* Return nonzero when P points to a register encoded via a REX prefix.
18100 Called via for_each_rtx. */
18101 static int
18102 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18103 {
18104 unsigned int regno;
18105 if (!REG_P (*p))
18106 return 0;
18107 regno = REGNO (*p);
18108 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18109 }
18110
18111 /* Return true when INSN mentions register that must be encoded using REX
18112 prefix. */
18113 bool
18114 x86_extended_reg_mentioned_p (rtx insn)
18115 {
18116 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18117 }
18118
18119 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18120 optabs would emit if we didn't have TFmode patterns. */
18121
18122 void
18123 x86_emit_floatuns (rtx operands[2])
18124 {
18125 rtx neglab, donelab, i0, i1, f0, in, out;
18126 enum machine_mode mode, inmode;
18127
18128 inmode = GET_MODE (operands[1]);
18129 gcc_assert (inmode == SImode || inmode == DImode);
18130
18131 out = operands[0];
18132 in = force_reg (inmode, operands[1]);
18133 mode = GET_MODE (out);
18134 neglab = gen_label_rtx ();
18135 donelab = gen_label_rtx ();
18136 i1 = gen_reg_rtx (Pmode);
18137 f0 = gen_reg_rtx (mode);
18138
18139 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18140
18141 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18142 emit_jump_insn (gen_jump (donelab));
18143 emit_barrier ();
18144
18145 emit_label (neglab);
18146
18147 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18148 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18149 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18150 expand_float (f0, i0, 0);
18151 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18152
18153 emit_label (donelab);
18154 }
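
/* A minimal scalar sketch of the sequence emitted above (hypothetical
   helper, not part of the backend): values with the sign bit clear are
   converted directly; otherwise the value is halved with the low bit
   folded back in as a sticky bit, converted, and doubled.  */
#if 0
static double
floatuns_sketch (unsigned long x)
{
  if ((long) x >= 0)
    return (double) (long) x;			/* plain signed conversion */

  unsigned long half = (x >> 1) | (x & 1);	/* halve, keep a sticky bit */
  double f = (double) (long) half;
  return f + f;					/* scale back to full range */
}
#endif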
18155 \f
18156 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18157 with all elements equal to VAR. Return true if successful. */
18158
18159 static bool
18160 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18161 rtx target, rtx val)
18162 {
18163 enum machine_mode smode, wsmode, wvmode;
18164 rtx x;
18165
18166 switch (mode)
18167 {
18168 case V2SImode:
18169 case V2SFmode:
18170 if (!mmx_ok)
18171 return false;
18172 /* FALLTHRU */
18173
18174 case V2DFmode:
18175 case V2DImode:
18176 case V4SFmode:
18177 case V4SImode:
18178 val = force_reg (GET_MODE_INNER (mode), val);
18179 x = gen_rtx_VEC_DUPLICATE (mode, val);
18180 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18181 return true;
18182
18183 case V4HImode:
18184 if (!mmx_ok)
18185 return false;
18186 if (TARGET_SSE || TARGET_3DNOW_A)
18187 {
18188 val = gen_lowpart (SImode, val);
18189 x = gen_rtx_TRUNCATE (HImode, val);
18190 x = gen_rtx_VEC_DUPLICATE (mode, x);
18191 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18192 return true;
18193 }
18194 else
18195 {
18196 smode = HImode;
18197 wsmode = SImode;
18198 wvmode = V2SImode;
18199 goto widen;
18200 }
18201
18202 case V8QImode:
18203 if (!mmx_ok)
18204 return false;
18205 smode = QImode;
18206 wsmode = HImode;
18207 wvmode = V4HImode;
18208 goto widen;
18209 case V8HImode:
18210 if (TARGET_SSE2)
18211 {
18212 rtx tmp1, tmp2;
18213 /* Extend HImode to SImode using a paradoxical SUBREG. */
18214 tmp1 = gen_reg_rtx (SImode);
18215 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18216 /* Insert the SImode value as low element of V4SImode vector. */
18217 tmp2 = gen_reg_rtx (V4SImode);
18218 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18219 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18220 CONST0_RTX (V4SImode),
18221 const1_rtx);
18222 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18223 /* Cast the V4SImode vector back to a V8HImode vector. */
18224 tmp1 = gen_reg_rtx (V8HImode);
18225 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18226 /* Duplicate the low short through the whole low SImode word. */
18227 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18228 /* Cast the V8HImode vector back to a V4SImode vector. */
18229 tmp2 = gen_reg_rtx (V4SImode);
18230 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18231 /* Replicate the low element of the V4SImode vector. */
18232 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18233 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18234 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18235 return true;
18236 }
18237 smode = HImode;
18238 wsmode = SImode;
18239 wvmode = V4SImode;
18240 goto widen;
18241 case V16QImode:
18242 if (TARGET_SSE2)
18243 {
18244 rtx tmp1, tmp2;
18245 /* Extend QImode to SImode using a paradoxical SUBREG. */
18246 tmp1 = gen_reg_rtx (SImode);
18247 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18248 /* Insert the SImode value as low element of V4SImode vector. */
18249 tmp2 = gen_reg_rtx (V4SImode);
18250 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18251 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18252 CONST0_RTX (V4SImode),
18253 const1_rtx);
18254 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18255 /* Cast the V4SImode vector back to a V16QImode vector. */
18256 tmp1 = gen_reg_rtx (V16QImode);
18257 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18258 /* Duplicate the low byte through the whole low SImode word. */
18259 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18260 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18261 /* Cast the V16QImode vector back to a V4SImode vector. */
18262 tmp2 = gen_reg_rtx (V4SImode);
18263 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18264 /* Replicate the low element of the V4SImode vector. */
18265 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18266 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18267 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18268 return true;
18269 }
18270 smode = QImode;
18271 wsmode = HImode;
18272 wvmode = V8HImode;
18273 goto widen;
18274 widen:
18275 /* Replicate the value once into the next wider mode and recurse. */
18276 val = convert_modes (wsmode, smode, val, true);
18277 x = expand_simple_binop (wsmode, ASHIFT, val,
18278 GEN_INT (GET_MODE_BITSIZE (smode)),
18279 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18280 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18281
18282 x = gen_reg_rtx (wvmode);
18283 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18284 gcc_unreachable ();
18285 emit_move_insn (target, gen_lowpart (mode, x));
18286 return true;
18287
18288 default:
18289 return false;
18290 }
18291 }
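
/* Scalar view of the "widen" fallback above (a sketch; v is a
   hypothetical QImode value): the byte is first replicated into an
   HImode scalar, and the wider vector mode then finishes the
   duplication recursively.  */
#if 0
  unsigned short h = (unsigned char) v | ((unsigned short) (unsigned char) v << 8);
#endif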
18292
18293 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18294 whose ONE_VAR element is VAR, and other elements are zero. Return true
18295 if successful. */
18296
18297 static bool
18298 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18299 rtx target, rtx var, int one_var)
18300 {
18301 enum machine_mode vsimode;
18302 rtx new_target;
18303 rtx x, tmp;
18304
18305 switch (mode)
18306 {
18307 case V2SFmode:
18308 case V2SImode:
18309 if (!mmx_ok)
18310 return false;
18311 /* FALLTHRU */
18312
18313 case V2DFmode:
18314 case V2DImode:
18315 if (one_var != 0)
18316 return false;
18317 var = force_reg (GET_MODE_INNER (mode), var);
18318 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18319 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18320 return true;
18321
18322 case V4SFmode:
18323 case V4SImode:
18324 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18325 new_target = gen_reg_rtx (mode);
18326 else
18327 new_target = target;
18328 var = force_reg (GET_MODE_INNER (mode), var);
18329 x = gen_rtx_VEC_DUPLICATE (mode, var);
18330 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18331 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18332 if (one_var != 0)
18333 {
18334 /* We need to shuffle the value to the correct position, so
18335 create a new pseudo to store the intermediate result. */
18336
18337 /* With SSE2, we can use the integer shuffle insns. */
18338 if (mode != V4SFmode && TARGET_SSE2)
18339 {
18340 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18341 GEN_INT (1),
18342 GEN_INT (one_var == 1 ? 0 : 1),
18343 GEN_INT (one_var == 2 ? 0 : 1),
18344 GEN_INT (one_var == 3 ? 0 : 1)));
18345 if (target != new_target)
18346 emit_move_insn (target, new_target);
18347 return true;
18348 }
18349
18350 /* Otherwise convert the intermediate result to V4SFmode and
18351 use the SSE1 shuffle instructions. */
18352 if (mode != V4SFmode)
18353 {
18354 tmp = gen_reg_rtx (V4SFmode);
18355 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18356 }
18357 else
18358 tmp = new_target;
18359
18360 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18361 GEN_INT (1),
18362 GEN_INT (one_var == 1 ? 0 : 1),
18363 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18364 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18365
18366 if (mode != V4SFmode)
18367 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18368 else if (tmp != target)
18369 emit_move_insn (target, tmp);
18370 }
18371 else if (target != new_target)
18372 emit_move_insn (target, new_target);
18373 return true;
18374
18375 case V8HImode:
18376 case V16QImode:
18377 vsimode = V4SImode;
18378 goto widen;
18379 case V4HImode:
18380 case V8QImode:
18381 if (!mmx_ok)
18382 return false;
18383 vsimode = V2SImode;
18384 goto widen;
18385 widen:
18386 if (one_var != 0)
18387 return false;
18388
18389 /* Zero extend the variable element to SImode and recurse. */
18390 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18391
18392 x = gen_reg_rtx (vsimode);
18393 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18394 var, one_var))
18395 gcc_unreachable ();
18396
18397 emit_move_insn (target, gen_lowpart (mode, x));
18398 return true;
18399
18400 default:
18401 return false;
18402 }
18403 }
18404
18405 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18406 consisting of the values in VALS. It is known that all elements
18407 except ONE_VAR are constants. Return true if successful. */
18408
18409 static bool
18410 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18411 rtx target, rtx vals, int one_var)
18412 {
18413 rtx var = XVECEXP (vals, 0, one_var);
18414 enum machine_mode wmode;
18415 rtx const_vec, x;
18416
18417 const_vec = copy_rtx (vals);
18418 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18419 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18420
18421 switch (mode)
18422 {
18423 case V2DFmode:
18424 case V2DImode:
18425 case V2SFmode:
18426 case V2SImode:
18427 /* For the two element vectors, it's just as easy to use
18428 the general case. */
18429 return false;
18430
18431 case V4SFmode:
18432 case V4SImode:
18433 case V8HImode:
18434 case V4HImode:
18435 break;
18436
18437 case V16QImode:
18438 wmode = V8HImode;
18439 goto widen;
18440 case V8QImode:
18441 wmode = V4HImode;
18442 goto widen;
18443 widen:
18444 /* There's no way to set one QImode entry easily. Combine
18445 the variable value with its adjacent constant value, and
18446 promote to an HImode set. */
18447 x = XVECEXP (vals, 0, one_var ^ 1);
18448 if (one_var & 1)
18449 {
18450 var = convert_modes (HImode, QImode, var, true);
18451 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18452 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18453 x = GEN_INT (INTVAL (x) & 0xff);
18454 }
18455 else
18456 {
18457 var = convert_modes (HImode, QImode, var, true);
18458 x = gen_int_mode (INTVAL (x) << 8, HImode);
18459 }
18460 if (x != const0_rtx)
18461 var = expand_simple_binop (HImode, IOR, var, x, var,
18462 1, OPTAB_LIB_WIDEN);
18463
18464 x = gen_reg_rtx (wmode);
18465 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18466 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18467
18468 emit_move_insn (target, gen_lowpart (mode, x));
18469 return true;
18470
18471 default:
18472 return false;
18473 }
18474
18475 emit_move_insn (target, const_vec);
18476 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18477 return true;
18478 }
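
/* Illustrative scalar form of the QImode combination above (a sketch;
   var and c stand for the variable byte and its adjacent constant):  */
#if 0
  unsigned short elt;
  if (one_var & 1)
    elt = ((unsigned short) var << 8) | (c & 0xff);	/* var is the high byte */
  else
    elt = var | ((unsigned short) c << 8);		/* var is the low byte  */
  /* elt is then stored into the wider vector at index one_var >> 1.  */
#endif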
18479
18480 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18481 all values variable, and none identical. */
18482
18483 static void
18484 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18485 rtx target, rtx vals)
18486 {
18487 enum machine_mode half_mode = GET_MODE_INNER (mode);
18488 rtx op0 = NULL, op1 = NULL;
18489 bool use_vec_concat = false;
18490
18491 switch (mode)
18492 {
18493 case V2SFmode:
18494 case V2SImode:
18495 if (!mmx_ok && !TARGET_SSE)
18496 break;
18497 /* FALLTHRU */
18498
18499 case V2DFmode:
18500 case V2DImode:
18501 /* For the two element vectors, we always implement VEC_CONCAT. */
18502 op0 = XVECEXP (vals, 0, 0);
18503 op1 = XVECEXP (vals, 0, 1);
18504 use_vec_concat = true;
18505 break;
18506
18507 case V4SFmode:
18508 half_mode = V2SFmode;
18509 goto half;
18510 case V4SImode:
18511 half_mode = V2SImode;
18512 goto half;
18513 half:
18514 {
18515 rtvec v;
18516
18517 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18518 Recurse to load the two halves. */
18519
18520 op0 = gen_reg_rtx (half_mode);
18521 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18522 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18523
18524 op1 = gen_reg_rtx (half_mode);
18525 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18526 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18527
18528 use_vec_concat = true;
18529 }
18530 break;
18531
18532 case V8HImode:
18533 case V16QImode:
18534 case V4HImode:
18535 case V8QImode:
18536 break;
18537
18538 default:
18539 gcc_unreachable ();
18540 }
18541
18542 if (use_vec_concat)
18543 {
18544 if (!register_operand (op0, half_mode))
18545 op0 = force_reg (half_mode, op0);
18546 if (!register_operand (op1, half_mode))
18547 op1 = force_reg (half_mode, op1);
18548
18549 emit_insn (gen_rtx_SET (VOIDmode, target,
18550 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18551 }
18552 else
18553 {
18554 int i, j, n_elts, n_words, n_elt_per_word;
18555 enum machine_mode inner_mode;
18556 rtx words[4], shift;
18557
18558 inner_mode = GET_MODE_INNER (mode);
18559 n_elts = GET_MODE_NUNITS (mode);
18560 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18561 n_elt_per_word = n_elts / n_words;
18562 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18563
18564 for (i = 0; i < n_words; ++i)
18565 {
18566 rtx word = NULL_RTX;
18567
18568 for (j = 0; j < n_elt_per_word; ++j)
18569 {
18570 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18571 elt = convert_modes (word_mode, inner_mode, elt, true);
18572
18573 if (j == 0)
18574 word = elt;
18575 else
18576 {
18577 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18578 word, 1, OPTAB_LIB_WIDEN);
18579 word = expand_simple_binop (word_mode, IOR, word, elt,
18580 word, 1, OPTAB_LIB_WIDEN);
18581 }
18582 }
18583
18584 words[i] = word;
18585 }
18586
18587 if (n_words == 1)
18588 emit_move_insn (target, gen_lowpart (mode, words[0]));
18589 else if (n_words == 2)
18590 {
18591 rtx tmp = gen_reg_rtx (mode);
18592 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18593 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18594 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18595 emit_move_insn (target, tmp);
18596 }
18597 else if (n_words == 4)
18598 {
18599 rtx tmp = gen_reg_rtx (V4SImode);
18600 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18601 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18602 emit_move_insn (target, gen_lowpart (mode, tmp));
18603 }
18604 else
18605 gcc_unreachable ();
18606 }
18607 }
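
/* Illustrative scalar form of the word-building loop above (a sketch):
   packing four HImode elements e0..e3 into two SImode words on a
   32-bit target; each word is built from its highest element down so
   the lowest-numbered element ends up in the low bits.  */
#if 0
  unsigned int w0 = ((unsigned int) e1 << 16) | e0;
  unsigned int w1 = ((unsigned int) e3 << 16) | e2;
  /* w0 and w1 are then assembled into the vector via the n_words == 2 path. */
#endif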
18608
18609 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18610 instructions unless MMX_OK is true. */
18611
18612 void
18613 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18614 {
18615 enum machine_mode mode = GET_MODE (target);
18616 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18617 int n_elts = GET_MODE_NUNITS (mode);
18618 int n_var = 0, one_var = -1;
18619 bool all_same = true, all_const_zero = true;
18620 int i;
18621 rtx x;
18622
18623 for (i = 0; i < n_elts; ++i)
18624 {
18625 x = XVECEXP (vals, 0, i);
18626 if (!CONSTANT_P (x))
18627 n_var++, one_var = i;
18628 else if (x != CONST0_RTX (inner_mode))
18629 all_const_zero = false;
18630 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18631 all_same = false;
18632 }
18633
18634 /* Constants are best loaded from the constant pool. */
18635 if (n_var == 0)
18636 {
18637 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18638 return;
18639 }
18640
18641 /* If all values are identical, broadcast the value. */
18642 if (all_same
18643 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18644 XVECEXP (vals, 0, 0)))
18645 return;
18646
18647 /* Values where only one field is non-constant are best loaded from
18648 the pool and overwritten via move later. */
18649 if (n_var == 1)
18650 {
18651 if (all_const_zero
18652 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18653 XVECEXP (vals, 0, one_var),
18654 one_var))
18655 return;
18656
18657 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18658 return;
18659 }
18660
18661 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18662 }
18663
18664 void
18665 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18666 {
18667 enum machine_mode mode = GET_MODE (target);
18668 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18669 bool use_vec_merge = false;
18670 rtx tmp;
18671
18672 switch (mode)
18673 {
18674 case V2SFmode:
18675 case V2SImode:
18676 if (mmx_ok)
18677 {
18678 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18679 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18680 if (elt == 0)
18681 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18682 else
18683 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18684 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18685 return;
18686 }
18687 break;
18688
18689 case V2DFmode:
18690 case V2DImode:
18691 {
18692 rtx op0, op1;
18693
18694 /* For the two element vectors, we implement a VEC_CONCAT with
18695 the extraction of the other element. */
18696
18697 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18698 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18699
18700 if (elt == 0)
18701 op0 = val, op1 = tmp;
18702 else
18703 op0 = tmp, op1 = val;
18704
18705 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18706 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18707 }
18708 return;
18709
18710 case V4SFmode:
18711 switch (elt)
18712 {
18713 case 0:
18714 use_vec_merge = true;
18715 break;
18716
18717 case 1:
18718 /* tmp = target = A B C D */
18719 tmp = copy_to_reg (target);
18720 /* target = A A B B */
18721 emit_insn (gen_sse_unpcklps (target, target, target));
18722 /* target = X A B B */
18723 ix86_expand_vector_set (false, target, val, 0);
18724 /* target = A X C D */
18725 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18726 GEN_INT (1), GEN_INT (0),
18727 GEN_INT (2+4), GEN_INT (3+4)));
18728 return;
18729
18730 case 2:
18731 /* tmp = target = A B C D */
18732 tmp = copy_to_reg (target);
18733 /* tmp = X B C D */
18734 ix86_expand_vector_set (false, tmp, val, 0);
18735 /* target = A B X D */
18736 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18737 GEN_INT (0), GEN_INT (1),
18738 GEN_INT (0+4), GEN_INT (3+4)));
18739 return;
18740
18741 case 3:
18742 /* tmp = target = A B C D */
18743 tmp = copy_to_reg (target);
18744 /* tmp = X B C D */
18745 ix86_expand_vector_set (false, tmp, val, 0);
18746 /* target = A B C X */
18747 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18748 GEN_INT (0), GEN_INT (1),
18749 GEN_INT (2+4), GEN_INT (0+4)));
18750 return;
18751
18752 default:
18753 gcc_unreachable ();
18754 }
18755 break;
18756
18757 case V4SImode:
18758 /* Element 0 handled by vec_merge below. */
18759 if (elt == 0)
18760 {
18761 use_vec_merge = true;
18762 break;
18763 }
18764
18765 if (TARGET_SSE2)
18766 {
18767 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18768 store into element 0, then shuffle them back. */
18769
18770 rtx order[4];
18771
18772 order[0] = GEN_INT (elt);
18773 order[1] = const1_rtx;
18774 order[2] = const2_rtx;
18775 order[3] = GEN_INT (3);
18776 order[elt] = const0_rtx;
18777
18778 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18779 order[1], order[2], order[3]));
18780
18781 ix86_expand_vector_set (false, target, val, 0);
18782
18783 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18784 order[1], order[2], order[3]));
18785 }
18786 else
18787 {
18788 /* For SSE1, we have to reuse the V4SF code. */
18789 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18790 gen_lowpart (SFmode, val), elt);
18791 }
18792 return;
18793
18794 case V8HImode:
18795 use_vec_merge = TARGET_SSE2;
18796 break;
18797 case V4HImode:
18798 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18799 break;
18800
18801 case V16QImode:
18802 case V8QImode:
18803 default:
18804 break;
18805 }
18806
18807 if (use_vec_merge)
18808 {
18809 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18810 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18811 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18812 }
18813 else
18814 {
18815 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18816
18817 emit_move_insn (mem, target);
18818
18819 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18820 emit_move_insn (tmp, val);
18821
18822 emit_move_insn (target, mem);
18823 }
18824 }
18825
18826 void
18827 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18828 {
18829 enum machine_mode mode = GET_MODE (vec);
18830 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18831 bool use_vec_extr = false;
18832 rtx tmp;
18833
18834 switch (mode)
18835 {
18836 case V2SImode:
18837 case V2SFmode:
18838 if (!mmx_ok)
18839 break;
18840 /* FALLTHRU */
18841
18842 case V2DFmode:
18843 case V2DImode:
18844 use_vec_extr = true;
18845 break;
18846
18847 case V4SFmode:
18848 switch (elt)
18849 {
18850 case 0:
18851 tmp = vec;
18852 break;
18853
18854 case 1:
18855 case 3:
18856 tmp = gen_reg_rtx (mode);
18857 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18858 GEN_INT (elt), GEN_INT (elt),
18859 GEN_INT (elt+4), GEN_INT (elt+4)));
18860 break;
18861
18862 case 2:
18863 tmp = gen_reg_rtx (mode);
18864 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18865 break;
18866
18867 default:
18868 gcc_unreachable ();
18869 }
18870 vec = tmp;
18871 use_vec_extr = true;
18872 elt = 0;
18873 break;
18874
18875 case V4SImode:
18876 if (TARGET_SSE2)
18877 {
18878 switch (elt)
18879 {
18880 case 0:
18881 tmp = vec;
18882 break;
18883
18884 case 1:
18885 case 3:
18886 tmp = gen_reg_rtx (mode);
18887 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18888 GEN_INT (elt), GEN_INT (elt),
18889 GEN_INT (elt), GEN_INT (elt)));
18890 break;
18891
18892 case 2:
18893 tmp = gen_reg_rtx (mode);
18894 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18895 break;
18896
18897 default:
18898 gcc_unreachable ();
18899 }
18900 vec = tmp;
18901 use_vec_extr = true;
18902 elt = 0;
18903 }
18904 else
18905 {
18906 /* For SSE1, we have to reuse the V4SF code. */
18907 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18908 gen_lowpart (V4SFmode, vec), elt);
18909 return;
18910 }
18911 break;
18912
18913 case V8HImode:
18914 use_vec_extr = TARGET_SSE2;
18915 break;
18916 case V4HImode:
18917 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18918 break;
18919
18920 case V16QImode:
18921 case V8QImode:
18922 /* ??? Could extract the appropriate HImode element and shift. */
18923 default:
18924 break;
18925 }
18926
18927 if (use_vec_extr)
18928 {
18929 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18930 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18931
18932 /* Let the rtl optimizers know about the zero extension performed. */
18933 if (inner_mode == HImode)
18934 {
18935 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18936 target = gen_lowpart (SImode, target);
18937 }
18938
18939 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18940 }
18941 else
18942 {
18943 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18944
18945 emit_move_insn (mem, vec);
18946
18947 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18948 emit_move_insn (target, tmp);
18949 }
18950 }
18951
18952 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18953 pattern to reduce; DEST is the destination; IN is the input vector. */
18954
18955 void
18956 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18957 {
18958 rtx tmp1, tmp2, tmp3;
18959
18960 tmp1 = gen_reg_rtx (V4SFmode);
18961 tmp2 = gen_reg_rtx (V4SFmode);
18962 tmp3 = gen_reg_rtx (V4SFmode);
18963
18964 emit_insn (gen_sse_movhlps (tmp1, in, in));
18965 emit_insn (fn (tmp2, tmp1, in));
18966
18967 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18968 GEN_INT (1), GEN_INT (1),
18969 GEN_INT (1+4), GEN_INT (1+4)));
18970 emit_insn (fn (dest, tmp2, tmp3));
18971 }
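
/* Scalar equivalent of the reduction above for FN == smax (a sketch;
   the real pattern works on whole V4SFmode registers):  */
#if 0
static float
reduc_smax_sketch (float a, float b, float c, float d)
{
  float ac = a > c ? a : c;	/* movhlps + maxps folds {a,b} with {c,d} */
  float bd = b > d ? b : d;
  return ac > bd ? ac : bd;	/* shufps + maxps folds the two partials  */
}
#endif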
18972 \f
18973 /* Target hook for scalar_mode_supported_p. */
18974 static bool
18975 ix86_scalar_mode_supported_p (enum machine_mode mode)
18976 {
18977 if (DECIMAL_FLOAT_MODE_P (mode))
18978 return true;
18979 else
18980 return default_scalar_mode_supported_p (mode);
18981 }
18982
18983 /* Implements target hook vector_mode_supported_p. */
18984 static bool
18985 ix86_vector_mode_supported_p (enum machine_mode mode)
18986 {
18987 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18988 return true;
18989 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18990 return true;
18991 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18992 return true;
18993 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18994 return true;
18995 return false;
18996 }
18997
18998 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18999
19000 We do this in the new i386 backend to maintain source compatibility
19001 with the old cc0-based compiler. */
19002
19003 static tree
19004 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19005 tree inputs ATTRIBUTE_UNUSED,
19006 tree clobbers)
19007 {
19008 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19009 clobbers);
19010 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19011 clobbers);
19012 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19013 clobbers);
19014 return clobbers;
19015 }
19016
19017 /* Return true if this goes in large data/bss. */
19018
19019 static bool
19020 ix86_in_large_data_p (tree exp)
19021 {
19022 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19023 return false;
19024
19025 /* Functions are never large data. */
19026 if (TREE_CODE (exp) == FUNCTION_DECL)
19027 return false;
19028
19029 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19030 {
19031 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19032 if (strcmp (section, ".ldata") == 0
19033 || strcmp (section, ".lbss") == 0)
19034 return true;
19035 return false;
19036 }
19037 else
19038 {
19039 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19040
19041 /* If this is an incomplete type with size 0, then we can't put it
19042 in data because it might be too big when completed. */
19043 if (!size || size > ix86_section_threshold)
19044 return true;
19045 }
19046
19047 return false;
19048 }
19049 static void
19050 ix86_encode_section_info (tree decl, rtx rtl, int first)
19051 {
19052 default_encode_section_info (decl, rtl, first);
19053
19054 if (TREE_CODE (decl) == VAR_DECL
19055 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19056 && ix86_in_large_data_p (decl))
19057 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19058 }
19059
19060 /* Worker function for REVERSE_CONDITION. */
19061
19062 enum rtx_code
19063 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19064 {
19065 return (mode != CCFPmode && mode != CCFPUmode
19066 ? reverse_condition (code)
19067 : reverse_condition_maybe_unordered (code));
19068 }
19069
19070 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19071 to OPERANDS[0]. */
19072
19073 const char *
19074 output_387_reg_move (rtx insn, rtx *operands)
19075 {
19076 if (REG_P (operands[1])
19077 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19078 {
19079 if (REGNO (operands[0]) == FIRST_STACK_REG)
19080 return output_387_ffreep (operands, 0);
19081 return "fstp\t%y0";
19082 }
19083 if (STACK_TOP_P (operands[0]))
19084 return "fld%z1\t%y1";
19085 return "fst\t%y0";
19086 }
19087
19088 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19089 FP status register is set. */
19090
19091 void
19092 ix86_emit_fp_unordered_jump (rtx label)
19093 {
19094 rtx reg = gen_reg_rtx (HImode);
19095 rtx temp;
19096
19097 emit_insn (gen_x86_fnstsw_1 (reg));
19098
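  /* The FPU status word is now in REG.  With SAHF, C2 (bit 2 of the
     high status byte) is copied into PF, which is exactly the x86
     "unordered" condition; without SAHF, test that bit directly.  */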
19099 if (TARGET_USE_SAHF)
19100 {
19101 emit_insn (gen_x86_sahf_1 (reg));
19102
19103 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19104 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19105 }
19106 else
19107 {
19108 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19109
19110 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19111 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19112 }
19113
19114 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19115 gen_rtx_LABEL_REF (VOIDmode, label),
19116 pc_rtx);
19117 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19118 emit_jump_insn (temp);
19119 }
19120
19121 /* Output code to perform a log1p XFmode calculation. */
19122
19123 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19124 {
19125 rtx label1 = gen_label_rtx ();
19126 rtx label2 = gen_label_rtx ();
19127
19128 rtx tmp = gen_reg_rtx (XFmode);
19129 rtx tmp2 = gen_reg_rtx (XFmode);
19130
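  /* log1p(x) = ln(2) * log2(1 + x).  FYL2XP1 is documented as valid
     only for |x| < 1 - sqrt(2)/2 (~0.2929), so fall back to FYL2X on
     1 + x outside that range.  */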
19131 emit_insn (gen_absxf2 (tmp, op1));
19132 emit_insn (gen_cmpxf (tmp,
19133 CONST_DOUBLE_FROM_REAL_VALUE (
19134 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19135 XFmode)));
19136 emit_jump_insn (gen_bge (label1));
19137
19138 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19139 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19140 emit_jump (label2);
19141
19142 emit_label (label1);
19143 emit_move_insn (tmp, CONST1_RTX (XFmode));
19144 emit_insn (gen_addxf3 (tmp, op1, tmp));
19145 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19146 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19147
19148 emit_label (label2);
19149 }
19150
19151 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19152
19153 static void
19154 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19155 tree decl)
19156 {
19157 /* With Binutils 2.15, the "@unwind" marker must be specified on
19158 every occurrence of the ".eh_frame" section, not just the first
19159 one. */
19160 if (TARGET_64BIT
19161 && strcmp (name, ".eh_frame") == 0)
19162 {
19163 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19164 flags & SECTION_WRITE ? "aw" : "a");
19165 return;
19166 }
19167 default_elf_asm_named_section (name, flags, decl);
19168 }
19169
19170 /* Return the mangling of TYPE if it is an extended fundamental type. */
19171
19172 static const char *
19173 ix86_mangle_fundamental_type (tree type)
19174 {
19175 switch (TYPE_MODE (type))
19176 {
19177 case TFmode:
19178 /* __float128 is "g". */
19179 return "g";
19180 case XFmode:
19181 /* "long double" or __float80 is "e". */
19182 return "e";
19183 default:
19184 return NULL;
19185 }
19186 }
19187
19188 /* For 32-bit code we can save the PIC register setup by calling the
19189    hidden function __stack_chk_fail_local instead of calling
19190    __stack_chk_fail directly.  64-bit code doesn't need to set up any
19191    PIC register, so it is better to call __stack_chk_fail directly.  */
19192
19193 static tree
19194 ix86_stack_protect_fail (void)
19195 {
19196 return TARGET_64BIT
19197 ? default_external_stack_protect_fail ()
19198 : default_hidden_stack_protect_fail ();
19199 }
19200
19201 /* Select a format to encode pointers in exception handling data. CODE
19202 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19203 true if the symbol may be affected by dynamic relocations.
19204
19205 ??? All x86 object file formats are capable of representing this.
19206 After all, the relocation needed is the same as for the call insn.
19207 Whether or not a particular assembler allows us to enter such, I
19208 guess we'll have to see. */
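/* For example, with flag_pic set in 32-bit mode a global symbol is
   encoded as DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4.  */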
19209 int
19210 asm_preferred_eh_data_format (int code, int global)
19211 {
19212 if (flag_pic)
19213 {
19214 int type = DW_EH_PE_sdata8;
19215 if (!TARGET_64BIT
19216 || ix86_cmodel == CM_SMALL_PIC
19217 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19218 type = DW_EH_PE_sdata4;
19219 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19220 }
19221 if (ix86_cmodel == CM_SMALL
19222 || (ix86_cmodel == CM_MEDIUM && code))
19223 return DW_EH_PE_udata4;
19224 return DW_EH_PE_absptr;
19225 }
19226 \f
19227 /* Copy the sign bit of SIGN onto the non-negative value ABS_VALUE,
19228    storing the result in RESULT.  If MASK is non-null, it is the
19229    inverted sign-bit mask (every bit except the sign bit set), as
      produced by ix86_expand_sse_fabs.  */
19230 static void
19231 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19232 {
19233 enum machine_mode mode = GET_MODE (sign);
19234 rtx sgn = gen_reg_rtx (mode);
19235 if (mask == NULL_RTX)
19236 {
19237 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19238 if (!VECTOR_MODE_P (mode))
19239 {
19240 /* We need to generate a scalar mode mask in this case. */
19241 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19242 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19243 mask = gen_reg_rtx (mode);
19244 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19245 }
19246 }
19247 else
19248 mask = gen_rtx_NOT (mode, mask);
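  /* The NOT is not emitted separately; it folds into the AND below,
     which should match the and-not (ANDNPS/ANDNPD) pattern.  */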
19249 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19250 gen_rtx_AND (mode, mask, sign)));
19251 emit_insn (gen_rtx_SET (VOIDmode, result,
19252 gen_rtx_IOR (mode, abs_value, sgn)));
19253 }
19254
19255 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19256 mask for masking out the sign-bit is stored in *SMASK, if that is
19257 non-null. */
19258 static rtx
19259 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19260 {
19261 enum machine_mode mode = GET_MODE (op0);
19262 rtx xa, mask;
19263
19264 xa = gen_reg_rtx (mode);
19265 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19266 if (!VECTOR_MODE_P (mode))
19267 {
19268 /* We need to generate a scalar mode mask in this case. */
19269 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19270 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19271 mask = gen_reg_rtx (mode);
19272 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19273 }
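  /* MASK has every bit except the sign bit set, so the AND below
     clears only the sign bit of OP0.  */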
19274 emit_insn (gen_rtx_SET (VOIDmode, xa,
19275 gen_rtx_AND (mode, op0, mask)));
19276
19277 if (smask)
19278 *smask = mask;
19279
19280 return xa;
19281 }
19282
19283 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19284 swapping the operands if SWAP_OPERANDS is true. The expanded
19285 code is a forward jump to a newly created label in case the
19286 comparison is true. The generated label rtx is returned. */
19287 static rtx
19288 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19289 bool swap_operands)
19290 {
19291 rtx label, tmp;
19292
19293 if (swap_operands)
19294 {
19295 tmp = op0;
19296 op0 = op1;
19297 op1 = tmp;
19298 }
19299
19300 label = gen_label_rtx ();
19301 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19302 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19303 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19304 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19305 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19306 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19307 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19308 JUMP_LABEL (tmp) = label;
19309
19310 return label;
19311 }
19312
19313 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19314 using comparison code CODE. Operands are swapped for the comparison if
19315 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19316 static rtx
19317 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19318 bool swap_operands)
19319 {
19320 enum machine_mode mode = GET_MODE (op0);
19321 rtx mask = gen_reg_rtx (mode);
19322
19323 if (swap_operands)
19324 {
19325 rtx tmp = op0;
19326 op0 = op1;
19327 op1 = tmp;
19328 }
19329
19330 if (mode == DFmode)
19331 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19332 gen_rtx_fmt_ee (code, mode, op0, op1)));
19333 else
19334 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19335 gen_rtx_fmt_ee (code, mode, op0, op1)));
19336
19337 return mask;
19338 }
19339
19340 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19341 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19342 static rtx
19343 ix86_gen_TWO52 (enum machine_mode mode)
19344 {
19345 REAL_VALUE_TYPE TWO52r;
19346 rtx TWO52;
19347
19348 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19349 TWO52 = const_double_from_real_value (TWO52r, mode);
19350 TWO52 = force_reg (mode, TWO52);
19351
19352 return TWO52;
19353 }
19354
19355 /* Expand SSE sequence for computing lround from OP1 storing
19356 into OP0. */
19357 void
19358 ix86_expand_lround (rtx op0, rtx op1)
19359 {
19360 /* C code for the stuff we're doing below:
19361 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19362 return (long)tmp;
19363 */
19364 enum machine_mode mode = GET_MODE (op1);
19365 const struct real_format *fmt;
19366 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19367 rtx adj;
19368
19369 /* load nextafter (0.5, 0.0) */
19370 fmt = REAL_MODE_FORMAT (mode);
19371 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19372 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
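  /* pred_half = 0.5 - 2**(-p-1), the largest representable value below
     0.5 when the significand has p bits, i.e. nextafter (0.5, 0.0).  */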
19373
19374 /* adj = copysign (0.5, op1) */
19375 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19376 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19377
19378 /* adj = op1 + adj */
19379 expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
19380
19381 /* op0 = (imode)adj */
19382 expand_fix (op0, adj, 0);
19383 }
19384
19385 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
19386    into OP0.  */
19387 void
19388 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19389 {
19390 /* C code for the stuff we're doing below (for do_floor):
19391 xi = (long)op1;
19392 xi -= (double)xi > op1 ? 1 : 0;
19393 return xi;
19394 */
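  /* For !do_floor (ceil) the comparison operands are swapped, so the
     adjustment becomes  xi += (double)xi < op1 ? 1 : 0;  instead.  */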
19395 enum machine_mode fmode = GET_MODE (op1);
19396 enum machine_mode imode = GET_MODE (op0);
19397 rtx ireg, freg, label;
19398
19399 /* reg = (long)op1 */
19400 ireg = gen_reg_rtx (imode);
19401 expand_fix (ireg, op1, 0);
19402
19403 /* freg = (double)reg */
19404 freg = gen_reg_rtx (fmode);
19405 expand_float (freg, ireg, 0);
19406
19407 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19408 label = ix86_expand_sse_compare_and_jump (UNLE,
19409 freg, op1, !do_floor);
19410 expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19411 ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
19412 emit_label (label);
19413 LABEL_NUSES (label) = 1;
19414
19415 emit_move_insn (op0, ireg);
19416 }
19417
19418 /* Expand rint, rounding OPERAND1 to an integral value in the current
19419    rounding mode, and storing the result in OPERAND0.  */
19420 void
19421 ix86_expand_rint (rtx operand0, rtx operand1)
19422 {
19423 /* C code for the stuff we're doing below:
19424 if (!isless (fabs (operand1), 2**52))
19425 return operand1;
19426 tmp = copysign (2**52, operand1);
19427 return operand1 + tmp - tmp;
19428 */
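  /* Adding and then subtracting copysign (2**52, x) rounds x to an
     integer in the current rounding mode, since values of that
     magnitude have no fractional bits in DFmode (2**23 for SFmode).  */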
19429 enum machine_mode mode = GET_MODE (operand0);
19430 rtx res, xa, label, TWO52, mask;
19431
19432 res = gen_reg_rtx (mode);
19433 emit_move_insn (res, operand1);
19434
19435 /* xa = abs (operand1) */
19436 xa = ix86_expand_sse_fabs (res, &mask);
19437
19438 /* if (!isless (xa, TWO52)) goto label; */
19439 TWO52 = ix86_gen_TWO52 (mode);
19440 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19441
19442 ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);
19443
19444 expand_simple_binop (mode, PLUS, res, TWO52, res, 0, OPTAB_DIRECT);
19445 expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
19446
19447 emit_label (label);
19448 LABEL_NUSES (label) = 1;
19449
19450 emit_move_insn (operand0, res);
19451 }
19452
19453 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19454    into OPERAND0, without an intermediate integer conversion (used for
      DFmode on 32-bit targets).  */
19455 void
19456 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19457 {
19458 /* C code for the stuff we expand below.
19459 double xa = fabs (x), x2;
19460 if (!isless (xa, TWO52))
19461 return x;
19462 xa = xa + TWO52 - TWO52;
19463 x2 = copysign (xa, x);
19464 Compensate. Floor:
19465 if (x2 > x)
19466 x2 -= 1;
19467 Compensate. Ceil:
19468 if (x2 < x)
19469 x2 += 1;
19470 return x2;
19471 */
19472 enum machine_mode mode = GET_MODE (operand0);
19473 rtx xa, TWO52, tmp, label, one, res, mask;
19474
19475 TWO52 = ix86_gen_TWO52 (mode);
19476
19477 /* Temporary for holding the result, initialized to the input
19478 operand to ease control flow. */
19479 res = gen_reg_rtx (mode);
19480 emit_move_insn (res, operand1);
19481
19482 /* xa = abs (operand1) */
19483 xa = ix86_expand_sse_fabs (res, &mask);
19484
19485 /* if (!isless (xa, TWO52)) goto label; */
19486 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19487
19488 /* xa = xa + TWO52 - TWO52; */
19489 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19490 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19491
19492 /* xa = copysign (xa, operand1) */
19493 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19494
19495 /* generate 1.0 */
19496 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19497
19498 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
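  /* The compare mask is all-ones where the condition holds; ANDing it
     with 1.0 yields the 1.0 or 0.0 adjustment applied below.  */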
19499 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19500 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19501 gen_rtx_AND (mode, one, tmp)));
19502 expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19503 xa, tmp, res, 0, OPTAB_DIRECT);
19504
19505 emit_label (label);
19506 LABEL_NUSES (label) = 1;
19507
19508 emit_move_insn (operand0, res);
19509 }
19510
19511 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19512 into OPERAND0. */
19513 void
19514 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19515 {
19516 /* C code for the stuff we expand below.
19517 double xa = fabs (x), x2;
19518 if (!isless (xa, TWO52))
19519 return x;
19520 x2 = (double)(long)x;
19521 Compensate. Floor:
19522 if (x2 > x)
19523 x2 -= 1;
19524 Compensate. Ceil:
19525 if (x2 < x)
19526 x2 += 1;
19527 return x2;
19528 */
19529 enum machine_mode mode = GET_MODE (operand0);
19530 rtx xa, xi, TWO52, tmp, label, one, res;
19531
19532 TWO52 = ix86_gen_TWO52 (mode);
19533
19534 /* Temporary for holding the result, initialized to the input
19535 operand to ease control flow. */
19536 res = gen_reg_rtx (mode);
19537 emit_move_insn (res, operand1);
19538
19539 /* xa = abs (operand1) */
19540 xa = ix86_expand_sse_fabs (res, NULL);
19541
19542 /* if (!isless (xa, TWO52)) goto label; */
19543 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19544
19545 /* xa = (double)(long)x */
19546 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19547 expand_fix (xi, res, 0);
19548 expand_float (xa, xi, 0);
19549
19550 /* generate 1.0 */
19551 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19552
19553 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19554 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19555 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19556 gen_rtx_AND (mode, one, tmp)));
19557 expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19558 xa, tmp, res, 0, OPTAB_DIRECT);
19559
19560 emit_label (label);
19561 LABEL_NUSES (label) = 1;
19562
19563 emit_move_insn (operand0, res);
19564 }
19565
19566 #include "gt-i386.h"