X86: Move address based decode caching in front of the predecoder.
[gem5.git] / src / arch / x86 / decoder.cc
1 /*
2 * Copyright (c) 2011 Google
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Gabe Black
29 */
30
31 #include "arch/x86/decoder.hh"
32 #include "arch/x86/regs/misc.hh"
33 #include "base/misc.hh"
34 #include "base/trace.hh"
35 #include "base/types.hh"
36 #include "cpu/thread_context.hh"
37 #include "debug/Decoder.hh"
38
39 namespace X86ISA
40 {
41
42 Decoder::State
43 Decoder::doResetState()
44 {
45 origPC = basePC + offset;
46 DPRINTF(Decoder, "Setting origPC to %#x\n", origPC);
47 instBytes = &decodePages->lookup(origPC);
48 chunkIdx = 0;
49
50 emi.rex = 0;
51 emi.legacy = 0;
52 emi.opcode.num = 0;
53 emi.opcode.op = 0;
54 emi.opcode.prefixA = emi.opcode.prefixB = 0;
55
56 immediateCollected = 0;
57 emi.immediate = 0;
58 emi.displacement = 0;
59 emi.dispSize = 0;
60
61 emi.modRM = 0;
62 emi.sib = 0;
63
64 if (instBytes->si) {
65 return FromCacheState;
66 } else {
67 instBytes->chunks.clear();
68 return PrefixState;
69 }
70 }
71
72 void
73 Decoder::process()
74 {
75 //This function drives the decoder state machine.
76
77 //Some sanity checks. You shouldn't try to process more bytes if
78 //there aren't any, and you shouldn't overwrite an already
79 //decoder ExtMachInst.
80 assert(!outOfBytes);
81 assert(!instDone);
82
83 if (state == ResetState)
84 state = doResetState();
85 if (state == FromCacheState) {
86 state = doFromCacheState();
87 } else {
88 instBytes->chunks.push_back(fetchChunk);
89 }
90
91 //While there's still something to do...
92 while (!instDone && !outOfBytes) {
93 uint8_t nextByte = getNextByte();
94 switch (state) {
95 case PrefixState:
96 state = doPrefixState(nextByte);
97 break;
98 case OpcodeState:
99 state = doOpcodeState(nextByte);
100 break;
101 case ModRMState:
102 state = doModRMState(nextByte);
103 break;
104 case SIBState:
105 state = doSIBState(nextByte);
106 break;
107 case DisplacementState:
108 state = doDisplacementState();
109 break;
110 case ImmediateState:
111 state = doImmediateState();
112 break;
113 case ErrorState:
114 panic("Went to the error state in the decoder.\n");
115 default:
116 panic("Unrecognized state! %d\n", state);
117 }
118 }
119 }
120
121 Decoder::State
122 Decoder::doFromCacheState()
123 {
124 DPRINTF(Decoder, "Looking at cache state.\n");
125 if ((fetchChunk & instBytes->masks[chunkIdx]) !=
126 instBytes->chunks[chunkIdx]) {
127 DPRINTF(Decoder, "Decode cache miss.\n");
128 // The chached chunks didn't match what was fetched. Fall back to the
129 // predecoder.
130 instBytes->chunks[chunkIdx] = fetchChunk;
131 instBytes->chunks.resize(chunkIdx + 1);
132 instBytes->si = NULL;
133 chunkIdx = 0;
134 fetchChunk = instBytes->chunks[0];
135 offset = origPC % sizeof(MachInst);
136 basePC = origPC - offset;
137 return PrefixState;
138 } else if (chunkIdx == instBytes->chunks.size() - 1) {
139 // We matched the cache, so use its value.
140 instDone = true;
141 offset = instBytes->lastOffset;
142 if (offset == sizeof(MachInst))
143 outOfBytes = true;
144 return ResetState;
145 } else {
146 // We matched so far, but need to check more chunks.
147 chunkIdx++;
148 outOfBytes = true;
149 return FromCacheState;
150 }
151 }
152
153 //Either get a prefix and record it in the ExtMachInst, or send the
154 //state machine on to get the opcode(s).
155 Decoder::State
156 Decoder::doPrefixState(uint8_t nextByte)
157 {
158 uint8_t prefix = Prefixes[nextByte];
159 State nextState = PrefixState;
160 // REX prefixes are only recognized in 64 bit mode.
161 if (prefix == RexPrefix && emi.mode.submode != SixtyFourBitMode)
162 prefix = 0;
163 if (prefix)
164 consumeByte();
165 switch(prefix)
166 {
167 //Operand size override prefixes
168 case OperandSizeOverride:
169 DPRINTF(Decoder, "Found operand size override prefix.\n");
170 emi.legacy.op = true;
171 break;
172 case AddressSizeOverride:
173 DPRINTF(Decoder, "Found address size override prefix.\n");
174 emi.legacy.addr = true;
175 break;
176 //Segment override prefixes
177 case CSOverride:
178 case DSOverride:
179 case ESOverride:
180 case FSOverride:
181 case GSOverride:
182 case SSOverride:
183 DPRINTF(Decoder, "Found segment override.\n");
184 emi.legacy.seg = prefix;
185 break;
186 case Lock:
187 DPRINTF(Decoder, "Found lock prefix.\n");
188 emi.legacy.lock = true;
189 break;
190 case Rep:
191 DPRINTF(Decoder, "Found rep prefix.\n");
192 emi.legacy.rep = true;
193 break;
194 case Repne:
195 DPRINTF(Decoder, "Found repne prefix.\n");
196 emi.legacy.repne = true;
197 break;
198 case RexPrefix:
199 DPRINTF(Decoder, "Found Rex prefix %#x.\n", nextByte);
200 emi.rex = nextByte;
201 break;
202 case 0:
203 nextState = OpcodeState;
204 break;
205 default:
206 panic("Unrecognized prefix %#x\n", nextByte);
207 }
208 return nextState;
209 }
210
211 //Load all the opcodes (currently up to 2) and then figure out
212 //what immediate and/or ModRM is needed.
213 Decoder::State
214 Decoder::doOpcodeState(uint8_t nextByte)
215 {
216 State nextState = ErrorState;
217 emi.opcode.num++;
218 //We can't handle 3+ byte opcodes right now
219 assert(emi.opcode.num < 4);
220 consumeByte();
221 if(emi.opcode.num == 1 && nextByte == 0x0f)
222 {
223 nextState = OpcodeState;
224 DPRINTF(Decoder, "Found two byte opcode.\n");
225 emi.opcode.prefixA = nextByte;
226 }
227 else if(emi.opcode.num == 2 && (nextByte == 0x38 || nextByte == 0x3A))
228 {
229 nextState = OpcodeState;
230 DPRINTF(Decoder, "Found three byte opcode.\n");
231 emi.opcode.prefixB = nextByte;
232 }
233 else
234 {
235 DPRINTF(Decoder, "Found opcode %#x.\n", nextByte);
236 emi.opcode.op = nextByte;
237
238 //Figure out the effective operand size. This can be overriden to
239 //a fixed value at the decoder level.
240 int logOpSize;
241 if (emi.rex.w)
242 logOpSize = 3; // 64 bit operand size
243 else if (emi.legacy.op)
244 logOpSize = altOp;
245 else
246 logOpSize = defOp;
247
248 //Set the actual op size
249 emi.opSize = 1 << logOpSize;
250
251 //Figure out the effective address size. This can be overriden to
252 //a fixed value at the decoder level.
253 int logAddrSize;
254 if(emi.legacy.addr)
255 logAddrSize = altAddr;
256 else
257 logAddrSize = defAddr;
258
259 //Set the actual address size
260 emi.addrSize = 1 << logAddrSize;
261
262 //Figure out the effective stack width. This can be overriden to
263 //a fixed value at the decoder level.
264 emi.stackSize = 1 << stack;
265
266 //Figure out how big of an immediate we'll retreive based
267 //on the opcode.
268 int immType = ImmediateType[emi.opcode.num - 1][nextByte];
269 if (emi.opcode.num == 1 && nextByte >= 0xA0 && nextByte <= 0xA3)
270 immediateSize = SizeTypeToSize[logAddrSize - 1][immType];
271 else
272 immediateSize = SizeTypeToSize[logOpSize - 1][immType];
273
274 //Determine what to expect next
275 if (UsesModRM[emi.opcode.num - 1][nextByte]) {
276 nextState = ModRMState;
277 } else {
278 if(immediateSize) {
279 nextState = ImmediateState;
280 } else {
281 instDone = true;
282 nextState = ResetState;
283 }
284 }
285 }
286 return nextState;
287 }
288
289 //Get the ModRM byte and determine what displacement, if any, there is.
290 //Also determine whether or not to get the SIB byte, displacement, or
291 //immediate next.
292 Decoder::State
293 Decoder::doModRMState(uint8_t nextByte)
294 {
295 State nextState = ErrorState;
296 ModRM modRM;
297 modRM = nextByte;
298 DPRINTF(Decoder, "Found modrm byte %#x.\n", nextByte);
299 if (defOp == 1) {
300 //figure out 16 bit displacement size
301 if ((modRM.mod == 0 && modRM.rm == 6) || modRM.mod == 2)
302 displacementSize = 2;
303 else if (modRM.mod == 1)
304 displacementSize = 1;
305 else
306 displacementSize = 0;
307 } else {
308 //figure out 32/64 bit displacement size
309 if ((modRM.mod == 0 && modRM.rm == 5) || modRM.mod == 2)
310 displacementSize = 4;
311 else if (modRM.mod == 1)
312 displacementSize = 1;
313 else
314 displacementSize = 0;
315 }
316
317 // The "test" instruction in group 3 needs an immediate, even though
318 // the other instructions with the same actual opcode don't.
319 if (emi.opcode.num == 1 && (modRM.reg & 0x6) == 0) {
320 if (emi.opcode.op == 0xF6)
321 immediateSize = 1;
322 else if (emi.opcode.op == 0xF7)
323 immediateSize = (emi.opSize == 8) ? 4 : emi.opSize;
324 }
325
326 //If there's an SIB, get that next.
327 //There is no SIB in 16 bit mode.
328 if (modRM.rm == 4 && modRM.mod != 3) {
329 // && in 32/64 bit mode)
330 nextState = SIBState;
331 } else if(displacementSize) {
332 nextState = DisplacementState;
333 } else if(immediateSize) {
334 nextState = ImmediateState;
335 } else {
336 instDone = true;
337 nextState = ResetState;
338 }
339 //The ModRM byte is consumed no matter what
340 consumeByte();
341 emi.modRM = modRM;
342 return nextState;
343 }
344
345 //Get the SIB byte. We don't do anything with it at this point, other
346 //than storing it in the ExtMachInst. Determine if we need to get a
347 //displacement or immediate next.
348 Decoder::State
349 Decoder::doSIBState(uint8_t nextByte)
350 {
351 State nextState = ErrorState;
352 emi.sib = nextByte;
353 DPRINTF(Decoder, "Found SIB byte %#x.\n", nextByte);
354 consumeByte();
355 if (emi.modRM.mod == 0 && emi.sib.base == 5)
356 displacementSize = 4;
357 if (displacementSize) {
358 nextState = DisplacementState;
359 } else if(immediateSize) {
360 nextState = ImmediateState;
361 } else {
362 instDone = true;
363 nextState = ResetState;
364 }
365 return nextState;
366 }
367
368 //Gather up the displacement, or at least as much of it
369 //as we can get.
370 Decoder::State
371 Decoder::doDisplacementState()
372 {
373 State nextState = ErrorState;
374
375 getImmediate(immediateCollected,
376 emi.displacement,
377 displacementSize);
378
379 DPRINTF(Decoder, "Collecting %d byte displacement, got %d bytes.\n",
380 displacementSize, immediateCollected);
381
382 if(displacementSize == immediateCollected) {
383 //Reset this for other immediates.
384 immediateCollected = 0;
385 //Sign extend the displacement
386 switch(displacementSize)
387 {
388 case 1:
389 emi.displacement = sext<8>(emi.displacement);
390 break;
391 case 2:
392 emi.displacement = sext<16>(emi.displacement);
393 break;
394 case 4:
395 emi.displacement = sext<32>(emi.displacement);
396 break;
397 default:
398 panic("Undefined displacement size!\n");
399 }
400 DPRINTF(Decoder, "Collected displacement %#x.\n",
401 emi.displacement);
402 if(immediateSize) {
403 nextState = ImmediateState;
404 } else {
405 instDone = true;
406 nextState = ResetState;
407 }
408
409 emi.dispSize = displacementSize;
410 }
411 else
412 nextState = DisplacementState;
413 return nextState;
414 }
415
416 //Gather up the immediate, or at least as much of it
417 //as we can get
418 Decoder::State
419 Decoder::doImmediateState()
420 {
421 State nextState = ErrorState;
422
423 getImmediate(immediateCollected,
424 emi.immediate,
425 immediateSize);
426
427 DPRINTF(Decoder, "Collecting %d byte immediate, got %d bytes.\n",
428 immediateSize, immediateCollected);
429
430 if(immediateSize == immediateCollected)
431 {
432 //Reset this for other immediates.
433 immediateCollected = 0;
434
435 //XXX Warning! The following is an observed pattern and might
436 //not always be true!
437
438 //Instructions which use 64 bit operands but 32 bit immediates
439 //need to have the immediate sign extended to 64 bits.
440 //Instructions which use true 64 bit immediates won't be
441 //affected, and instructions that use true 32 bit immediates
442 //won't notice.
443 switch(immediateSize)
444 {
445 case 4:
446 emi.immediate = sext<32>(emi.immediate);
447 break;
448 case 1:
449 emi.immediate = sext<8>(emi.immediate);
450 }
451
452 DPRINTF(Decoder, "Collected immediate %#x.\n",
453 emi.immediate);
454 instDone = true;
455 nextState = ResetState;
456 }
457 else
458 nextState = ImmediateState;
459 return nextState;
460 }
461
462 Decoder::InstBytes Decoder::dummy;
463 Decoder::InstCacheMap Decoder::instCacheMap;
464
465 StaticInstPtr
466 Decoder::decode(ExtMachInst mach_inst, Addr addr)
467 {
468 DecodeCache::InstMap::iterator iter = instMap->find(mach_inst);
469 if (iter != instMap->end())
470 return iter->second;
471
472 StaticInstPtr si = decodeInst(mach_inst);
473 (*instMap)[mach_inst] = si;
474 return si;
475 }
476
477 StaticInstPtr
478 Decoder::decode(PCState &nextPC)
479 {
480 if (!instDone)
481 return NULL;
482 instDone = false;
483 updateNPC(nextPC);
484
485 StaticInstPtr &si = instBytes->si;
486 if (si)
487 return si;
488
489 // We didn't match in the AddrMap, but we still populated an entry. Fix
490 // up its byte masks.
491 const int chunkSize = sizeof(MachInst);
492
493 instBytes->lastOffset = offset;
494
495 Addr firstBasePC = basePC - (instBytes->chunks.size() - 1) * chunkSize;
496 Addr firstOffset = origPC - firstBasePC;
497 Addr totalSize = instBytes->lastOffset - firstOffset +
498 (instBytes->chunks.size() - 1) * chunkSize;
499 int start = firstOffset;
500 instBytes->masks.clear();
501
502 while (totalSize) {
503 int end = start + totalSize;
504 end = (chunkSize < end) ? chunkSize : end;
505 int size = end - start;
506 int idx = instBytes->masks.size();
507
508 MachInst maskVal = mask(size * 8) << (start * 8);
509 assert(maskVal);
510
511 instBytes->masks.push_back(maskVal);
512 instBytes->chunks[idx] &= instBytes->masks[idx];
513 totalSize -= size;
514 start = 0;
515 }
516
517 si = decode(emi, origPC);
518 return si;
519 }
520
521 }