/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
* Copyright (c) 2007 The Hewlett-Packard Development Company
* All rights reserved.
*
#include "arch/x86/regs/segment.hh"
#include "arch/x86/isa_traits.hh"
#include "arch/x86/process.hh"
+#include "arch/x86/system.hh"
#include "arch/x86/types.hh"
#include "base/loader/elf_object.hh"
#include "base/loader/object_file.hh"
#include "base/trace.hh"
#include "cpu/thread_context.hh"
#include "debug/Stack.hh"
+#include "mem/multi_level_page_table.hh"
#include "mem/page_table.hh"
#include "sim/process_impl.hh"
#include "sim/syscall_emul.hh"
initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset,
vgettimeofdayBlob, sizeof(vgettimeofdayBlob));
- for (int i = 0; i < contextIds.size(); i++) {
- ThreadContext * tc = system->getThreadContext(contextIds[i]);
-
- SegAttr dataAttr = 0;
- dataAttr.dpl = 3;
- dataAttr.unusable = 0;
- dataAttr.defaultSize = 1;
- dataAttr.longMode = 1;
- dataAttr.avl = 0;
- dataAttr.granularity = 1;
- dataAttr.present = 1;
- dataAttr.type = 3;
- dataAttr.writable = 1;
- dataAttr.readable = 1;
- dataAttr.expandDown = 0;
- dataAttr.system = 1;
-
- //Initialize the segment registers.
- for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
- tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
- tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
- tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
+ if (kvmInSE) {
+ PortProxy physProxy = system->physProxy;
+
+ /*
+ * Set up the gdt.
+ */
+ uint8_t numGDTEntries = 0;
+ uint64_t nullDescriptor = 0;
+ physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
+ (uint8_t *)(&nullDescriptor), 8);
+ numGDTEntries++;
+
+ SegDescriptor initDesc = 0;
+ initDesc.type.codeOrData = 0; // code or data type
+ initDesc.type.c = 0; // conforming
+ initDesc.type.r = 1; // readable
+ initDesc.dpl = 0; // privilege
+ initDesc.p = 1; // present
+ initDesc.l = 1; // longmode - 64 bit
+ initDesc.d = 0; // operand size
+ initDesc.g = 1; // granularity
+ initDesc.s = 1; // system segment
+ initDesc.limitHigh = 0xFFFF;
+ initDesc.limitLow = 0xF;
+ initDesc.baseHigh = 0x0;
+ initDesc.baseLow = 0x0;
+
+ //64 bit code segment
+ SegDescriptor csLowPLDesc = initDesc;
+ csLowPLDesc.type.codeOrData = 1;
+ csLowPLDesc.dpl = 0;
+ uint64_t csLowPLDescVal = csLowPLDesc;
+ physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
+ (uint8_t *)(&csLowPLDescVal), 8);
+
+ numGDTEntries++;
+
+ SegSelector csLowPL = 0;
+ csLowPL.si = numGDTEntries - 1;
+ csLowPL.rpl = 0;
+
+ //64 bit data segment
+ SegDescriptor dsLowPLDesc = initDesc;
+ dsLowPLDesc.type.codeOrData = 0;
+ dsLowPLDesc.dpl = 0;
+ uint64_t dsLowPLDescVal = dsLowPLDesc;
+ physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
+ (uint8_t *)(&dsLowPLDescVal), 8);
+
+ numGDTEntries++;
+
+ SegSelector dsLowPL = 0;
+ dsLowPL.si = numGDTEntries - 1;
+ dsLowPL.rpl = 0;
+
+ //64 bit data segment
+ SegDescriptor dsDesc = initDesc;
+ dsDesc.type.codeOrData = 0;
+ dsDesc.dpl = 3;
+ uint64_t dsDescVal = dsDesc;
+ physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
+ (uint8_t *)(&dsDescVal), 8);
+
+ numGDTEntries++;
+
+ SegSelector ds = 0;
+ ds.si = numGDTEntries - 1;
+ ds.rpl = 3;
+
+ //64 bit code segment
+ SegDescriptor csDesc = initDesc;
+ csDesc.type.codeOrData = 1;
+ csDesc.dpl = 3;
+ uint64_t csDescVal = csDesc;
+ physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
+ (uint8_t *)(&csDescVal), 8);
+
+ numGDTEntries++;
+
+ SegSelector cs = 0;
+ cs.si = numGDTEntries - 1;
+ cs.rpl = 3;
+
+ SegSelector scall = 0;
+ scall.si = csLowPL.si;
+ scall.rpl = 0;
+
+ SegSelector sret = 0;
+ sret.si = dsLowPL.si;
+ sret.rpl = 3;
+
+ /* In long mode the TSS has been extended to 16 Bytes */
+ TSSlow TSSDescLow = 0;
+ TSSDescLow.type = 0xB;
+ TSSDescLow.dpl = 0; // Privelege level 0
+ TSSDescLow.p = 1; // Present
+ TSSDescLow.g = 1; // Page granularity
+ TSSDescLow.limitHigh = 0xF;
+ TSSDescLow.limitLow = 0xFFFF;
+ TSSDescLow.baseLow = (((uint32_t)TSSVirtAddr) << 8) >> 8;
+ TSSDescLow.baseHigh = (uint8_t)(((uint32_t)TSSVirtAddr) >> 24);
+
+ TSShigh TSSDescHigh = 0;
+ TSSDescHigh.base = (uint32_t)(TSSVirtAddr >> 32);
+
+ struct TSSDesc {
+ uint64_t low;
+ uint64_t high;
+ } tssDescVal = {TSSDescLow, TSSDescHigh};
+
+ physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
+ (uint8_t *)(&tssDescVal), sizeof(tssDescVal));
+
+ numGDTEntries++;
+
+ SegSelector tssSel = 0;
+ tssSel.si = numGDTEntries - 1;
+
+ uint64_t tss_base_addr = (TSSDescHigh.base << 32) | ((TSSDescLow.baseHigh << 24) | TSSDescLow.baseLow);
+ uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16);
+
+ SegAttr tss_attr = 0;
+
+ tss_attr.type = TSSDescLow.type;
+ tss_attr.dpl = TSSDescLow.dpl;
+ tss_attr.present = TSSDescLow.p;
+ tss_attr.granularity = TSSDescLow.g;
+ tss_attr.unusable = 0;
+
+ for (int i = 0; i < contextIds.size(); i++) {
+ ThreadContext * tc = system->getThreadContext(contextIds[i]);
+
+ tc->setMiscReg(MISCREG_CS, (MiscReg)cs);
+ tc->setMiscReg(MISCREG_DS, (MiscReg)ds);
+ tc->setMiscReg(MISCREG_ES, (MiscReg)ds);
+ tc->setMiscReg(MISCREG_FS, (MiscReg)ds);
+ tc->setMiscReg(MISCREG_GS, (MiscReg)ds);
+ tc->setMiscReg(MISCREG_SS, (MiscReg)ds);
+
+ // LDT
+ tc->setMiscReg(MISCREG_TSL, 0);
+ SegAttr tslAttr = 0;
+ tslAttr.present = 1;
+ tslAttr.type = 2;
+ tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr);
+
+ tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
+ tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
+
+ tc->setMiscReg(MISCREG_TR, (MiscReg)tssSel);
+ tc->setMiscReg(MISCREG_SEG_BASE(SYS_SEGMENT_REG_TR), tss_base_addr);
+ tc->setMiscReg(MISCREG_SEG_EFF_BASE(SYS_SEGMENT_REG_TR), 0);
+ tc->setMiscReg(MISCREG_SEG_LIMIT(SYS_SEGMENT_REG_TR), tss_limit);
+ tc->setMiscReg(MISCREG_SEG_ATTR(SYS_SEGMENT_REG_TR), (MiscReg)tss_attr);
+
+ //Start using longmode segments.
+ installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
+ installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
+ installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
+ installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
+ installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
+ installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
+
+ Efer efer = 0;
+ efer.sce = 1; // Enable system call extensions.
+ efer.lme = 1; // Enable long mode.
+ efer.lma = 1; // Activate long mode.
+ efer.nxe = 0; // Enable nx support.
+ efer.svme = 1; // Enable svm support for now.
+ efer.ffxsr = 0; // Turn on fast fxsave and fxrstor.
+ tc->setMiscReg(MISCREG_EFER, efer);
+
+ //Set up the registers that describe the operating mode.
+ CR0 cr0 = 0;
+ cr0.pg = 1; // Turn on paging.
+ cr0.cd = 0; // Don't disable caching.
+ cr0.nw = 0; // This is bit is defined to be ignored.
+ cr0.am = 1; // No alignment checking
+ cr0.wp = 1; // Supervisor mode can write read only pages
+ cr0.ne = 1;
+ cr0.et = 1; // This should always be 1
+ cr0.ts = 0; // We don't do task switching, so causing fp exceptions
+ // would be pointless.
+ cr0.em = 0; // Allow x87 instructions to execute natively.
+ cr0.mp = 1; // This doesn't really matter, but the manual suggests
+ // setting it to one.
+ cr0.pe = 1; // We're definitely in protected mode.
+ tc->setMiscReg(MISCREG_CR0, cr0);
+
+ CR0 cr2 = 0;
+ tc->setMiscReg(MISCREG_CR2, cr2);
+
+ CR3 cr3 = pageTablePhysAddr;
+ tc->setMiscReg(MISCREG_CR3, cr3);
+
+ CR4 cr4 = 0;
+ //Turn on pae.
+ cr4.osxsave = 1; // Enable XSAVE and Proc Extended States
+ cr4.osxmmexcpt = 1; // Operating System Unmasked Exception
+ cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support
+ cr4.pce = 0; // Performance-Monitoring Counter Enable
+ cr4.pge = 0; // Page-Global Enable
+ cr4.mce = 0; // Machine Check Enable
+ cr4.pae = 1; // Physical-Address Extension
+ cr4.pse = 0; // Page Size Extensions
+ cr4.de = 0; // Debugging Extensions
+ cr4.tsd = 0; // Time Stamp Disable
+ cr4.pvi = 0; // Protected-Mode Virtual Interrupts
+ cr4.vme = 0; // Virtual-8086 Mode Extensions
+
+ tc->setMiscReg(MISCREG_CR4, cr4);
+
+ CR4 cr8 = 0;
+ tc->setMiscReg(MISCREG_CR8, cr8);
+
+ const Addr PageMapLevel4 = pageTablePhysAddr;
+ //Point to the page tables.
+ tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
+
+ tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
+
+ tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900);
+
+ tc->setMiscReg(MISCREG_SEG_BASE(MISCREG_TSG - MISCREG_SEG_SEL_BASE), GDTVirtAddr);
+ tc->setMiscReg(MISCREG_SEG_LIMIT(MISCREG_TSG - MISCREG_SEG_SEL_BASE), 0xffff);
+
+ tc->setMiscReg(MISCREG_SEG_BASE(MISCREG_IDTR - MISCREG_SEG_SEL_BASE), IDTVirtAddr);
+ tc->setMiscReg(MISCREG_SEG_LIMIT(MISCREG_IDTR - MISCREG_SEG_SEL_BASE), 0xffff);
+
+ /* enabling syscall and sysret */
+ MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32);
+ tc->setMiscReg(MISCREG_STAR, star);
+ MiscReg lstar = (MiscReg) syscallCodeVirtAddr;
+ tc->setMiscReg(MISCREG_LSTAR, lstar);
+ MiscReg sfmask = (1<<8) | (1<<10); // TF | DF
+ tc->setMiscReg(MISCREG_SF_MASK, sfmask);
}
- SegAttr csAttr = 0;
- csAttr.dpl = 3;
- csAttr.unusable = 0;
- csAttr.defaultSize = 0;
- csAttr.longMode = 1;
- csAttr.avl = 0;
- csAttr.granularity = 1;
- csAttr.present = 1;
- csAttr.type = 10;
- csAttr.writable = 0;
- csAttr.readable = 1;
- csAttr.expandDown = 0;
- csAttr.system = 1;
-
- tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
-
- Efer efer = 0;
- efer.sce = 1; // Enable system call extensions.
- efer.lme = 1; // Enable long mode.
- efer.lma = 1; // Activate long mode.
- efer.nxe = 1; // Enable nx support.
- efer.svme = 0; // Disable svm support for now. It isn't implemented.
- efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
- tc->setMiscReg(MISCREG_EFER, efer);
-
- //Set up the registers that describe the operating mode.
- CR0 cr0 = 0;
- cr0.pg = 1; // Turn on paging.
- cr0.cd = 0; // Don't disable caching.
- cr0.nw = 0; // This is bit is defined to be ignored.
- cr0.am = 0; // No alignment checking
- cr0.wp = 0; // Supervisor mode can write read only pages
- cr0.ne = 1;
- cr0.et = 1; // This should always be 1
- cr0.ts = 0; // We don't do task switching, so causing fp exceptions
- // would be pointless.
- cr0.em = 0; // Allow x87 instructions to execute natively.
- cr0.mp = 1; // This doesn't really matter, but the manual suggests
- // setting it to one.
- cr0.pe = 1; // We're definitely in protected mode.
- tc->setMiscReg(MISCREG_CR0, cr0);
-
- tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
+ /*
+ * Setting up the content of the TSS
+ * and writting it to physical memory
+ */
+
+ struct {
+ uint32_t reserved0; // +00h
+ uint32_t RSP0_low; // +04h
+ uint32_t RSP0_high; // +08h
+ uint32_t RSP1_low; // +0Ch
+ uint32_t RSP1_high; // +10h
+ uint32_t RSP2_low; // +14h
+ uint32_t RSP2_high; // +18h
+ uint32_t reserved1; // +1Ch
+ uint32_t reserved2; // +20h
+ uint32_t IST1_low; // +24h
+ uint32_t IST1_high; // +28h
+ uint32_t IST2_low; // +2Ch
+ uint32_t IST2_high; // +30h
+ uint32_t IST3_low; // +34h
+ uint32_t IST3_high; // +38h
+ uint32_t IST4_low; // +3Ch
+ uint32_t IST4_high; // +40h
+ uint32_t IST5_low; // +44h
+ uint32_t IST5_high; // +48h
+ uint32_t IST6_low; // +4Ch
+ uint32_t IST6_high; // +50h
+ uint32_t IST7_low; // +54h
+ uint32_t IST7_high; // +58h
+ uint32_t reserved3; // +5Ch
+ uint32_t reserved4; // +60h
+ uint16_t reserved5; // +64h
+ uint16_t IO_MapBase; // +66h
+ } tss;
+
+ /** setting Interrupt Stack Table */
+ uint64_t IST_start = ISTVirtAddr + PageBytes;
+ tss.IST1_low = (uint32_t)IST_start;
+ tss.IST1_high = (uint32_t)(IST_start >> 32);
+ tss.RSP0_low = tss.IST1_low;
+ tss.RSP0_high = tss.IST1_high;
+ tss.RSP1_low = tss.IST1_low;
+ tss.RSP1_high = tss.IST1_high;
+ tss.RSP2_low = tss.IST1_low;
+ tss.RSP2_high = tss.IST1_high;
+ physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss));
+
+ /* Setting IDT gates */
+ GateDescriptorLow PFGateLow = 0;
+ PFGateLow.offsetHigh = (uint16_t)((uint32_t)PFHandlerVirtAddr >> 16);
+ PFGateLow.offsetLow = (uint16_t)PFHandlerVirtAddr;
+ PFGateLow.selector = (MiscReg)csLowPL;
+ PFGateLow.p = 1;
+ PFGateLow.dpl = 0;
+ PFGateLow.type = 0xe; // gate interrupt type
+ PFGateLow.IST = 0; // setting IST to 0 and using RSP0
+
+ GateDescriptorHigh PFGateHigh = 0;
+ PFGateHigh.offset = (uint32_t)(PFHandlerVirtAddr >> 32);
+
+ struct {
+ uint64_t low;
+ uint64_t high;
+ } PFGate = {PFGateLow, PFGateHigh};
+
+ physProxy.writeBlob(IDTPhysAddr + 0xE0,
+ (uint8_t *)(&PFGate), sizeof(PFGate));
+
+ /** System call handler */
+ uint8_t syscallBlob[] = {
+ 0x48,0xa3,0x00,0x60,0x00,
+ 0x00,0x00,0xc9,0xff,0xff, // mov %rax, (0xffffc90000005600)
+ 0x48,0x0f,0x07, // sysret
+ };
+
+ physProxy.writeBlob(syscallCodePhysAddr,
+ syscallBlob, sizeof(syscallBlob));
+
+ /** Page fault handler */
+ uint8_t faultBlob[] = {
+ 0x48,0xa3,0x00,0x61,0x00,
+ 0x00,0x00,0xc9,0xff,0xff, // mov %rax, (0xffffc90000005700)
+ 0x48,0x83,0xc4,0x08, // add $0x8,%rsp # skip error
+ 0x48,0xcf, // iretq
+ };
+
+ physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob));
+
+ MultiLevelPageTable<PageTableOps> *pt =
+ dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable);
+
+ /* Syscall handler */
+ pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false);
+ /* GDT */
+ pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false);
+ /* IDT */
+ pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false);
+ /* TSS */
+ pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false);
+ /* IST */
+ pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false);
+ /* PF handler */
+ pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false);
+ /* MMIO region for m5ops */
+ pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false);
+ } else {
+ for (int i = 0; i < contextIds.size(); i++) {
+ ThreadContext * tc = system->getThreadContext(contextIds[i]);
+
+ SegAttr dataAttr = 0;
+ dataAttr.dpl = 3;
+ dataAttr.unusable = 0;
+ dataAttr.defaultSize = 1;
+ dataAttr.longMode = 1;
+ dataAttr.avl = 0;
+ dataAttr.granularity = 1;
+ dataAttr.present = 1;
+ dataAttr.type = 3;
+ dataAttr.writable = 1;
+ dataAttr.readable = 1;
+ dataAttr.expandDown = 0;
+ dataAttr.system = 1;
+
+ //Initialize the segment registers.
+ for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
+ tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
+ tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
+ tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
+ }
+
+ SegAttr csAttr = 0;
+ csAttr.dpl = 3;
+ csAttr.unusable = 0;
+ csAttr.defaultSize = 0;
+ csAttr.longMode = 1;
+ csAttr.avl = 0;
+ csAttr.granularity = 1;
+ csAttr.present = 1;
+ csAttr.type = 10;
+ csAttr.writable = 0;
+ csAttr.readable = 1;
+ csAttr.expandDown = 0;
+ csAttr.system = 1;
+
+ tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
+
+ Efer efer = 0;
+ efer.sce = 1; // Enable system call extensions.
+ efer.lme = 1; // Enable long mode.
+ efer.lma = 1; // Activate long mode.
+ efer.nxe = 1; // Enable nx support.
+ efer.svme = 0; // Disable svm support for now. It isn't implemented.
+ efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
+ tc->setMiscReg(MISCREG_EFER, efer);
+
+ //Set up the registers that describe the operating mode.
+ CR0 cr0 = 0;
+ cr0.pg = 1; // Turn on paging.
+ cr0.cd = 0; // Don't disable caching.
+ cr0.nw = 0; // This is bit is defined to be ignored.
+ cr0.am = 0; // No alignment checking
+ cr0.wp = 0; // Supervisor mode can write read only pages
+ cr0.ne = 1;
+ cr0.et = 1; // This should always be 1
+ cr0.ts = 0; // We don't do task switching, so causing fp exceptions
+ // would be pointless.
+ cr0.em = 0; // Allow x87 instructions to execute natively.
+ cr0.mp = 1; // This doesn't really matter, but the manual suggests
+ // setting it to one.
+ cr0.pe = 1; // We're definitely in protected mode.
+ tc->setMiscReg(MISCREG_CR0, cr0);
+
+ tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
+ }
}
}
stack_min = stack_base - space_needed;
stack_min = roundDown(stack_min, align);
- stack_size = stack_base - stack_min;
+ stack_size = roundUp(stack_base - stack_min, pageSize);
// map memory
- allocateMem(roundDown(stack_min, pageSize), roundUp(stack_size, pageSize));
+ Addr stack_end = roundDown(stack_base - stack_size, pageSize);
+
+ DPRINTF(Stack, "Mapping the stack: 0x%x %dB\n", stack_end, stack_size);
+ allocateMem(stack_end, stack_size);
// map out initial stack contents
IntType sentry_base = stack_base - sentry_size;