a69e50b8d981f6b7d62c10f48cfac5605b9a82c6
[gem5.git] / src / arch / x86 / fs_workload.cc
1 /*
2 * Copyright (c) 2007 The Hewlett-Packard Development Company
3 * Copyright (c) 2018 TU Dresden
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions are
17 * met: redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer;
19 * redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution;
22 * neither the name of the copyright holders nor the names of its
23 * contributors may be used to endorse or promote products derived from
24 * this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include "arch/x86/fs_workload.hh"
40
41 #include "arch/x86/bios/intelmp.hh"
42 #include "arch/x86/bios/smbios.hh"
43 #include "arch/x86/faults.hh"
44 #include "arch/x86/isa_traits.hh"
45 #include "base/loader/object_file.hh"
46 #include "cpu/thread_context.hh"
47 #include "params/X86FsWorkload.hh"
48 #include "sim/system.hh"
49
50 namespace X86ISA
51 {
52
53 FsWorkload::FsWorkload(const Params &p) : KernelWorkload(p),
54 smbiosTable(p.smbios_table),
55 mpFloatingPointer(p.intel_mp_pointer),
56 mpConfigTable(p.intel_mp_table),
57 rsdp(p.acpi_description_table_pointer)
58 {}
59
60 void
61 installSegDesc(ThreadContext *tc, SegmentRegIndex seg,
62 SegDescriptor desc, bool longmode)
63 {
64 bool honorBase = !longmode || seg == SEGMENT_REG_FS ||
65 seg == SEGMENT_REG_GS ||
66 seg == SEGMENT_REG_TSL ||
67 seg == SYS_SEGMENT_REG_TR;
68
69 SegAttr attr = 0;
70
71 attr.dpl = desc.dpl;
72 attr.unusable = 0;
73 attr.defaultSize = desc.d;
74 attr.longMode = desc.l;
75 attr.avl = desc.avl;
76 attr.granularity = desc.g;
77 attr.present = desc.p;
78 attr.system = desc.s;
79 attr.type = desc.type;
80 if (desc.s) {
81 if (desc.type.codeOrData) {
82 // Code segment
83 attr.expandDown = 0;
84 attr.readable = desc.type.r;
85 attr.writable = 0;
86 } else {
87 // Data segment
88 attr.expandDown = desc.type.e;
89 attr.readable = 1;
90 attr.writable = desc.type.w;
91 }
92 } else {
93 attr.readable = 1;
94 attr.writable = 1;
95 attr.expandDown = 0;
96 }
97
98 tc->setMiscReg(MISCREG_SEG_BASE(seg), desc.base);
99 tc->setMiscReg(MISCREG_SEG_EFF_BASE(seg), honorBase ? desc.base : 0);
100 tc->setMiscReg(MISCREG_SEG_LIMIT(seg), desc.limit);
101 tc->setMiscReg(MISCREG_SEG_ATTR(seg), (RegVal)attr);
102 }
103
104 void
105 FsWorkload::initState()
106 {
107 KernelWorkload::initState();
108
109 for (auto *tc: system->threads) {
110 X86ISA::InitInterrupt(0).invoke(tc);
111
112 if (tc->contextId() == 0) {
113 tc->activate();
114 } else {
115 // This is an application processor (AP). It should be initialized
116 // to look like only the BIOS POST has run on it and put then put
117 // it into a halted state.
118 tc->suspend();
119 }
120 }
121
122 fatal_if(!kernelObj, "No kernel to load.");
123
124 fatal_if(kernelObj->getArch() == Loader::I386,
125 "Loading a 32 bit x86 kernel is not supported.");
126
127 ThreadContext *tc = system->threads[0];
128 auto phys_proxy = system->physProxy;
129
130 // This is the boot strap processor (BSP). Initialize it to look like
131 // the boot loader has just turned control over to the 64 bit OS. We
132 // won't actually set up real mode or legacy protected mode descriptor
133 // tables because we aren't executing any code that would require
134 // them. We do, however toggle the control bits in the correct order
135 // while allowing consistency checks and the underlying mechansims
136 // just to be safe.
137
138 const int NumPDTs = 4;
139
140 const Addr PageMapLevel4 = 0x70000;
141 const Addr PageDirPtrTable = 0x71000;
142 const Addr PageDirTable[NumPDTs] =
143 {0x72000, 0x73000, 0x74000, 0x75000};
144 const Addr GDTBase = 0x76000;
145
146 const int PML4Bits = 9;
147 const int PDPTBits = 9;
148 const int PDTBits = 9;
149
150 /*
151 * Set up the gdt.
152 */
153 uint8_t numGDTEntries = 0;
154 // Place holder at selector 0
155 uint64_t nullDescriptor = 0;
156 phys_proxy.writeBlob(GDTBase + numGDTEntries * 8, &nullDescriptor, 8);
157 numGDTEntries++;
158
159 SegDescriptor initDesc = 0;
160 initDesc.type.codeOrData = 0; // code or data type
161 initDesc.type.c = 0; // conforming
162 initDesc.type.r = 1; // readable
163 initDesc.dpl = 0; // privilege
164 initDesc.p = 1; // present
165 initDesc.l = 1; // longmode - 64 bit
166 initDesc.d = 0; // operand size
167 initDesc.g = 1; // granularity
168 initDesc.s = 1; // system segment
169 initDesc.limit = 0xFFFFFFFF;
170 initDesc.base = 0;
171
172 // 64 bit code segment
173 SegDescriptor csDesc = initDesc;
174 csDesc.type.codeOrData = 1;
175 csDesc.dpl = 0;
176 // Because we're dealing with a pointer and I don't think it's
177 // guaranteed that there isn't anything in a nonvirtual class between
178 // it's beginning in memory and it's actual data, we'll use an
179 // intermediary.
180 uint64_t csDescVal = csDesc;
181 phys_proxy.writeBlob(GDTBase + numGDTEntries * 8, (&csDescVal), 8);
182
183 numGDTEntries++;
184
185 SegSelector cs = 0;
186 cs.si = numGDTEntries - 1;
187
188 tc->setMiscReg(MISCREG_CS, (RegVal)cs);
189
190 // 32 bit data segment
191 SegDescriptor dsDesc = initDesc;
192 dsDesc.type.e = 0;
193 dsDesc.type.w = 1;
194 dsDesc.d = 1;
195 dsDesc.baseHigh = 0;
196 dsDesc.baseLow = 0;
197
198 uint64_t dsDescVal = dsDesc;
199 phys_proxy.writeBlob(GDTBase + numGDTEntries * 8, (&dsDescVal), 8);
200
201 numGDTEntries++;
202
203 SegSelector ds = 0;
204 ds.si = numGDTEntries - 1;
205
206 tc->setMiscReg(MISCREG_DS, (RegVal)ds);
207 tc->setMiscReg(MISCREG_ES, (RegVal)ds);
208 tc->setMiscReg(MISCREG_FS, (RegVal)ds);
209 tc->setMiscReg(MISCREG_GS, (RegVal)ds);
210 tc->setMiscReg(MISCREG_SS, (RegVal)ds);
211
212 tc->setMiscReg(MISCREG_TSL, 0);
213 SegAttr ldtAttr = 0;
214 ldtAttr.unusable = 1;
215 tc->setMiscReg(MISCREG_TSL_ATTR, ldtAttr);
216 tc->setMiscReg(MISCREG_TSG_BASE, GDTBase);
217 tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
218
219 SegDescriptor tssDesc = initDesc;
220 tssDesc.type = 0xB;
221 tssDesc.s = 0;
222
223 uint64_t tssDescVal = tssDesc;
224 phys_proxy.writeBlob(GDTBase + numGDTEntries * 8, (&tssDescVal), 8);
225
226 numGDTEntries++;
227
228 SegSelector tss = 0;
229 tss.si = numGDTEntries - 1;
230
231 tc->setMiscReg(MISCREG_TR, (RegVal)tss);
232 installSegDesc(tc, SYS_SEGMENT_REG_TR, tssDesc, true);
233
234 /*
235 * Identity map the first 4GB of memory. In order to map this region
236 * of memory in long mode, there needs to be one actual page map level
237 * 4 entry which points to one page directory pointer table which
238 * points to 4 different page directory tables which are full of two
239 * megabyte pages. All of the other entries in valid tables are set
240 * to indicate that they don't pertain to anything valid and will
241 * cause a fault if used.
242 */
243
244 // Put valid values in all of the various table entries which indicate
245 // that those entries don't point to further tables or pages. Then
246 // set the values of those entries which are needed.
247
248 // Page Map Level 4
249
250 // read/write, user, not present
251 uint64_t pml4e = htole<uint64_t>(0x6);
252 for (int offset = 0; offset < (1 << PML4Bits) * 8; offset += 8)
253 phys_proxy.writeBlob(PageMapLevel4 + offset, (&pml4e), 8);
254 // Point to the only PDPT
255 pml4e = htole<uint64_t>(0x7 | PageDirPtrTable);
256 phys_proxy.writeBlob(PageMapLevel4, (&pml4e), 8);
257
258 // Page Directory Pointer Table
259
260 // read/write, user, not present
261 uint64_t pdpe = htole<uint64_t>(0x6);
262 for (int offset = 0; offset < (1 << PDPTBits) * 8; offset += 8)
263 phys_proxy.writeBlob(PageDirPtrTable + offset, &pdpe, 8);
264 // Point to the PDTs
265 for (int table = 0; table < NumPDTs; table++) {
266 pdpe = htole<uint64_t>(0x7 | PageDirTable[table]);
267 phys_proxy.writeBlob(PageDirPtrTable + table * 8, &pdpe, 8);
268 }
269
270 // Page Directory Tables
271
272 Addr base = 0;
273 const Addr pageSize = 2 << 20;
274 for (int table = 0; table < NumPDTs; table++) {
275 for (int offset = 0; offset < (1 << PDTBits) * 8; offset += 8) {
276 // read/write, user, present, 4MB
277 uint64_t pdte = htole(0x87 | base);
278 phys_proxy.writeBlob(PageDirTable[table] + offset, &pdte, 8);
279 base += pageSize;
280 }
281 }
282
283 /*
284 * Transition from real mode all the way up to Long mode
285 */
286 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
287 // Turn off paging.
288 cr0.pg = 0;
289 tc->setMiscReg(MISCREG_CR0, cr0);
290 // Turn on protected mode.
291 cr0.pe = 1;
292 tc->setMiscReg(MISCREG_CR0, cr0);
293
294 CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
295 // Turn on pae.
296 cr4.pae = 1;
297 tc->setMiscReg(MISCREG_CR4, cr4);
298
299 // Point to the page tables.
300 tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
301
302 Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
303 // Enable long mode.
304 efer.lme = 1;
305 tc->setMiscReg(MISCREG_EFER, efer);
306
307 // Start using longmode segments.
308 installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
309 installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
310 installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
311 installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
312 installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
313 installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
314
315 // Activate long mode.
316 cr0.pg = 1;
317 tc->setMiscReg(MISCREG_CR0, cr0);
318
319 tc->pcState(kernelObj->entryPoint());
320
321 // We should now be in long mode. Yay!
322
323 Addr ebdaPos = 0xF0000;
324 Addr fixed, table;
325
326 // Write out the SMBios/DMI table.
327 writeOutSMBiosTable(ebdaPos, fixed, table);
328 ebdaPos += (fixed + table);
329 ebdaPos = roundUp(ebdaPos, 16);
330
331 // Write out the Intel MP Specification configuration table.
332 writeOutMPTable(ebdaPos, fixed, table);
333 ebdaPos += (fixed + table);
334 }
335
336 void
337 FsWorkload::writeOutSMBiosTable(Addr header,
338 Addr &headerSize, Addr &structSize, Addr table)
339 {
340 // If the table location isn't specified, just put it after the header.
341 // The header size as of the 2.5 SMBios specification is 0x1F bytes.
342 if (!table)
343 table = header + 0x1F;
344 smbiosTable->setTableAddr(table);
345
346 smbiosTable->writeOut(system->physProxy, header, headerSize, structSize);
347
348 // Do some bounds checking to make sure we at least didn't step on
349 // ourselves.
350 assert(header > table || header + headerSize <= table);
351 assert(table > header || table + structSize <= header);
352 }
353
354 void
355 FsWorkload::writeOutMPTable(Addr fp, Addr &fpSize, Addr &tableSize, Addr table)
356 {
357 // If the table location isn't specified and it exists, just put
358 // it after the floating pointer. The fp size as of the 1.4 Intel MP
359 // specification is 0x10 bytes.
360 if (mpConfigTable) {
361 if (!table)
362 table = fp + 0x10;
363 mpFloatingPointer->setTableAddr(table);
364 }
365
366 fpSize = mpFloatingPointer->writeOut(system->physProxy, fp);
367 if (mpConfigTable)
368 tableSize = mpConfigTable->writeOut(system->physProxy, table);
369 else
370 tableSize = 0;
371
372 // Do some bounds checking to make sure we at least didn't step on
373 // ourselves and the fp structure was the size we thought it was.
374 assert(fp > table || fp + fpSize <= table);
375 assert(table > fp || table + tableSize <= fp);
376 assert(fpSize == 0x10);
377 }
378
379 } // namespace X86ISA
380
381 X86ISA::FsWorkload *
382 X86FsWorkloadParams::create() const
383 {
384 return new X86ISA::FsWorkload(*this);
385 }