swr: [rasterizer core] Globally cache allocated arena blocks for fast re-allocation.
[mesa.git] / src / gallium / drivers / swr / rasterizer / common / isa.hpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ****************************************************************************/
23
24 #pragma once
25
26 #include <iostream>
27 #include <vector>
28 #include <bitset>
29 #include <array>
30 #include <string>
31 #include <algorithm>
32
33 #if defined(_WIN32)
34 #include <intrin.h>
35 #else
36 #include <string.h>
37 #include <cpuid.h>
38 #endif
39
// Runs the CPUID instruction once at construction time and exposes the
// reported vendor/brand strings and feature bits through read-only accessors.
//
// Each accessor returns the raw bit reported by CPUID (a few are additionally
// gated on the detected vendor string). Nothing here checks whether the
// operating system has enabled the corresponding register state, so callers
// that need that guarantee must verify it separately.
class InstructionSet
{
public:
    InstructionSet() : CPU_Rep() {}

    // Identification strings. Brand() is empty when the CPU does not report
    // extended leaves up to 0x80000004.
    std::string Vendor() const { return CPU_Rep.vendor_; }
    std::string Brand() const { return CPU_Rep.brand_; }

    // Leaf 0x00000001, ECX
    bool SSE3() const { return CPU_Rep.f_1_ECX_[0]; }
    bool PCLMULQDQ() const { return CPU_Rep.f_1_ECX_[1]; }
    bool MONITOR() const { return CPU_Rep.f_1_ECX_[3]; }
    bool SSSE3() const { return CPU_Rep.f_1_ECX_[9]; }
    bool FMA() const { return CPU_Rep.f_1_ECX_[12]; }
    bool CMPXCHG16B() const { return CPU_Rep.f_1_ECX_[13]; }
    bool SSE41() const { return CPU_Rep.f_1_ECX_[19]; }
    bool SSE42() const { return CPU_Rep.f_1_ECX_[20]; }
    bool MOVBE() const { return CPU_Rep.f_1_ECX_[22]; }
    bool POPCNT() const { return CPU_Rep.f_1_ECX_[23]; }
    bool AES() const { return CPU_Rep.f_1_ECX_[25]; }
    bool XSAVE() const { return CPU_Rep.f_1_ECX_[26]; }
    bool OSXSAVE() const { return CPU_Rep.f_1_ECX_[27]; }
    bool RDRAND() const { return CPU_Rep.f_1_ECX_[30]; }

    // Leaf 0x00000001, EDX
    bool MSR() const { return CPU_Rep.f_1_EDX_[5]; }
    bool CX8() const { return CPU_Rep.f_1_EDX_[8]; }
    bool SEP() const { return CPU_Rep.f_1_EDX_[11]; }
    bool CMOV() const { return CPU_Rep.f_1_EDX_[15]; }
    bool CLFSH() const { return CPU_Rep.f_1_EDX_[19]; }
    bool MMX() const { return CPU_Rep.f_1_EDX_[23]; }
    bool FXSR() const { return CPU_Rep.f_1_EDX_[24]; }
    bool SSE() const { return CPU_Rep.f_1_EDX_[25]; }
    bool SSE2() const { return CPU_Rep.f_1_EDX_[26]; }

    // Leaf 0x00000007 (sub-leaf 0), EBX. HLE and RTM are only reported when
    // the vendor string is "GenuineIntel".
    bool FSGSBASE() const { return CPU_Rep.f_7_EBX_[0]; }
    bool BMI1() const { return CPU_Rep.f_7_EBX_[3]; }
    bool HLE() const { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[4]; }
    bool BMI2() const { return CPU_Rep.f_7_EBX_[8]; }
    bool ERMS() const { return CPU_Rep.f_7_EBX_[9]; }
    bool INVPCID() const { return CPU_Rep.f_7_EBX_[10]; }
    bool RTM() const { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[11]; }
    bool RDSEED() const { return CPU_Rep.f_7_EBX_[18]; }
    bool ADX() const { return CPU_Rep.f_7_EBX_[19]; }
    bool SHA() const { return CPU_Rep.f_7_EBX_[29]; }

    // Leaf 0x00000007 (sub-leaf 0), ECX
    bool PREFETCHWT1() const { return CPU_Rep.f_7_ECX_[0]; }

    // Leaf 0x80000001, ECX. Bit 5 is surfaced as LZCNT on Intel and as ABM on
    // AMD, so at most one of the two accessors is ever true.
    // NOTE(review): LZCNT() therefore reports false on AMD parts - confirm
    // that callers do not rely on it for AMD.
    bool LAHF() const { return CPU_Rep.f_81_ECX_[0]; }
    bool LZCNT() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_ECX_[5]; }
    bool ABM() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[5]; }
    bool SSE4a() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[6]; }
    bool XOP() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[11]; }
    bool TBM() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[21]; }

    // Leaf 0x80000001, EDX (each gated on the detected vendor)
    bool SYSCALL() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[11]; }
    bool MMXEXT() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[22]; }
    bool RDTSCP() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[27]; }
    bool _3DNOWEXT() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[30]; }
    bool _3DNOW() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[31]; }

    // Wide-vector extensions (leaf 1 ECX and leaf 7 EBX)
    bool AVX() const { return CPU_Rep.f_1_ECX_[28]; }
    bool F16C() const { return CPU_Rep.f_1_ECX_[29]; }
    bool AVX2() const { return CPU_Rep.f_7_EBX_[5]; }
    bool AVX512F() const { return CPU_Rep.f_7_EBX_[16]; }
    bool AVX512PF() const { return CPU_Rep.f_7_EBX_[26]; }
    bool AVX512ER() const { return CPU_Rep.f_7_EBX_[27]; }
    bool AVX512CD() const { return CPU_Rep.f_7_EBX_[28]; }

private:
    // Holds everything read from CPUID. All work happens in the constructor;
    // afterwards the object is only read.
    class InstructionSet_Internal
    {
        // Register snapshot of one CPUID call, in EAX, EBX, ECX, EDX order.
        using CpuidRegs = std::array<int, 4>;

        // Returns the highest leaf the CPU reports for the range starting at
        // `base` (0 for basic leaves, 0x80000000 for extended leaves).
        static unsigned HighestLeaf(unsigned base)
        {
#if defined(_WIN32)
            CpuidRegs regs{};
            __cpuid(regs.data(), static_cast<int>(base));
            return static_cast<unsigned>(regs[0]);
#else
            return __get_cpuid_max(base, nullptr);
#endif
        }

        // Executes CPUID for `leaf` with sub-leaf 0 and returns the registers.
        static CpuidRegs Query(unsigned leaf)
        {
            CpuidRegs regs{};
#if defined(_WIN32)
            __cpuidex(regs.data(), static_cast<int>(leaf), 0);
#else
            int* r = regs.data();
            __cpuid_count(leaf, 0, r[0], r[1], r[2], r[3]);
#endif
            return regs;
        }

    public:
        InstructionSet_Internal()
        {
            // Snapshot every basic leaf from 0 up to the highest one reported.
            nIds_ = static_cast<int>(HighestLeaf(0));
            for (int leaf = 0; leaf <= nIds_; ++leaf)
            {
                data_.push_back(Query(static_cast<unsigned>(leaf)));
            }

            // The 12-byte vendor string is stored in EBX, EDX, ECX (in that
            // order) of leaf 0. Copy bytewise instead of storing ints through
            // a casted char pointer, which would break aliasing rules.
            char vendor[0x20] = {};
            memcpy(vendor, &data_[0][1], sizeof(int));
            memcpy(vendor + 4, &data_[0][3], sizeof(int));
            memcpy(vendor + 8, &data_[0][2], sizeof(int));
            vendor_ = vendor;
            if (vendor_ == "GenuineIntel")
            {
                isIntel_ = true;
            }
            else if (vendor_ == "AuthenticAMD")
            {
                isAMD_ = true;
            }

            // Feature flags for function 0x00000001
            if (nIds_ >= 1)
            {
                f_1_ECX_ = data_[1][2];
                f_1_EDX_ = data_[1][3];
            }

            // Feature flags for function 0x00000007
            if (nIds_ >= 7)
            {
                f_7_EBX_ = data_[7][1];
                f_7_ECX_ = data_[7][2];
            }

            // Snapshot every extended leaf. When extended leaves are not
            // reported, nExIds_ is below 0x80000000 and the loop is skipped.
            nExIds_ = HighestLeaf(0x80000000);
            for (unsigned leaf = 0x80000000; leaf <= nExIds_; ++leaf)
            {
                extdata_.push_back(Query(leaf));
            }

            // Feature flags for function 0x80000001
            if (nExIds_ >= 0x80000001)
            {
                f_81_ECX_ = extdata_[1][2];
                f_81_EDX_ = extdata_[1][3];
            }

            // The brand string spans leaves 0x80000002..0x80000004, 16 bytes
            // each; the zero-filled buffer guarantees termination.
            if (nExIds_ >= 0x80000004)
            {
                char brand[0x40] = {};
                memcpy(brand, extdata_[2].data(), sizeof(CpuidRegs));
                memcpy(brand + 16, extdata_[3].data(), sizeof(CpuidRegs));
                memcpy(brand + 32, extdata_[4].data(), sizeof(CpuidRegs));
                brand_ = brand;
            }
        }

        int nIds_{ 0 };            // highest basic leaf
        unsigned nExIds_{ 0 };     // highest extended leaf
        std::string vendor_;
        std::string brand_;
        bool isIntel_{ false };
        bool isAMD_{ false };
        std::bitset<32> f_1_ECX_{};
        std::bitset<32> f_1_EDX_{};
        std::bitset<32> f_7_EBX_{};
        std::bitset<32> f_7_ECX_{};
        std::bitset<32> f_81_ECX_{};
        std::bitset<32> f_81_EDX_{};
        std::vector<CpuidRegs> data_;     // basic leaves, indexed by leaf
        std::vector<CpuidRegs> extdata_;  // extended leaves, indexed by leaf - 0x80000000
    };

    const InstructionSet_Internal CPU_Rep;
};