2 * Copyright © 2013 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "gen_device_info.h"
27 #include "compiler/shader_enums.h"
/* Device table for the "i965" family token.  gen_get_device_info() copies
 * a gen_device_info_<family> table into the caller's struct for every
 * CHIPSET entry that names that family.
 * NOTE(review): the embedded numbering jumps (29, 31, 35), so some
 * initializers and the closing brace are not visible here -- confirm
 * against the complete file before changing values.
 */
29 static const struct gen_device_info gen_device_info_i965
= {
31 .has_negative_rhw_bug
= true,
35 .max_wm_threads
= 8 * 4,
/* Device table for the "g4x" family token.  Visible fields: surface tile
 * offsets are enabled and max_wm_threads is written as the product 10 * 5
 * (the meaning of the two factors is not stated in this view).
 * NOTE(review): numbering skips 42-44 and 46-49; the initializer is
 * only partially visible.
 */
41 static const struct gen_device_info gen_device_info_g4x
= {
45 .has_surface_tile_offset
= true,
50 .max_wm_threads
= 10 * 5,
/* Device table for the "ilk" family token.  Visible fields: surface tile
 * offsets are enabled and max_wm_threads is the product 12 * 6.
 * NOTE(review): numbering skips lines between 56 and 64, so further
 * initializers and the closing brace are not shown here.
 */
56 static const struct gen_device_info gen_device_info_ilk
= {
60 .has_surface_tile_offset
= true,
64 .max_wm_threads
= 12 * 6,
/* Device table for the "snb_gt1" family token.  Enables HiZ/separate
 * stencil, surface tile offsets and the unlit-centroid workaround flag.
 * max_gs_threads is deliberately set to the conservative value 21.
 * NOTE(review): MESA_SHADER_VERTEX appears twice (24, then 256); these
 * presumably sit in two different nested array initializers whose opening
 * lines (numbering gaps around 84 and 87) are not visible -- confirm.
 */
70 static const struct gen_device_info gen_device_info_snb_gt1
= {
73 .has_hiz_and_separate_stencil
= true,
76 .has_surface_tile_offset
= true,
77 .needs_unlit_centroid_workaround
= true,
80 .max_gs_threads
= 21, /* conservative; 24 if rendering disabled. */
85 [MESA_SHADER_VERTEX
] = 24,
88 [MESA_SHADER_VERTEX
] = 256,
89 [MESA_SHADER_GEOMETRY
] = 256,
/* Device table for the "snb_gt2" family token.  Same feature flags as are
 * visible for snb_gt1, with explicit thread limits of 60 (vs), 60 (gs)
 * and 80 (wm).
 * NOTE(review): the two MESA_SHADER_VERTEX designators (24, then 256)
 * presumably belong to separate nested arrays whose opening lines are
 * missing from this view -- confirm against the complete file.
 */
94 static const struct gen_device_info gen_device_info_snb_gt2
= {
97 .has_hiz_and_separate_stencil
= true,
100 .has_surface_tile_offset
= true,
101 .needs_unlit_centroid_workaround
= true,
103 .max_vs_threads
= 60,
104 .max_gs_threads
= 60,
105 .max_wm_threads
= 80,
109 [MESA_SHADER_VERTEX
] = 24,
112 [MESA_SHADER_VERTEX
] = 256,
113 [MESA_SHADER_GEOMETRY
] = 256,
/* Designated initializers shared by the tables that expand GEN7_FEATURES
 * at the head of their initializer list (ivb_gt1, ivb_gt2 and byt in this
 * file).  The expansion ends without a trailing comma, so each user
 * supplies the comma itself.
 * NOTE(review): numbering skips 119 and 122-123; the macro body shown
 * here is incomplete.
 */
118 #define GEN7_FEATURES \
120 .has_hiz_and_separate_stencil = true, \
121 .must_use_separate_stencil = true, \
124 .has_surface_tile_offset = true
/* Device table for the "ivb_gt1" family token: GEN7_FEATURES plus
 * is_ivybridge, gt = 1 and per-stage thread limits (36 for vs/tcs/tes/gs/cs,
 * 48 for wm).
 * NOTE(review): the per-stage designators appear in two groups (VERTEX 32 /
 * TESS_EVAL 10, then VERTEX 512 ... GEOMETRY 192); the enclosing field
 * names are on lines not visible here -- presumably minimum and maximum
 * URB entry counts, confirm against the complete file.
 */
126 static const struct gen_device_info gen_device_info_ivb_gt1
= {
127 GEN7_FEATURES
, .is_ivybridge
= true, .gt
= 1,
129 .max_vs_threads
= 36,
130 .max_tcs_threads
= 36,
131 .max_tes_threads
= 36,
132 .max_gs_threads
= 36,
133 .max_wm_threads
= 48,
134 .max_cs_threads
= 36,
138 [MESA_SHADER_VERTEX
] = 32,
139 [MESA_SHADER_TESS_EVAL
] = 10,
142 [MESA_SHADER_VERTEX
] = 512,
143 [MESA_SHADER_TESS_CTRL
] = 32,
144 [MESA_SHADER_TESS_EVAL
] = 288,
145 [MESA_SHADER_GEOMETRY
] = 192,
/* Device table for the "ivb_gt2" family token: GEN7_FEATURES plus
 * is_ivybridge, gt = 2 and larger thread limits than ivb_gt1 (128 for
 * vs/tcs/tes/gs, 172 for wm, 64 for cs).
 * NOTE(review): the two groups of per-stage designators belong to nested
 * initializers whose opening lines are not visible in this view.
 */
150 static const struct gen_device_info gen_device_info_ivb_gt2
= {
151 GEN7_FEATURES
, .is_ivybridge
= true, .gt
= 2,
153 .max_vs_threads
= 128,
154 .max_tcs_threads
= 128,
155 .max_tes_threads
= 128,
156 .max_gs_threads
= 128,
157 .max_wm_threads
= 172,
158 .max_cs_threads
= 64,
162 [MESA_SHADER_VERTEX
] = 32,
163 [MESA_SHADER_TESS_EVAL
] = 10,
166 [MESA_SHADER_VERTEX
] = 704,
167 [MESA_SHADER_TESS_CTRL
] = 64,
168 [MESA_SHADER_TESS_EVAL
] = 448,
169 [MESA_SHADER_GEOMETRY
] = 320,
/* Device table for the "byt" family token: GEN7_FEATURES plus is_baytrail
 * and gt = 1.  The visible thread limits and per-stage values match
 * ivb_gt1 except max_cs_threads, which is 32 here.
 * NOTE(review): numbering skips 176-177, 184-186 and 189-190, so parts of
 * the initializer (including the nested openers) are not visible.
 */
174 static const struct gen_device_info gen_device_info_byt
= {
175 GEN7_FEATURES
, .is_baytrail
= true, .gt
= 1,
178 .max_vs_threads
= 36,
179 .max_tcs_threads
= 36,
180 .max_tes_threads
= 36,
181 .max_gs_threads
= 36,
182 .max_wm_threads
= 48,
183 .max_cs_threads
= 32,
187 [MESA_SHADER_VERTEX
] = 32,
188 [MESA_SHADER_TESS_EVAL
] = 10,
191 [MESA_SHADER_VERTEX
] = 512,
192 [MESA_SHADER_TESS_CTRL
] = 32,
193 [MESA_SHADER_TESS_EVAL
] = 288,
194 [MESA_SHADER_GEOMETRY
] = 192,
/* Designated initializers shared by the hsw_gt1, hsw_gt2 and hsw_gt3
 * tables: marks the device as Haswell and enables SIMD16 3-source support
 * and the resource streamer.
 * NOTE(review): the line numbered 200 is not visible, so whether this macro
 * also expands GEN7_FEATURES cannot be confirmed from this view.
 */
199 #define HSW_FEATURES \
201 .is_haswell = true, \
202 .supports_simd16_3src = true, \
203 .has_resource_streamer = true
/* Device table for the "hsw_gt1" family token: HSW_FEATURES, gt = 1,
 * thread limits of 70 (vs/tcs/tes/gs/cs) and 102 (wm).
 * NOTE(review): the per-stage designators fall into two groups whose
 * enclosing field names are on lines missing from this view.
 */
205 static const struct gen_device_info gen_device_info_hsw_gt1
= {
206 HSW_FEATURES
, .gt
= 1,
208 .max_vs_threads
= 70,
209 .max_tcs_threads
= 70,
210 .max_tes_threads
= 70,
211 .max_gs_threads
= 70,
212 .max_wm_threads
= 102,
213 .max_cs_threads
= 70,
217 [MESA_SHADER_VERTEX
] = 32,
218 [MESA_SHADER_TESS_EVAL
] = 10,
221 [MESA_SHADER_VERTEX
] = 640,
222 [MESA_SHADER_TESS_CTRL
] = 64,
223 [MESA_SHADER_TESS_EVAL
] = 384,
224 [MESA_SHADER_GEOMETRY
] = 256,
/* Device table for the "hsw_gt2" family token: HSW_FEATURES, gt = 2,
 * thread limits of 280 (vs/tes), 256 (tcs/gs), 204 (wm) and 70 (cs).
 * NOTE(review): the nested initializers that hold the per-stage values
 * open on lines not visible here; the closing brace is also missing.
 */
229 static const struct gen_device_info gen_device_info_hsw_gt2
= {
230 HSW_FEATURES
, .gt
= 2,
232 .max_vs_threads
= 280,
233 .max_tcs_threads
= 256,
234 .max_tes_threads
= 280,
235 .max_gs_threads
= 256,
236 .max_wm_threads
= 204,
237 .max_cs_threads
= 70,
241 [MESA_SHADER_VERTEX
] = 64,
242 [MESA_SHADER_TESS_EVAL
] = 10,
245 [MESA_SHADER_VERTEX
] = 1664,
246 [MESA_SHADER_TESS_CTRL
] = 128,
247 [MESA_SHADER_TESS_EVAL
] = 960,
248 [MESA_SHADER_GEOMETRY
] = 640,
/* Device table for the "hsw_gt3" family token: HSW_FEATURES, gt = 3.
 * The visible values match hsw_gt2 except max_wm_threads, which doubles
 * from 204 to 408.
 * NOTE(review): numbering skips 255, 262-264 and 267-268, so the
 * initializer is only partially visible.
 */
253 static const struct gen_device_info gen_device_info_hsw_gt3
= {
254 HSW_FEATURES
, .gt
= 3,
256 .max_vs_threads
= 280,
257 .max_tcs_threads
= 256,
258 .max_tes_threads
= 280,
259 .max_gs_threads
= 256,
260 .max_wm_threads
= 408,
261 .max_cs_threads
= 70,
265 [MESA_SHADER_VERTEX
] = 64,
266 [MESA_SHADER_TESS_EVAL
] = 10,
269 [MESA_SHADER_VERTEX
] = 1664,
270 [MESA_SHADER_TESS_CTRL
] = 128,
271 [MESA_SHADER_TESS_EVAL
] = 960,
272 [MESA_SHADER_GEOMETRY
] = 640,
/* Designated initializers shared by the bdw_gt1, bdw_gt2, bdw_gt3 and chv
 * tables: feature flags plus default thread limits (504 for vs/tcs/tes/gs,
 * 384 for wm).  A table may repeat one of these designators after the
 * macro; in C the later initializer for the same member is the one that
 * takes effect, which is how chv lowers the limits.
 * NOTE(review): numbering skips 278 and 282-283; the body shown is partial.
 */
277 #define GEN8_FEATURES \
279 .has_hiz_and_separate_stencil = true, \
280 .has_resource_streamer = true, \
281 .must_use_separate_stencil = true, \
284 .supports_simd16_3src = true, \
285 .has_surface_tile_offset = true, \
286 .max_vs_threads = 504, \
287 .max_tcs_threads = 504, \
288 .max_tes_threads = 504, \
289 .max_gs_threads = 504, \
290 .max_wm_threads = 384
/* Device table for the "bdw_gt1" family token: GEN8_FEATURES, gt = 1 and
 * max_cs_threads = 42; the other thread limits come from GEN8_FEATURES.
 * NOTE(review): the per-stage designators form two groups whose enclosing
 * field names are on lines not visible in this view.
 */
292 static const struct gen_device_info gen_device_info_bdw_gt1
= {
293 GEN8_FEATURES
, .gt
= 1,
295 .max_cs_threads
= 42,
299 [MESA_SHADER_VERTEX
] = 64,
300 [MESA_SHADER_TESS_EVAL
] = 34,
303 [MESA_SHADER_VERTEX
] = 2560,
304 [MESA_SHADER_TESS_CTRL
] = 504,
305 [MESA_SHADER_TESS_EVAL
] = 1536,
306 [MESA_SHADER_GEOMETRY
] = 960,
/* Device table for the "bdw_gt2" family token: GEN8_FEATURES, gt = 2 and
 * max_cs_threads = 56.  The visible per-stage values are identical to
 * bdw_gt1.
 * NOTE(review): numbering skips 313, 315-317 and 320-321, so nested
 * openers and other fields are not visible here.
 */
311 static const struct gen_device_info gen_device_info_bdw_gt2
= {
312 GEN8_FEATURES
, .gt
= 2,
314 .max_cs_threads
= 56,
318 [MESA_SHADER_VERTEX
] = 64,
319 [MESA_SHADER_TESS_EVAL
] = 34,
322 [MESA_SHADER_VERTEX
] = 2560,
323 [MESA_SHADER_TESS_CTRL
] = 504,
324 [MESA_SHADER_TESS_EVAL
] = 1536,
325 [MESA_SHADER_GEOMETRY
] = 960,
/* Device table for the "bdw_gt3" family token: GEN8_FEATURES, gt = 3 and
 * max_cs_threads = 56.  The visible per-stage values are identical to
 * bdw_gt1 and bdw_gt2; any difference between the variants would have to
 * be on lines missing from this view (e.g. numbering gaps 334-336).
 */
330 static const struct gen_device_info gen_device_info_bdw_gt3
= {
331 GEN8_FEATURES
, .gt
= 3,
333 .max_cs_threads
= 56,
337 [MESA_SHADER_VERTEX
] = 64,
338 [MESA_SHADER_TESS_EVAL
] = 34,
341 [MESA_SHADER_VERTEX
] = 2560,
342 [MESA_SHADER_TESS_CTRL
] = 504,
343 [MESA_SHADER_TESS_EVAL
] = 1536,
344 [MESA_SHADER_GEOMETRY
] = 960,
/* Device table for the "chv" family token: GEN8_FEATURES followed by
 * explicit thread limits (80 for vs/tcs/tes/gs, 128 for wm, 6 * 7 for cs).
 * These designators repeat members already set by GEN8_FEATURES; the later
 * initializer wins, so the smaller values here are the effective ones.
 * NOTE(review): is_cherryview is initialized with 1 whereas the other
 * flags in this file use true -- presumably the same boolean kind of
 * field; consider `true` for consistency once the struct is confirmed.
 */
349 static const struct gen_device_info gen_device_info_chv
= {
350 GEN8_FEATURES
, .is_cherryview
= 1, .gt
= 1,
353 .max_vs_threads
= 80,
354 .max_tcs_threads
= 80,
355 .max_tes_threads
= 80,
356 .max_gs_threads
= 80,
357 .max_wm_threads
= 128,
358 .max_cs_threads
= 6 * 7,
362 [MESA_SHADER_VERTEX
] = 34,
363 [MESA_SHADER_TESS_EVAL
] = 34,
366 [MESA_SHADER_VERTEX
] = 640,
367 [MESA_SHADER_TESS_CTRL
] = 80,
368 [MESA_SHADER_TESS_EVAL
] = 384,
369 [MESA_SHADER_GEOMETRY
] = 256,
/* Designated initializers shared by the skl_gt1..skl_gt4 tables: feature
 * flags, thread limits (336 for vs/gs/tcs/tes, 56 for cs) and two groups
 * of per-stage values.  No max_wm_threads is visible here;
 * gen_get_device_info() assigns that member at runtime when
 * devinfo->gen >= 9.
 * NOTE(review): numbering skips 375, 379-380, 388-390, 393-394 and
 * 399-401, so nested openers/closers and other members are not visible.
 */
374 #define GEN9_FEATURES \
376 .has_hiz_and_separate_stencil = true, \
377 .has_resource_streamer = true, \
378 .must_use_separate_stencil = true, \
381 .supports_simd16_3src = true, \
382 .has_surface_tile_offset = true, \
383 .max_vs_threads = 336, \
384 .max_gs_threads = 336, \
385 .max_tcs_threads = 336, \
386 .max_tes_threads = 336, \
387 .max_cs_threads = 56, \
391 [MESA_SHADER_VERTEX] = 64, \
392 [MESA_SHADER_TESS_EVAL] = 34, \
395 [MESA_SHADER_VERTEX] = 1856, \
396 [MESA_SHADER_TESS_CTRL] = 672, \
397 [MESA_SHADER_TESS_EVAL] = 1120, \
398 [MESA_SHADER_GEOMETRY] = 640, \
/* Designated initializers for a lower-limit Gen9 configuration: thread
 * limits of 112 (vs/tcs/tes/gs) and 6 * 6 for cs, plus two groups of
 * per-stage values.
 * NOTE(review): numbering skips 403-407, so the head of the macro is not
 * visible; no user of this macro is visible either -- presumably the
 * bxt/glk tables, whose bodies are missing from this view. Confirm.
 */
402 #define GEN9_LP_FEATURES \
408 .max_vs_threads = 112, \
409 .max_tcs_threads = 112, \
410 .max_tes_threads = 112, \
411 .max_gs_threads = 112, \
412 .max_cs_threads = 6 * 6, \
416 [MESA_SHADER_VERTEX] = 34, \
417 [MESA_SHADER_TESS_EVAL] = 34, \
420 [MESA_SHADER_VERTEX] = 704, \
421 [MESA_SHADER_TESS_CTRL] = 256, \
422 [MESA_SHADER_TESS_EVAL] = 416, \
423 [MESA_SHADER_GEOMETRY] = 256, \
/* Variant of the Gen9 low-limit initializers in which the visible
 * vs/tcs/tes/gs thread limits (56) and the second group of per-stage values
 * (352/128/208/128) are exactly half of those in GEN9_LP_FEATURES;
 * max_cs_threads (6 * 6) and the first per-stage group are unchanged.
 * NOTE(review): the line numbered 428 is not visible, so the head of the
 * macro (possibly an expansion of GEN9_LP_FEATURES) cannot be confirmed.
 */
427 #define GEN9_LP_FEATURES_2X6 \
429 .max_vs_threads = 56, \
430 .max_tcs_threads = 56, \
431 .max_tes_threads = 56, \
432 .max_gs_threads = 56, \
433 .max_cs_threads = 6 * 6, \
437 [MESA_SHADER_VERTEX] = 34, \
438 [MESA_SHADER_TESS_EVAL] = 34, \
441 [MESA_SHADER_VERTEX] = 352, \
442 [MESA_SHADER_TESS_CTRL] = 128, \
443 [MESA_SHADER_TESS_EVAL] = 208, \
444 [MESA_SHADER_GEOMETRY] = 128, \
/* Device table for the "skl_gt1" family token: GEN9_FEATURES with gt = 1.
 * NOTE(review): numbering skips 450-453, so any further members and the
 * closing brace are not visible in this view.
 */
448 static const struct gen_device_info gen_device_info_skl_gt1
= {
449 GEN9_FEATURES
, .gt
= 1,
/* Device table for the "skl_gt2" family token: GEN9_FEATURES with gt = 2.
 * NOTE(review): lines numbered 456-458 are not visible; the initializer
 * may contain members beyond what is shown.
 */
454 static const struct gen_device_info gen_device_info_skl_gt2
= {
455 GEN9_FEATURES
, .gt
= 2,
/* Device table for the "skl_gt3" family token: GEN9_FEATURES with gt = 3.
 * NOTE(review): lines numbered 461-463 are not visible; the initializer
 * may contain members beyond what is shown.
 */
459 static const struct gen_device_info gen_device_info_skl_gt3
= {
460 GEN9_FEATURES
, .gt
= 3,
/* Device table for the "skl_gt4" family token: GEN9_FEATURES with gt = 4
 * and an explicit urb.size of 1008 / 3 (integer division, i.e. 336).  The
 * quoted documentation below gives the 1008KB URB cap as the reason; the
 * divisor 3 presumably matches the "3*384KB" figure in that quote --
 * confirm the unit and meaning of urb.size against the struct definition.
 * NOTE(review): the quoted comment's terminator (numbered 474) is not
 * visible in this view.
 */
464 static const struct gen_device_info gen_device_info_skl_gt4
= {
465 GEN9_FEATURES
, .gt
= 4,
467 /* From the "L3 Allocation and Programming" documentation:
469 * "URB is limited to 1008KB due to programming restrictions. This is not a
470 * restriction of the L3 implementation, but of the FF and other clients.
471 * Therefore, in a GT4 implementation it is possible for the programmed
472 * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
473 * only 1008KB of this will be used."
475 .urb
.size
= 1008 / 3,
/* Device table for the "bxt" family token.
 * NOTE(review): only the declaration is visible; the initializer body
 * (lines numbered 479-481) is missing from this view.
 */
478 static const struct gen_device_info gen_device_info_bxt
= {
/* Device table for the "bxt_2x6" family token.
 * NOTE(review): only the declaration is visible; the initializer body is
 * missing from this view (presumably it expands GEN9_LP_FEATURES_2X6 --
 * confirm).
 */
482 static const struct gen_device_info gen_device_info_bxt_2x6
= {
486 * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
487 * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
/* Device table for the "kbl_gt1" family token.  The only member visible is
 * max_cs_threads = 7 * 6.
 * NOTE(review): lines numbered 491-493 and 495-498 are not visible, so
 * the feature macro and remaining members cannot be seen here.
 */
490 static const struct gen_device_info gen_device_info_kbl_gt1
= {
494 .max_cs_threads
= 7 * 6,
/* Device table for the "kbl_gt1_5" family token.  The only member visible
 * is max_cs_threads = 7 * 6, the same value as kbl_gt1.
 * NOTE(review): lines numbered 500-502 and 504-506 are not visible.
 */
499 static const struct gen_device_info gen_device_info_kbl_gt1_5
= {
503 .max_cs_threads
= 7 * 6,
/* Device table for the "kbl_gt2" family token.
 * NOTE(review): only the declaration is visible; the initializer body
 * (lines numbered 508-513) is missing from this view.
 */
507 static const struct gen_device_info gen_device_info_kbl_gt2
= {
/* Device table for the "kbl_gt3" family token.
 * NOTE(review): only the declaration is visible; the initializer body
 * (lines numbered 515-520) is missing from this view.
 */
514 static const struct gen_device_info gen_device_info_kbl_gt3
= {
/* Device table for the "kbl_gt4" family token.  The visible member is
 * urb.size = 1008 / 3 (integer division, i.e. 336), the same expression
 * used for skl_gt4 and justified by the same quoted 1008KB URB limit.
 * NOTE(review): the quote's opening and closing comment delimiters and
 * its last line (numbered 525, 533-534) are not visible in this view, nor
 * is the feature macro this table expands.
 */
521 static const struct gen_device_info gen_device_info_kbl_gt4
= {
526 * From the "L3 Allocation and Programming" documentation:
528 * "URB is limited to 1008KB due to programming restrictions. This
529 * is not a restriction of the L3 implementation, but of the FF and
530 * other clients. Therefore, in a GT4 implementation it is
531 * possible for the programmed allocation of the L3 data array to
532 * provide 3*384KB=1152KB for URB, but only 1008KB of this
535 .urb
.size
= 1008 / 3,
/* Device table for the "glk" family token.
 * NOTE(review): only the declaration is visible; the initializer body
 * (lines numbered 540-542) is missing from this view.
 */
539 static const struct gen_device_info gen_device_info_glk
= {
/* Device table for the "glk_2x6" family token.
 * NOTE(review): only the declaration is visible; the initializer body
 * (lines numbered 544-547) is missing from this view.
 */
543 static const struct gen_device_info gen_device_info_glk_2x6
= {
/* Fill *devinfo with the static table that matches PCI device ID devid.
 *
 * devid:   PCI ID to look up.
 * devinfo: output; overwritten with a copy of the matching
 *          gen_device_info_<family> table.
 *
 * For devices whose table reports gen >= 9, max_wm_threads is then
 * recomputed as 64 * num_slices * 4, following the scratch-space note
 * quoted in the body.
 * NOTE(review): the return type, the switch statement, its default label
 * and the return statements are on lines not visible in this view, so the
 * result for an unknown ID cannot be documented from here.
 * NOTE(review): devid is a signed int but is printed with %x, which
 * expects unsigned int; consider a cast when touching this code.
 */
548 gen_get_device_info(int devid
, struct gen_device_info
*devinfo
)
/* Each CHIPSET(id, family, name) entry of the included PCI-ID list becomes
 * one case label that copies the family's table into *devinfo. */
552 #define CHIPSET(id, family, name) \
553 case id: *devinfo = gen_device_info_##family; break;
554 #include "pci_ids/i965_pci_ids.h"
/* Diagnostic for an ID with no CHIPSET entry (presumably the default
 * branch of the switch -- the label itself is not visible here). */
556 fprintf(stderr
, "i965_dri.so does not support the 0x%x PCI ID.\n", devid
);
560 /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
562 * "Scratch Space per slice is computed based on 4 sub-slices. SW must
563 * allocate scratch space enough so that each slice has 4 slices allowed."
565 * The equivalent internal documentation says that this programming note
566 * applies to all Gen9+ platforms.
568 * The hardware typically calculates the scratch space pointer by taking
569 * the base address, and adding per-thread-scratch-space * thread ID.
570 * Extra padding can be necessary depending how the thread IDs are
571 * calculated for a particular shader stage.
573 if (devinfo
->gen
>= 9) {
574 devinfo
->max_wm_threads
= 64 /* threads-per-PSD */
575 * devinfo
->num_slices
576 * 4; /* effective subslices per slice */
/* Look up the marketing/name string associated with PCI device ID devid.
 *
 * Each CHIPSET(id, family, name) entry of the included PCI-ID list expands
 * to `case id: return name;`, so a matching ID returns that entry's name.
 * NOTE(review): the return type, the enclosing switch and the value
 * returned for an unknown ID are on lines not visible in this view.
 */
583 gen_get_device_name(int devid
)
587 #define CHIPSET(id, family, name) case id: return name;
588 #include "pci_ids/i965_pci_ids.h"