* Chia-I Wu <olv@lunarg.com>
*/
+#include "pipe/p_state.h"
+#include "os/os_misc.h"
#include "util/u_format_s3tc.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
-#include "intel_chipset.h"
-#include "intel_reg.h" /* for TIMESTAMP */
+#include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */
#include "intel_winsys.h"
#include "ilo_context.h"
#include "ilo_format.h"
#include "ilo_resource.h"
+#include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */
#include "ilo_public.h"
#include "ilo_screen.h"
+struct ilo_fence { /* reference-counted fence object that may carry a bo */
+ struct pipe_reference reference; /* reference count; set to 1 by ilo_fence_create() */
+ struct intel_bo *bo; /* bo stored at creation time; may be NULL */
+};
+
int ilo_debug;
static const struct debug_named_value ilo_debug_flags[] = {
- { "3d", ILO_DEBUG_3D, "Dump 3D commands and states" },
+ { "batch", ILO_DEBUG_BATCH, "Dump batch/state/surface/instruction buffers" },
{ "vs", ILO_DEBUG_VS, "Dump vertex shaders" },
{ "gs", ILO_DEBUG_GS, "Dump geometry shaders" },
{ "fs", ILO_DEBUG_FS, "Dump fragment shaders" },
{ "cs", ILO_DEBUG_CS, "Dump compute shaders" },
{ "draw", ILO_DEBUG_DRAW, "Show draw information" },
- { "flush", ILO_DEBUG_FLUSH, "Show batch buffer flushes" },
+ { "submit", ILO_DEBUG_SUBMIT, "Show batch buffer submissions" },
{ "nohw", ILO_DEBUG_NOHW, "Do not send commands to HW" },
{ "nocache", ILO_DEBUG_NOCACHE, "Always invalidate HW caches" },
{ "nohiz", ILO_DEBUG_NOHIZ, "Disable HiZ" },
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return UINT_MAX;
case PIPE_SHADER_CAP_MAX_INPUTS:
+ case PIPE_SHADER_CAP_MAX_OUTPUTS:
/* this is limited by how many attributes SF can remap */
return 16;
- case PIPE_SHADER_CAP_MAX_CONSTS:
- return 1024;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ return 1024 * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return ILO_MAX_CONST_BUFFERS;
case PIPE_SHADER_CAP_MAX_TEMPS:
return 256;
- case PIPE_SHADER_CAP_MAX_ADDRS:
- return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
enum pipe_compute_cap param,
void *ret)
{
+ struct ilo_screen *is = ilo_screen(screen);
union {
const char *ir_target;
uint64_t grid_dimension;
uint64_t max_private_size;
uint64_t max_input_size;
uint64_t max_mem_alloc_size;
+ uint32_t max_clock_frequency;
+ uint32_t max_compute_units;
+ uint32_t images_supported;
} val;
const void *ptr;
int size;
- /* XXX some randomly chosen values */
switch (param) {
case PIPE_COMPUTE_CAP_IR_TARGET:
val.ir_target = "ilog";
size = sizeof(val.grid_dimension);
break;
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
- val.max_grid_size[0] = 65535;
- val.max_grid_size[1] = 65535;
- val.max_grid_size[2] = 1;
+ val.max_grid_size[0] = 0xffffffffu;
+ val.max_grid_size[1] = 0xffffffffu;
+ val.max_grid_size[2] = 0xffffffffu;
ptr = &val.max_grid_size;
size = sizeof(val.max_grid_size);
break;
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
- val.max_block_size[0] = 512;
- val.max_block_size[1] = 512;
- val.max_block_size[2] = 512;
+ val.max_block_size[0] = 1024;
+ val.max_block_size[1] = 1024;
+ val.max_block_size[2] = 1024;
ptr = &val.max_block_size;
size = sizeof(val.max_block_size);
break;
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
- val.max_threads_per_block = 512;
+ val.max_threads_per_block = 1024;
ptr = &val.max_threads_per_block;
size = sizeof(val.max_threads_per_block);
break;
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
- val.max_global_size = 4;
+ /* \see ilo_max_resource_size */
+ val.max_global_size = 1u << 31;
ptr = &val.max_global_size;
size = sizeof(val.max_global_size);
break;
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+ /* Shared Local Memory Size of INTERFACE_DESCRIPTOR_DATA */
val.max_local_size = 64 * 1024;
ptr = &val.max_local_size;
size = sizeof(val.max_local_size);
break;
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
- val.max_private_size = 32768;
+ /* scratch size */
+ val.max_private_size = 12 * 1024;
ptr = &val.max_private_size;
size = sizeof(val.max_private_size);
break;
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
- val.max_input_size = 256;
+ val.max_input_size = 1024;
ptr = &val.max_input_size;
size = sizeof(val.max_input_size);
break;
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
- val.max_mem_alloc_size = 128 * 1024 * 1024;
+ val.max_mem_alloc_size = 1u << 31;
ptr = &val.max_mem_alloc_size;
size = sizeof(val.max_mem_alloc_size);
break;
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ val.max_clock_frequency = 1000;
+
+ ptr = &val.max_clock_frequency;
+ size = sizeof(val.max_clock_frequency);
+ break;
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ val.max_compute_units = is->dev.eu_count;
+
+ ptr = &val.max_compute_units;
+ size = sizeof(val.max_compute_units);
+ break;
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ val.images_supported = 1;
+
+ ptr = &val.images_supported;
+ size = sizeof(val.images_supported);
+ break;
default:
ptr = NULL;
size = 0;
* Max WxHxD for 2D and CUBE Max WxHxD for 3D
* GEN6 8192x8192x512 2048x2048x2048
* GEN7 16384x16384x2048 2048x2048x2048
- *
- * However, when the texutre size is large, things become unstable. We
- * require the maximum texture size to be 2^30 bytes in
- * screen->can_create_resource(). Since the maximum pixel size is 2^4
- * bytes (PIPE_FORMAT_R32G32B32A32_FLOAT), textures should not have more
- * than 2^26 pixels.
- *
- * For 3D textures, we have to set the maximum number of levels to 9,
- * which has at most 2^24 pixels. For 2D textures, we set it to 14,
- * which has at most 2^26 pixels. And for cube textures, we has to set
- * it to 12.
*/
- return 14;
+ return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 9;
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 12;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
return false;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_SM3:
return true;
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- if (is->dev.gen >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
+ if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
return 0;
return ILO_MAX_SO_BUFFERS;
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_INDEP_BLEND_FUNC:
return true;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return (is->dev.gen >= ILO_GEN(7)) ? 2048 : 512;
+ return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 2048 : 512;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return true;
+ case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
return -8;
+ case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MAX_TEXEL_OFFSET:
return 7;
case PIPE_CAP_CONDITIONAL_RENDER:
return ILO_MAX_SO_BINDINGS / ILO_MAX_SO_BUFFERS;
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
return ILO_MAX_SO_BINDINGS;
- case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
- case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
- return 0;
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
- if (is->dev.gen >= ILO_GEN(7))
+ if (ilo_dev_gen(&is->dev) >= ILO_GEN(7))
return is->dev.has_gen7_sol_reset;
else
return false; /* TODO */
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
return false;
+ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+ return 2048;
case PIPE_CAP_COMPUTE:
return false; /* TODO */
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return false; /* TODO */
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
- return 64;
+ return ILO_TRANSFER_MAP_BUFFER_ALIGNMENT;
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return true;
case PIPE_CAP_TGSI_TEXCOORD:
return false;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- return true;
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
- return false; /* TODO */
+ return true;
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- /* a BRW_SURFACE_BUFFER can have up to 2^27 elements */
+ /* a GEN6_SURFTYPE_BUFFER can have up to 2^27 elements */
return 1 << 27;
case PIPE_CAP_MAX_VIEWPORTS:
return ILO_MAX_VIEWPORTS;
return PIPE_ENDIAN_LITTLE;
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
return true;
- case PIPE_CAP_TGSI_VS_LAYER:
+ case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
+ return 0;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ return true;
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
return 0;
+ case PIPE_CAP_VENDOR_ID:
+ return 0x8086;
+ case PIPE_CAP_DEVICE_ID:
+ return is->dev.devid;
+ case PIPE_CAP_ACCELERATED:
+ return true;
+ case PIPE_CAP_VIDEO_MEMORY: {
+ /* Once a batch uses more than 75% of the maximum mappable size, we
+ * assume that there's some fragmentation, and we start doing extra
+ * flushing, etc. That's the big cliff apps will care about.
+ */
+ const uint64_t gpu_memory = is->dev.aperture_total * 3 / 4;
+ uint64_t system_memory;
+
+ if (!os_get_total_physical_memory(&system_memory))
+ return 0;
+
+ return (int) (MIN2(gpu_memory, system_memory) >> 20);
+ }
+ case PIPE_CAP_UMA:
+ return true;
+ case PIPE_CAP_CLIP_HALFZ:
+ return true;
+ case PIPE_CAP_VERTEXID_NOBASE:
+ return false;
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ return true;
+
default:
return 0;
}
ilo_get_name(struct pipe_screen *screen)
{
struct ilo_screen *is = ilo_screen(screen);
- const char *chipset;
-
- /* stolen from classic i965 */
- switch (is->dev.devid) {
- case PCI_CHIP_SANDYBRIDGE_GT1:
- case PCI_CHIP_SANDYBRIDGE_GT2:
- case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
- chipset = "Intel(R) Sandybridge Desktop";
- break;
- case PCI_CHIP_SANDYBRIDGE_M_GT1:
- case PCI_CHIP_SANDYBRIDGE_M_GT2:
- case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
- chipset = "Intel(R) Sandybridge Mobile";
- break;
- case PCI_CHIP_SANDYBRIDGE_S:
- chipset = "Intel(R) Sandybridge Server";
- break;
- case PCI_CHIP_IVYBRIDGE_GT1:
- case PCI_CHIP_IVYBRIDGE_GT2:
- chipset = "Intel(R) Ivybridge Desktop";
- break;
- case PCI_CHIP_IVYBRIDGE_M_GT1:
- case PCI_CHIP_IVYBRIDGE_M_GT2:
- chipset = "Intel(R) Ivybridge Mobile";
- break;
- case PCI_CHIP_IVYBRIDGE_S_GT1:
- case PCI_CHIP_IVYBRIDGE_S_GT2:
- chipset = "Intel(R) Ivybridge Server";
- break;
- case PCI_CHIP_BAYTRAIL_M_1:
- case PCI_CHIP_BAYTRAIL_M_2:
- case PCI_CHIP_BAYTRAIL_M_3:
- case PCI_CHIP_BAYTRAIL_M_4:
- case PCI_CHIP_BAYTRAIL_D:
+ const char *chipset = NULL; /* name to return; stays NULL until a branch below recognizes is->dev.devid */
+
+ if (gen_is_vlv(is->dev.devid)) {
chipset = "Intel(R) Bay Trail";
- break;
- case PCI_CHIP_HASWELL_GT1:
- case PCI_CHIP_HASWELL_GT2:
- case PCI_CHIP_HASWELL_GT3:
- case PCI_CHIP_HASWELL_SDV_GT1:
- case PCI_CHIP_HASWELL_SDV_GT2:
- case PCI_CHIP_HASWELL_SDV_GT3:
- case PCI_CHIP_HASWELL_ULT_GT1:
- case PCI_CHIP_HASWELL_ULT_GT2:
- case PCI_CHIP_HASWELL_ULT_GT3:
- case PCI_CHIP_HASWELL_CRW_GT1:
- case PCI_CHIP_HASWELL_CRW_GT2:
- case PCI_CHIP_HASWELL_CRW_GT3:
- chipset = "Intel(R) Haswell Desktop";
- break;
- case PCI_CHIP_HASWELL_M_GT1:
- case PCI_CHIP_HASWELL_M_GT2:
- case PCI_CHIP_HASWELL_M_GT3:
- case PCI_CHIP_HASWELL_SDV_M_GT1:
- case PCI_CHIP_HASWELL_SDV_M_GT2:
- case PCI_CHIP_HASWELL_SDV_M_GT3:
- case PCI_CHIP_HASWELL_ULT_M_GT1:
- case PCI_CHIP_HASWELL_ULT_M_GT2:
- case PCI_CHIP_HASWELL_ULT_M_GT3:
- case PCI_CHIP_HASWELL_CRW_M_GT1:
- case PCI_CHIP_HASWELL_CRW_M_GT2:
- case PCI_CHIP_HASWELL_CRW_M_GT3:
- chipset = "Intel(R) Haswell Mobile";
- break;
- case PCI_CHIP_HASWELL_S_GT1:
- case PCI_CHIP_HASWELL_S_GT2:
- case PCI_CHIP_HASWELL_S_GT3:
- case PCI_CHIP_HASWELL_SDV_S_GT1:
- case PCI_CHIP_HASWELL_SDV_S_GT2:
- case PCI_CHIP_HASWELL_SDV_S_GT3:
- case PCI_CHIP_HASWELL_ULT_S_GT1:
- case PCI_CHIP_HASWELL_ULT_S_GT2:
- case PCI_CHIP_HASWELL_ULT_S_GT3:
- case PCI_CHIP_HASWELL_CRW_S_GT1:
- case PCI_CHIP_HASWELL_CRW_S_GT2:
- case PCI_CHIP_HASWELL_CRW_S_GT3:
- chipset = "Intel(R) Haswell Server";
- break;
- default:
- chipset = "Unknown Intel Chipset";
- break;
+ }
+ else if (gen_is_hsw(is->dev.devid)) {
+ if (gen_is_desktop(is->dev.devid))
+ chipset = "Intel(R) Haswell Desktop";
+ else if (gen_is_mobile(is->dev.devid))
+ chipset = "Intel(R) Haswell Mobile";
+ else if (gen_is_server(is->dev.devid))
+ chipset = "Intel(R) Haswell Server";
+ }
+ else if (gen_is_ivb(is->dev.devid)) {
+ if (gen_is_desktop(is->dev.devid))
+ chipset = "Intel(R) Ivybridge Desktop";
+ else if (gen_is_mobile(is->dev.devid))
+ chipset = "Intel(R) Ivybridge Mobile";
+ else if (gen_is_server(is->dev.devid))
+ chipset = "Intel(R) Ivybridge Server";
+ }
+ else if (gen_is_snb(is->dev.devid)) {
+ if (gen_is_desktop(is->dev.devid))
+ chipset = "Intel(R) Sandybridge Desktop";
+ else if (gen_is_mobile(is->dev.devid))
+ chipset = "Intel(R) Sandybridge Mobile";
+ else if (gen_is_server(is->dev.devid))
+ chipset = "Intel(R) Sandybridge Server";
}
+ if (!chipset) /* no family/segment check above matched: fall back to a generic name */
+ chipset = "Unknown Intel Chipset";
+
return chipset;
}
uint32_t dw[2];
} timestamp;
- intel_winsys_read_reg(is->winsys, TIMESTAMP, &timestamp.val);
+ intel_winsys_read_reg(is->winsys, GEN6_REG_TIMESTAMP, &timestamp.val);
/*
* From the Ivy Bridge PRM, volume 1 part 3, page 107:
struct pipe_fence_handle **p,
struct pipe_fence_handle *f)
{
- struct ilo_fence **ptr = (struct ilo_fence **) p;
struct ilo_fence *fence = ilo_fence(f);
+ struct ilo_fence *old;
- if (!ptr) {
- /* still need to reference fence */
- if (fence)
- pipe_reference(NULL, &fence->reference);
- return;
+ if (likely(p)) {
+ old = ilo_fence(*p);
+ *p = f;
+ }
+ else {
+ old = NULL;
}
- /* reference fence and dereference the one pointed to by ptr */
- if (*ptr && pipe_reference(&(*ptr)->reference, &fence->reference)) {
- struct ilo_fence *old = *ptr;
-
+ STATIC_ASSERT(&((struct ilo_fence *) NULL)->reference == NULL);
+ if (pipe_reference(&old->reference, &fence->reference)) {
if (old->bo)
intel_bo_unreference(old->bo);
FREE(old);
}
-
- *ptr = fence;
}
static boolean
return true;
}
+/**
+ * Create a fence for \p bo. When \p bo is not NULL, it must be submitted
+ * before the fence is waited on or checked.
+ *
+ * The new fence has its reference count initialized to one. When \p bo is
+ * not NULL, intel_bo_reference() is called on it before it is stored in the
+ * fence; a NULL \p bo is stored as-is. \p screen is not used by this
+ * function.
+ *
+ * \return the new fence, or NULL when the allocation fails.
+ */
+struct ilo_fence *
+ilo_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
+{
+ struct ilo_fence *fence;
+
+ fence = CALLOC_STRUCT(ilo_fence);
+ if (!fence)
+ return NULL;
+
+ pipe_reference_init(&fence->reference, 1);
+
+ if (bo)
+ intel_bo_reference(bo);
+ fence->bo = bo;
+
+ return fence;
+}
+
static void
ilo_screen_destroy(struct pipe_screen *screen)
{
init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
{
dev->devid = info->devid;
- dev->max_batch_size = info->max_batch_size;
+ dev->aperture_total = info->aperture_total;
+ dev->aperture_mappable = info->aperture_mappable;
dev->has_llc = info->has_llc;
dev->has_address_swizzling = info->has_address_swizzling;
dev->has_logical_context = info->has_logical_context;
+ dev->has_ppgtt = info->has_ppgtt;
dev->has_timestamp = info->has_timestamp;
dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
}
/*
- * From the Sandy Bridge PRM, volume 4 part 2, page 18:
+ * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT
+ * writes on GEN6.
*
- * "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
- * as 1024 256-bit rows. The GT2 product's URB provides 64KB of
- * storage, arranged as 2048 256-bit rows. A row corresponds in size
- * to an EU GRF register. Read/write access to the URB is generally
- * supported on a row-granular basis."
+ * From the Sandy Bridge PRM, volume 1 part 3, page 101:
*
- * From the Ivy Bridge PRM, volume 4 part 2, page 17:
+ * "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all
+ * code is in a secure environment, independent of address space.
+ * Under this condition, this bit only specifies the address space
+ * (GGTT or PPGTT). All commands are executed "as-is""
*
- * "URB Size URB Rows URB Rows when SLM Enabled
- * 128k 4096 2048
- * 256k 8096 4096"
+ * We need PPGTT to be enabled on GEN6 too.
*/
+ if (!dev->has_ppgtt) {
+ /* experiments show that it does not really matter... */
+ ilo_warn("PPGTT disabled\n");
+ }
- if (IS_HASWELL(info->devid)) {
- dev->gen = ILO_GEN(7.5);
-
- if (IS_HSW_GT3(info->devid)) {
- dev->gt = 3;
+ if (gen_is_hsw(info->devid)) {
+ /*
+ * From the Haswell PRM, volume 4, page 8:
+ *
+ * "Description GT3 GT2 GT1.5 GT1
+ * (...)
+ * EUs (Total) 40 20 12 10
+ * Threads (Total) 280 140 84 70
+ * (...)
+ * URB Size (max, within L3$) 512KB 256KB 256KB 128KB
+ */
+ dev->gen_opaque = ILO_GEN(7.5);
+ dev->gt = gen_get_hsw_gt(info->devid);
+ if (dev->gt == 3) {
+ dev->eu_count = 40;
+ dev->thread_count = 280;
dev->urb_size = 512 * 1024;
- }
- else if (IS_HSW_GT2(info->devid)) {
- dev->gt = 2;
+ } else if (dev->gt == 2) {
+ dev->eu_count = 20;
+ dev->thread_count = 140;
dev->urb_size = 256 * 1024;
- }
- else {
- dev->gt = 1;
+ } else {
+ dev->eu_count = 10;
+ dev->thread_count = 70;
dev->urb_size = 128 * 1024;
}
- }
- else if (IS_GEN7(info->devid)) {
- dev->gen = ILO_GEN(7);
-
- if (IS_IVB_GT2(info->devid)) {
- dev->gt = 2;
+ } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) {
+ /*
+ * From the Ivy Bridge PRM, volume 1 part 1, page 18:
+ *
+ * "Device # of EUs #Threads/EU
+ * Ivy Bridge (GT2) 16 8
+ * Ivy Bridge (GT1) 6 6"
+ *
+ * From the Ivy Bridge PRM, volume 4 part 2, page 17:
+ *
+ * "URB Size URB Rows URB Rows when SLM Enabled
+ * 128k 4096 2048
+ * 256k 8096 4096"
+ */
+ dev->gen_opaque = ILO_GEN(7);
+ dev->gt = (gen_is_ivb(info->devid)) ? gen_get_ivb_gt(info->devid) : 1;
+ if (dev->gt == 2) {
+ dev->eu_count = 16;
+ dev->thread_count = 128;
dev->urb_size = 256 * 1024;
- }
- else {
- dev->gt = 1;
+ } else {
+ dev->eu_count = 6;
+ dev->thread_count = 36;
dev->urb_size = 128 * 1024;
}
- }
- else if (IS_GEN6(info->devid)) {
- dev->gen = ILO_GEN(6);
-
- if (IS_SNB_GT2(info->devid)) {
- dev->gt = 2;
+ } else if (gen_is_snb(info->devid)) {
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 22:
+ *
+ * "Device # of EUs #Threads/EU
+ * SNB GT2 12 5
+ * SNB GT1 6 4"
+ *
+ * From the Sandy Bridge PRM, volume 4 part 2, page 18:
+ *
+ * "[DevSNB]: The GT1 product's URB provides 32KB of storage,
+ * arranged as 1024 256-bit rows. The GT2 product's URB provides
+ * 64KB of storage, arranged as 2048 256-bit rows. A row
+ * corresponds in size to an EU GRF register. Read/write access to
+ * the URB is generally supported on a row-granular basis."
+ */
+ dev->gen_opaque = ILO_GEN(6);
+ dev->gt = gen_get_snb_gt(info->devid);
+ if (dev->gt == 2) {
+ dev->eu_count = 12;
+ dev->thread_count = 60;
dev->urb_size = 64 * 1024;
- }
- else {
- dev->gt = 1;
+ } else {
+ dev->eu_count = 6;
+ dev->thread_count = 24;
dev->urb_size = 32 * 1024;
}
- }
- else {
+ } else {
ilo_err("unknown GPU generation\n");
return false;
}