X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Filo%2Filo_screen.c;h=5048ba1ac22b23b0e91c31d9d976b38956be078e;hb=7c211a12aa6c22187264f718c81224a70e224ebe;hp=5d652e4ad73ebfdf87eaaff8d72e9e3fdfe49a3f;hpb=747627d0456eb14b2fb4c320c5bf0f084f3b2907;p=mesa.git diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 5d652e4ad73..5048ba1ac22 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -25,29 +25,36 @@ * Chia-I Wu */ +#include "pipe/p_state.h" +#include "os/os_misc.h" #include "util/u_format_s3tc.h" #include "vl/vl_decoder.h" #include "vl/vl_video_buffer.h" -#include "intel_chipset.h" -#include "intel_reg.h" /* for TIMESTAMP */ +#include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */ #include "intel_winsys.h" #include "ilo_context.h" #include "ilo_format.h" #include "ilo_resource.h" +#include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */ #include "ilo_public.h" #include "ilo_screen.h" +struct ilo_fence { + struct pipe_reference reference; + struct intel_bo *bo; +}; + int ilo_debug; static const struct debug_named_value ilo_debug_flags[] = { - { "3d", ILO_DEBUG_3D, "Dump 3D commands and states" }, + { "batch", ILO_DEBUG_BATCH, "Dump batch/state/surface/instruction buffers" }, { "vs", ILO_DEBUG_VS, "Dump vertex shaders" }, { "gs", ILO_DEBUG_GS, "Dump geometry shaders" }, { "fs", ILO_DEBUG_FS, "Dump fragment shaders" }, { "cs", ILO_DEBUG_CS, "Dump compute shaders" }, { "draw", ILO_DEBUG_DRAW, "Show draw information" }, - { "flush", ILO_DEBUG_FLUSH, "Show batch buffer flushes" }, + { "submit", ILO_DEBUG_SUBMIT, "Show batch buffer submissions" }, { "nohw", ILO_DEBUG_NOHW, "Do not send commands to HW" }, { "nocache", ILO_DEBUG_NOCACHE, "Always invalidate HW caches" }, { "nohiz", ILO_DEBUG_NOHIZ, "Disable HiZ" }, @@ -114,16 +121,15 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader, case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: 
return UINT_MAX; case PIPE_SHADER_CAP_MAX_INPUTS: + case PIPE_SHADER_CAP_MAX_OUTPUTS: /* this is limited by how many attributes SF can remap */ return 16; - case PIPE_SHADER_CAP_MAX_CONSTS: - return 1024; + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 1024 * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return ILO_MAX_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_TEMPS: return 256; - case PIPE_SHADER_CAP_MAX_ADDRS: - return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1; case PIPE_SHADER_CAP_MAX_PREDS: return 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -188,6 +194,7 @@ ilo_get_compute_param(struct pipe_screen *screen, enum pipe_compute_cap param, void *ret) { + struct ilo_screen *is = ilo_screen(screen); union { const char *ir_target; uint64_t grid_dimension; @@ -199,11 +206,13 @@ ilo_get_compute_param(struct pipe_screen *screen, uint64_t max_private_size; uint64_t max_input_size; uint64_t max_mem_alloc_size; + uint32_t max_clock_frequency; + uint32_t max_compute_units; + uint32_t images_supported; } val; const void *ptr; int size; - /* XXX some randomly chosen values */ switch (param) { case PIPE_COMPUTE_CAP_IR_TARGET: val.ir_target = "ilog"; @@ -218,58 +227,79 @@ ilo_get_compute_param(struct pipe_screen *screen, size = sizeof(val.grid_dimension); break; case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - val.max_grid_size[0] = 65535; - val.max_grid_size[1] = 65535; - val.max_grid_size[2] = 1; + val.max_grid_size[0] = 0xffffffffu; + val.max_grid_size[1] = 0xffffffffu; + val.max_grid_size[2] = 0xffffffffu; ptr = &val.max_grid_size; size = sizeof(val.max_grid_size); break; case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - val.max_block_size[0] = 512; - val.max_block_size[1] = 512; - val.max_block_size[2] = 512; + val.max_block_size[0] = 1024; + val.max_block_size[1] = 1024; + val.max_block_size[2] = 1024; ptr = &val.max_block_size; size = sizeof(val.max_block_size); break; case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - val.max_threads_per_block = 512; + 
val.max_threads_per_block = 1024; ptr = &val.max_threads_per_block; size = sizeof(val.max_threads_per_block); break; case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: - val.max_global_size = 4; + /* \see ilo_max_resource_size */ + val.max_global_size = 1u << 31; ptr = &val.max_global_size; size = sizeof(val.max_global_size); break; case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + /* Shared Local Memory Size of INTERFACE_DESCRIPTOR_DATA */ val.max_local_size = 64 * 1024; ptr = &val.max_local_size; size = sizeof(val.max_local_size); break; case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: - val.max_private_size = 32768; + /* scratch size */ + val.max_private_size = 12 * 1024; ptr = &val.max_private_size; size = sizeof(val.max_private_size); break; case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: - val.max_input_size = 256; + val.max_input_size = 1024; ptr = &val.max_input_size; size = sizeof(val.max_input_size); break; case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: - val.max_mem_alloc_size = 128 * 1024 * 1024; + val.max_mem_alloc_size = 1u << 31; ptr = &val.max_mem_alloc_size; size = sizeof(val.max_mem_alloc_size); break; + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + val.max_clock_frequency = 1000; + + ptr = &val.max_clock_frequency; + size = sizeof(val.max_clock_frequency); + break; + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + val.max_compute_units = is->dev.eu_count; + + ptr = &val.max_compute_units; + size = sizeof(val.max_compute_units); + break; + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + val.images_supported = 1; + + ptr = &val.images_supported; + size = sizeof(val.images_supported); + break; default: ptr = NULL; size = 0; @@ -310,30 +340,19 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) * Max WxHxD for 2D and CUBE Max WxHxD for 3D * GEN6 8192x8192x512 2048x2048x2048 * GEN7 16384x16384x2048 2048x2048x2048 - * - * However, when the texutre size is large, things become unstable. We - * require the maximum texture size to be 2^30 bytes in - * screen->can_create_resource(). 
Since the maximum pixel size is 2^4 - * bytes (PIPE_FORMAT_R32G32B32A32_FLOAT), textures should not have more - * than 2^26 pixels. - * - * For 3D textures, we have to set the maximum number of levels to 9, - * which has at most 2^24 pixels. For 2D textures, we set it to 14, - * which has at most 2^26 pixels. And for cube textures, we has to set - * it to 12. */ - return 14; + return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 9; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 12; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: return false; case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_SM3: return true; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - if (is->dev.gen >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset) + if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset) return 0; return ILO_MAX_SO_BUFFERS; case PIPE_CAP_PRIMITIVE_RESTART: @@ -342,7 +361,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: return true; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return (is->dev.gen >= ILO_GEN(7)) ? 2048 : 512; + return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 
2048 : 512; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: @@ -361,8 +380,10 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return true; + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: return -8; + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MAX_TEXEL_OFFSET: return 7; case PIPE_CAP_CONDITIONAL_RENDER: @@ -372,11 +393,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) return ILO_MAX_SO_BINDINGS / ILO_MAX_SO_BUFFERS; case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: return ILO_MAX_SO_BINDINGS; - case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: - case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: - return 0; case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: - if (is->dev.gen >= ILO_GEN(7)) + if (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) return is->dev.has_gen7_sol_reset; else return false; /* TODO */ @@ -395,6 +413,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: return false; + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; case PIPE_CAP_COMPUTE: return false; /* TODO */ case PIPE_CAP_USER_INDEX_BUFFERS: @@ -410,7 +430,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MULTISAMPLE: return false; /* TODO */ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: - return 64; + return ILO_TRANSFER_MAP_BUFFER_ALIGNMENT; case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return true; @@ -419,13 +439,12 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TEXCOORD: return false; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: - return true; case PIPE_CAP_QUERY_PIPELINE_STATISTICS: - return false; /* TODO */ + return 
true; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return 0; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - /* a BRW_SURFACE_BUFFER can have up to 2^27 elements */ + /* a GEN6_SURFTYPE_BUFFER can have up to 2^27 elements */ return 1 << 27; case PIPE_CAP_MAX_VIEWPORTS: return ILO_MAX_VIEWPORTS; @@ -433,12 +452,54 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) return PIPE_ENDIAN_LITTLE; case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: return true; - case PIPE_CAP_TGSI_VS_LAYER: + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_TEXTURE_GATHER_SM5: + return 0; case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + return true; + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + case PIPE_CAP_MAX_VERTEX_STREAMS: + case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_SAMPLER_VIEW_TARGET: return 0; + case PIPE_CAP_VENDOR_ID: + return 0x8086; + case PIPE_CAP_DEVICE_ID: + return is->dev.devid; + case PIPE_CAP_ACCELERATED: + return true; + case PIPE_CAP_VIDEO_MEMORY: { + /* Once a batch uses more than 75% of the maximum mappable size, we + * assume that there's some fragmentation, and we start doing extra + * flushing, etc. That's the big cliff apps will care about. 
+ */ + const uint64_t gpu_memory = is->dev.aperture_total * 3 / 4; + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int) (MIN2(gpu_memory, system_memory) >> 20); + } + case PIPE_CAP_UMA: + return true; + case PIPE_CAP_CLIP_HALFZ: + return true; + case PIPE_CAP_VERTEXID_NOBASE: + return false; + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + return true; + default: return 0; } @@ -454,89 +515,39 @@ static const char * ilo_get_name(struct pipe_screen *screen) { struct ilo_screen *is = ilo_screen(screen); - const char *chipset; - - /* stolen from classic i965 */ - switch (is->dev.devid) { - case PCI_CHIP_SANDYBRIDGE_GT1: - case PCI_CHIP_SANDYBRIDGE_GT2: - case PCI_CHIP_SANDYBRIDGE_GT2_PLUS: - chipset = "Intel(R) Sandybridge Desktop"; - break; - case PCI_CHIP_SANDYBRIDGE_M_GT1: - case PCI_CHIP_SANDYBRIDGE_M_GT2: - case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS: - chipset = "Intel(R) Sandybridge Mobile"; - break; - case PCI_CHIP_SANDYBRIDGE_S: - chipset = "Intel(R) Sandybridge Server"; - break; - case PCI_CHIP_IVYBRIDGE_GT1: - case PCI_CHIP_IVYBRIDGE_GT2: - chipset = "Intel(R) Ivybridge Desktop"; - break; - case PCI_CHIP_IVYBRIDGE_M_GT1: - case PCI_CHIP_IVYBRIDGE_M_GT2: - chipset = "Intel(R) Ivybridge Mobile"; - break; - case PCI_CHIP_IVYBRIDGE_S_GT1: - case PCI_CHIP_IVYBRIDGE_S_GT2: - chipset = "Intel(R) Ivybridge Server"; - break; - case PCI_CHIP_BAYTRAIL_M_1: - case PCI_CHIP_BAYTRAIL_M_2: - case PCI_CHIP_BAYTRAIL_M_3: - case PCI_CHIP_BAYTRAIL_M_4: - case PCI_CHIP_BAYTRAIL_D: + const char *chipset = NULL; + + if (gen_is_vlv(is->dev.devid)) { chipset = "Intel(R) Bay Trail"; - break; - case PCI_CHIP_HASWELL_GT1: - case PCI_CHIP_HASWELL_GT2: - case PCI_CHIP_HASWELL_GT3: - case PCI_CHIP_HASWELL_SDV_GT1: - case PCI_CHIP_HASWELL_SDV_GT2: - case PCI_CHIP_HASWELL_SDV_GT3: - case PCI_CHIP_HASWELL_ULT_GT1: - case PCI_CHIP_HASWELL_ULT_GT2: - case PCI_CHIP_HASWELL_ULT_GT3: - case PCI_CHIP_HASWELL_CRW_GT1: - case PCI_CHIP_HASWELL_CRW_GT2: - 
case PCI_CHIP_HASWELL_CRW_GT3: - chipset = "Intel(R) Haswell Desktop"; - break; - case PCI_CHIP_HASWELL_M_GT1: - case PCI_CHIP_HASWELL_M_GT2: - case PCI_CHIP_HASWELL_M_GT3: - case PCI_CHIP_HASWELL_SDV_M_GT1: - case PCI_CHIP_HASWELL_SDV_M_GT2: - case PCI_CHIP_HASWELL_SDV_M_GT3: - case PCI_CHIP_HASWELL_ULT_M_GT1: - case PCI_CHIP_HASWELL_ULT_M_GT2: - case PCI_CHIP_HASWELL_ULT_M_GT3: - case PCI_CHIP_HASWELL_CRW_M_GT1: - case PCI_CHIP_HASWELL_CRW_M_GT2: - case PCI_CHIP_HASWELL_CRW_M_GT3: - chipset = "Intel(R) Haswell Mobile"; - break; - case PCI_CHIP_HASWELL_S_GT1: - case PCI_CHIP_HASWELL_S_GT2: - case PCI_CHIP_HASWELL_S_GT3: - case PCI_CHIP_HASWELL_SDV_S_GT1: - case PCI_CHIP_HASWELL_SDV_S_GT2: - case PCI_CHIP_HASWELL_SDV_S_GT3: - case PCI_CHIP_HASWELL_ULT_S_GT1: - case PCI_CHIP_HASWELL_ULT_S_GT2: - case PCI_CHIP_HASWELL_ULT_S_GT3: - case PCI_CHIP_HASWELL_CRW_S_GT1: - case PCI_CHIP_HASWELL_CRW_S_GT2: - case PCI_CHIP_HASWELL_CRW_S_GT3: - chipset = "Intel(R) Haswell Server"; - break; - default: - chipset = "Unknown Intel Chipset"; - break; + } + else if (gen_is_hsw(is->dev.devid)) { + if (gen_is_desktop(is->dev.devid)) + chipset = "Intel(R) Haswell Desktop"; + else if (gen_is_mobile(is->dev.devid)) + chipset = "Intel(R) Haswell Mobile"; + else if (gen_is_server(is->dev.devid)) + chipset = "Intel(R) Haswell Server"; + } + else if (gen_is_ivb(is->dev.devid)) { + if (gen_is_desktop(is->dev.devid)) + chipset = "Intel(R) Ivybridge Desktop"; + else if (gen_is_mobile(is->dev.devid)) + chipset = "Intel(R) Ivybridge Mobile"; + else if (gen_is_server(is->dev.devid)) + chipset = "Intel(R) Ivybridge Server"; + } + else if (gen_is_snb(is->dev.devid)) { + if (gen_is_desktop(is->dev.devid)) + chipset = "Intel(R) Sandybridge Desktop"; + else if (gen_is_mobile(is->dev.devid)) + chipset = "Intel(R) Sandybridge Mobile"; + else if (gen_is_server(is->dev.devid)) + chipset = "Intel(R) Sandybridge Server"; } + if (!chipset) + chipset = "Unknown Intel Chipset"; + return chipset; } @@ -549,7 
+560,7 @@ ilo_get_timestamp(struct pipe_screen *screen) uint32_t dw[2]; } timestamp; - intel_winsys_read_reg(is->winsys, TIMESTAMP, &timestamp.val); + intel_winsys_read_reg(is->winsys, GEN6_REG_TIMESTAMP, &timestamp.val); /* * From the Ivy Bridge PRM, volume 1 part 3, page 107: @@ -573,26 +584,23 @@ ilo_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **p, struct pipe_fence_handle *f) { - struct ilo_fence **ptr = (struct ilo_fence **) p; struct ilo_fence *fence = ilo_fence(f); + struct ilo_fence *old; - if (!ptr) { - /* still need to reference fence */ - if (fence) - pipe_reference(NULL, &fence->reference); - return; + if (likely(p)) { + old = ilo_fence(*p); + *p = f; + } + else { + old = NULL; } - /* reference fence and dereference the one pointed to by ptr */ - if (*ptr && pipe_reference(&(*ptr)->reference, &fence->reference)) { - struct ilo_fence *old = *ptr; - + STATIC_ASSERT(&((struct ilo_fence *) NULL)->reference == NULL); + if (pipe_reference(&old->reference, &fence->reference)) { if (old->bo) intel_bo_unreference(old->bo); FREE(old); } - - *ptr = fence; } static boolean @@ -633,6 +641,28 @@ ilo_fence_finish(struct pipe_screen *screen, return true; } +/** + * Create a fence for \p bo. When \p bo is not NULL, it must be submitted + * before waited on or checked. 
+ */ +struct ilo_fence * +ilo_fence_create(struct pipe_screen *screen, struct intel_bo *bo) +{ + struct ilo_fence *fence; + + fence = CALLOC_STRUCT(ilo_fence); + if (!fence) + return NULL; + + pipe_reference_init(&fence->reference, 1); + + if (bo) + intel_bo_reference(bo); + fence->bo = bo; + + return fence; +} + static void ilo_screen_destroy(struct pipe_screen *screen) { @@ -648,10 +678,12 @@ static bool init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info) { dev->devid = info->devid; - dev->max_batch_size = info->max_batch_size; + dev->aperture_total = info->aperture_total; + dev->aperture_mappable = info->aperture_mappable; dev->has_llc = info->has_llc; dev->has_address_swizzling = info->has_address_swizzling; dev->has_logical_context = info->has_logical_context; + dev->has_ppgtt = info->has_ppgtt; dev->has_timestamp = info->has_timestamp; dev->has_gen7_sol_reset = info->has_gen7_sol_reset; @@ -661,62 +693,102 @@ init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info) } /* - * From the Sandy Bridge PRM, volume 4 part 2, page 18: + * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT + * writes on GEN6. * - * "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged - * as 1024 256-bit rows. The GT2 product's URB provides 64KB of - * storage, arranged as 2048 256-bit rows. A row corresponds in size - * to an EU GRF register. Read/write access to the URB is generally - * supported on a row-granular basis." + * From the Sandy Bridge PRM, volume 1 part 3, page 101: * - * From the Ivy Bridge PRM, volume 4 part 2, page 17: + * "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all + * code is in a secure environment, independent of address space. + * Under this condition, this bit only specifies the address space + * (GGTT or PPGTT). 
All commands are executed "as-is"" * - * "URB Size URB Rows URB Rows when SLM Enabled - * 128k 4096 2048 - * 256k 8096 4096" + * We need PPGTT to be enabled on GEN6 too. */ + if (!dev->has_ppgtt) { + /* experiments show that it does not really matter... */ + ilo_warn("PPGTT disabled\n"); + } - if (IS_HASWELL(info->devid)) { - dev->gen = ILO_GEN(7.5); - - if (IS_HSW_GT3(info->devid)) { - dev->gt = 3; + if (gen_is_hsw(info->devid)) { + /* + * From the Haswell PRM, volume 4, page 8: + * + * "Description GT3 GT2 GT1.5 GT1 + * (...) + * EUs (Total) 40 20 12 10 + * Threads (Total) 280 140 84 70 + * (...) + * URB Size (max, within L3$) 512KB 256KB 256KB 128KB + */ + dev->gen_opaque = ILO_GEN(7.5); + dev->gt = gen_get_hsw_gt(info->devid); + if (dev->gt == 3) { + dev->eu_count = 40; + dev->thread_count = 280; dev->urb_size = 512 * 1024; - } - else if (IS_HSW_GT2(info->devid)) { - dev->gt = 2; + } else if (dev->gt == 2) { + dev->eu_count = 20; + dev->thread_count = 140; dev->urb_size = 256 * 1024; - } - else { - dev->gt = 1; + } else { + dev->eu_count = 10; + dev->thread_count = 70; dev->urb_size = 128 * 1024; } - } - else if (IS_GEN7(info->devid)) { - dev->gen = ILO_GEN(7); - - if (IS_IVB_GT2(info->devid)) { - dev->gt = 2; + } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) { + /* + * From the Ivy Bridge PRM, volume 1 part 1, page 18: + * + * "Device # of EUs #Threads/EU + * Ivy Bridge (GT2) 16 8 + * Ivy Bridge (GT1) 6 6" + * + * From the Ivy Bridge PRM, volume 4 part 2, page 17: + * + * "URB Size URB Rows URB Rows when SLM Enabled + * 128k 4096 2048 + * 256k 8096 4096" + */ + dev->gen_opaque = ILO_GEN(7); + dev->gt = (gen_is_ivb(info->devid)) ? 
gen_get_ivb_gt(info->devid) : 1; + if (dev->gt == 2) { + dev->eu_count = 16; + dev->thread_count = 128; dev->urb_size = 256 * 1024; - } - else { - dev->gt = 1; + } else { + dev->eu_count = 6; + dev->thread_count = 36; dev->urb_size = 128 * 1024; } - } - else if (IS_GEN6(info->devid)) { - dev->gen = ILO_GEN(6); - - if (IS_SNB_GT2(info->devid)) { - dev->gt = 2; + } else if (gen_is_snb(info->devid)) { + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 22: + * + * "Device # of EUs #Threads/EU + * SNB GT2 12 5 + * SNB GT1 6 4" + * + * From the Sandy Bridge PRM, volume 4 part 2, page 18: + * + * "[DevSNB]: The GT1 product's URB provides 32KB of storage, + * arranged as 1024 256-bit rows. The GT2 product's URB provides + * 64KB of storage, arranged as 2048 256-bit rows. A row + * corresponds in size to an EU GRF register. Read/write access to + * the URB is generally supported on a row-granular basis." + */ + dev->gen_opaque = ILO_GEN(6); + dev->gt = gen_get_snb_gt(info->devid); + if (dev->gt == 2) { + dev->eu_count = 12; + dev->thread_count = 60; dev->urb_size = 64 * 1024; - } - else { - dev->gt = 1; + } else { + dev->eu_count = 6; + dev->thread_count = 24; dev->urb_size = 32 * 1024; } - } - else { + } else { ilo_err("unknown GPU generation\n"); return false; }