X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fintel_screen.c;h=96f30168127acac0909714a21bd179c1b30d7376;hp=21786eb54abe0c2f8c14a9e62cc7a29854a4b4fd;hb=37cdcaf3864345617d5edde661fa8443ee0a2182;hpb=05520ba490c9463391f438c341243d52a00a7168 diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 21786eb54ab..96f30168127 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -23,6 +23,7 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include #include #include @@ -41,6 +42,14 @@ #include "utils.h" #include "xmlpool.h" +#ifndef DRM_FORMAT_MOD_INVALID +#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1) +#endif + +#ifndef DRM_FORMAT_MOD_LINEAR +#define DRM_FORMAT_MOD_LINEAR 0 +#endif + static const __DRIconfigOptionsExtension brw_config_options = { .base = { __DRI_CONFIG_OPTIONS, 1 }, .xml = @@ -56,10 +65,6 @@ DRI_CONF_BEGIN DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") DRI_CONF_DESC_END DRI_CONF_OPT_END - - DRI_CONF_OPT_BEGIN_B(hiz, "true") - DRI_CONF_DESC(en, "Enable Hierarchical Z on gen6+") - DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY @@ -84,7 +89,9 @@ DRI_CONF_BEGIN DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION("false") DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false") + DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false") DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false") + DRI_CONF_FORCE_GLSL_ABS_SQRT("false") DRI_CONF_OPT_BEGIN_B(shader_precompile, "true") DRI_CONF_DESC(en, "Perform code generation at shader link time.") @@ -99,7 +106,7 @@ DRI_CONF_END #include "intel_batchbuffer.h" #include "intel_buffers.h" -#include "intel_bufmgr.h" +#include "brw_bufmgr.h" #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_screen.h" @@ -284,11 +291,63 @@ static struct intel_image_format intel_image_formats[] = { { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } } }; +static const struct { + uint32_t tiling; + uint64_t modifier; + unsigned height_align; +} tiling_modifier_map[] = { + { .tiling = I915_TILING_NONE, .modifier = DRM_FORMAT_MOD_LINEAR, + .height_align = 1 }, + { .tiling = I915_TILING_X, .modifier = I915_FORMAT_MOD_X_TILED, + .height_align = 8 }, + { .tiling = I915_TILING_Y, .modifier = I915_FORMAT_MOD_Y_TILED, + .height_align = 32 }, +}; + +static uint32_t +modifier_to_tiling(uint64_t modifier) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) { + if (tiling_modifier_map[i].modifier == modifier) + return tiling_modifier_map[i].tiling; + } + + unreachable("modifier_to_tiling should only receive known modifiers"); +} + +static uint64_t +tiling_to_modifier(uint32_t tiling) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) { + if (tiling_modifier_map[i].tiling == tiling) + return tiling_modifier_map[i].modifier; + } + + unreachable("tiling_to_modifier received unknown tiling mode"); +} + +static unsigned +get_tiled_height(uint64_t modifier, unsigned height) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) { + if (tiling_modifier_map[i].modifier == modifier) + return ALIGN(height, tiling_modifier_map[i].height_align); + } + + unreachable("get_tiled_height received unknown tiling mode"); +} + static void intel_image_warn_if_unaligned(__DRIimage *image, const char *func) { uint32_t tiling, swizzle; - drm_intel_bo_get_tiling(image->bo, &tiling, &swizzle); + brw_bo_get_tiling(image->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE && (image->offset & 0xfff)) { _mesa_warning(NULL, "%s: offset 0x%08x not on tile boundary", @@ -323,7 +382,8 @@ static boolean intel_lookup_fourcc(int dri_format, int *fourcc) } static __DRIimage * -intel_allocate_image(int dri_format, void *loaderPrivate) +intel_allocate_image(struct intel_screen *screen, int dri_format, + void *loaderPrivate) { __DRIimage *image; @@ -331,6 +391,7 @@ intel_allocate_image(int dri_format, void *loaderPrivate) if (image == NULL) return NULL; + image->screen = screen; image->dri_format = dri_format; image->offset = 0; @@ -367,9 +428,9 @@ intel_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image, &image->tile_x, &image->tile_y); - drm_intel_bo_unreference(image->bo); + brw_bo_unreference(image->bo); image->bo = mt->bo; - drm_intel_bo_reference(mt->bo); + brw_bo_reference(mt->bo); } static __DRIimage * @@ -381,7 +442,7 @@ intel_create_image_from_name(__DRIscreen *dri_screen, __DRIimage *image; int cpp; - image = intel_allocate_image(format, loaderPrivate); + image = intel_allocate_image(screen, format, loaderPrivate); if (image == NULL) return NULL; @@ -393,12 +454,13 @@ intel_create_image_from_name(__DRIscreen *dri_screen, image->width = width; image->height = height; image->pitch = pitch * cpp; - image->bo = drm_intel_bo_gem_create_from_name(screen->bufmgr, "image", + image->bo = brw_bo_gem_create_from_name(screen->bufmgr, "image", name); if (!image->bo) { free(image); return NULL; } + image->modifier = tiling_to_modifier(image->bo->tiling_mode); return image; } @@ -427,11 +489,12 @@ intel_create_image_from_renderbuffer(__DRIcontext *context, image->internal_format = rb->InternalFormat; image->format = rb->Format; + image->modifier = tiling_to_modifier(irb->mt->tiling); image->offset = 0; image->data = loaderPrivate; - drm_intel_bo_unreference(image->bo); + brw_bo_unreference(image->bo); image->bo = irb->mt->bo; - drm_intel_bo_reference(irb->mt->bo); + brw_bo_reference(irb->mt->bo); image->width = rb->Width; image->height = rb->Height; image->pitch = irb->mt->pitch; @@ -488,6 +551,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target, image->internal_format = obj->Image[face][level]->InternalFormat; image->format = obj->Image[face][level]->TexFormat; + image->modifier = tiling_to_modifier(iobj->mt->tiling); image->data = loaderPrivate; intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset); image->dri_format = driGLFormatToImageFormat(image->format); @@ -505,51 +569,137 @@ intel_create_image_from_texture(__DRIcontext *context, int target, static void intel_destroy_image(__DRIimage *image) { - drm_intel_bo_unreference(image->bo); + brw_bo_unreference(image->bo); free(image); } +enum modifier_priority { + MODIFIER_PRIORITY_INVALID = 0, + MODIFIER_PRIORITY_LINEAR, + MODIFIER_PRIORITY_X, + MODIFIER_PRIORITY_Y, +}; + +const uint64_t priority_to_modifier[] = { + [MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID, + [MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR, + [MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED, + [MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED, +}; + +static uint64_t +select_best_modifier(struct gen_device_info *devinfo, + const uint64_t *modifiers, + const unsigned count) +{ + enum modifier_priority prio = MODIFIER_PRIORITY_INVALID; + + for (int i = 0; i < count; i++) { + switch (modifiers[i]) { + case I915_FORMAT_MOD_Y_TILED: + prio = MAX2(prio, MODIFIER_PRIORITY_Y); + break; + case I915_FORMAT_MOD_X_TILED: + prio = MAX2(prio, MODIFIER_PRIORITY_X); + break; + case DRM_FORMAT_MOD_LINEAR: + prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR); + break; + case DRM_FORMAT_MOD_INVALID: + default: + break; + } + } + + return priority_to_modifier[prio]; +} + static __DRIimage * -intel_create_image(__DRIscreen *dri_screen, - int width, int height, int format, - unsigned int use, - void *loaderPrivate) +intel_create_image_common(__DRIscreen *dri_screen, + int width, int height, int format, + unsigned int use, + const uint64_t *modifiers, + unsigned count, + void *loaderPrivate) { __DRIimage *image; struct intel_screen *screen = dri_screen->driverPrivate; uint32_t tiling; + uint64_t modifier = DRM_FORMAT_MOD_INVALID; + unsigned tiled_height; int cpp; - unsigned long pitch; - tiling = I915_TILING_X; + /* Callers of this may specify a modifier, or a dri usage, but not both. The + * newer modifier interface deprecates the older usage flags newer modifier + * interface deprecates the older usage flags. + */ + assert(!(use && count)); + if (use & __DRI_IMAGE_USE_CURSOR) { if (width != 64 || height != 64) return NULL; - tiling = I915_TILING_NONE; + modifier = DRM_FORMAT_MOD_LINEAR; } if (use & __DRI_IMAGE_USE_LINEAR) - tiling = I915_TILING_NONE; + modifier = DRM_FORMAT_MOD_LINEAR; + + if (modifier == DRM_FORMAT_MOD_INVALID) { + if (modifiers) { + /* User requested specific modifiers */ + modifier = select_best_modifier(&screen->devinfo, modifiers, count); + if (modifier == DRM_FORMAT_MOD_INVALID) + return NULL; + } else { + /* Historically, X-tiled was the default, and so lack of modifier means + * X-tiled. + */ + modifier = I915_FORMAT_MOD_X_TILED; + } + } + tiling = modifier_to_tiling(modifier); + tiled_height = get_tiled_height(modifier, height); - image = intel_allocate_image(format, loaderPrivate); + image = intel_allocate_image(screen, format, loaderPrivate); if (image == NULL) return NULL; cpp = _mesa_get_format_bytes(image->format); - image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image", - width, height, cpp, &tiling, - &pitch, 0); + image->bo = brw_bo_alloc_tiled(screen->bufmgr, "image", + width, tiled_height, cpp, tiling, + &image->pitch, 0); if (image->bo == NULL) { free(image); return NULL; } image->width = width; image->height = height; - image->pitch = pitch; + image->modifier = modifier; return image; } +static __DRIimage * +intel_create_image(__DRIscreen *dri_screen, + int width, int height, int format, + unsigned int use, + void *loaderPrivate) +{ + return intel_create_image_common(dri_screen, width, height, format, use, NULL, 0, + loaderPrivate); +} + +static __DRIimage * +intel_create_image_with_modifiers(__DRIscreen *dri_screen, + int width, int height, int format, + const uint64_t *modifiers, + const unsigned count, + void *loaderPrivate) +{ + return intel_create_image_common(dri_screen, width, height, format, 0, + modifiers, count, loaderPrivate); +} + static GLboolean intel_query_image(__DRIimage *image, int attrib, int *value) { @@ -558,10 +708,10 @@ intel_query_image(__DRIimage *image, int attrib, int *value) *value = image->pitch; return true; case __DRI_IMAGE_ATTRIB_HANDLE: - *value = image->bo->handle; + *value = image->bo->gem_handle; return true; case __DRI_IMAGE_ATTRIB_NAME: - return !drm_intel_bo_flink(image->bo, (uint32_t *) value); + return !brw_bo_flink(image->bo, (uint32_t *) value); case __DRI_IMAGE_ATTRIB_FORMAT: *value = image->dri_format; return true; @@ -577,7 +727,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value) *value = image->planar_format->components; return true; case __DRI_IMAGE_ATTRIB_FD: - return !drm_intel_bo_gem_export_to_prime(image->bo, value); + return !brw_bo_gem_export_to_prime(image->bo, value); case __DRI_IMAGE_ATTRIB_FOURCC: return intel_lookup_fourcc(image->dri_format, value); case __DRI_IMAGE_ATTRIB_NUM_PLANES: @@ -586,6 +736,12 @@ intel_query_image(__DRIimage *image, int attrib, int *value) case __DRI_IMAGE_ATTRIB_OFFSET: *value = image->offset; return true; + case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER: + *value = (image->modifier & 0xffffffff); + return true; + case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER: + *value = ((image->modifier >> 32) & 0xffffffff); + return true; default: return false; @@ -601,12 +757,13 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) if (image == NULL) return NULL; - drm_intel_bo_reference(orig_image->bo); + brw_bo_reference(orig_image->bo); image->bo = orig_image->bo; image->internal_format = orig_image->internal_format; image->planar_format = orig_image->planar_format; image->dri_format = orig_image->dri_format; image->format = orig_image->format; + image->modifier = orig_image->modifier; image->offset = orig_image->offset; image->width = orig_image->width; image->height = orig_image->height; @@ -678,6 +835,7 @@ intel_create_image_from_fds(__DRIscreen *dri_screen, struct intel_screen *screen = dri_screen->driverPrivate; struct intel_image_format *f; __DRIimage *image; + unsigned tiled_height; int i, index; if (fds == NULL || num_fds < 1) @@ -693,9 +851,11 @@ intel_create_image_from_fds(__DRIscreen *dri_screen, return NULL; if (f->nplanes == 1) - image = intel_allocate_image(f->planes[0].dri_format, loaderPrivate); + image = intel_allocate_image(screen, f->planes[0].dri_format, + loaderPrivate); else - image = intel_allocate_image(__DRI_IMAGE_FORMAT_NONE, loaderPrivate); + image = intel_allocate_image(screen, __DRI_IMAGE_FORMAT_NONE, + loaderPrivate); if (image == NULL) return NULL; @@ -705,21 +865,34 @@ intel_create_image_from_fds(__DRIscreen *dri_screen, image->pitch = strides[0]; image->planar_format = f; + + image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]); + if (image->bo == NULL) { + free(image); + return NULL; + } + + image->modifier = tiling_to_modifier(image->bo->tiling_mode); + tiled_height = get_tiled_height(image->modifier, height); + int size = 0; for (i = 0; i < f->nplanes; i++) { index = f->planes[i].buffer_index; image->offsets[index] = offsets[index]; image->strides[index] = strides[index]; - const int plane_height = height >> f->planes[i].height_shift; + const int plane_height = tiled_height >> f->planes[i].height_shift; const int end = offsets[index] + plane_height * strides[index]; if (size < end) size = end; } - image->bo = drm_intel_bo_gem_create_from_prime(screen->bufmgr, - fds[0], size); - if (image->bo == NULL) { + /* Check that the requested image actually fits within the BO. 'size' + * is already relative to the offsets, so we don't need to add that. */ + if (image->bo->size == 0) { + image->bo->size = size; + } else if (size > image->bo->size) { + brw_bo_unreference(image->bo); free(image); return NULL; } @@ -798,7 +971,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) offset = parent->offsets[index]; stride = parent->strides[index]; - image = intel_allocate_image(dri_format, loaderPrivate); + image = intel_allocate_image(parent->screen, dri_format, loaderPrivate); if (image == NULL) return NULL; @@ -809,7 +982,8 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) } image->bo = parent->bo; - drm_intel_bo_reference(parent->bo); + brw_bo_reference(parent->bo); + image->modifier = parent->modifier; image->width = width; image->height = height; @@ -822,7 +996,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) } static const __DRIimageExtension intelImageExtension = { - .base = { __DRI_IMAGE, 13 }, + .base = { __DRI_IMAGE, 14 }, .createImageFromName = intel_create_image_from_name, .createImageFromRenderbuffer = intel_create_image_from_renderbuffer, @@ -840,8 +1014,20 @@ static const __DRIimageExtension intelImageExtension = { .getCapabilities = NULL, .mapImage = NULL, .unmapImage = NULL, + .createImageWithModifiers = intel_create_image_with_modifiers, }; +static uint64_t +get_aperture_size(int fd) +{ + struct drm_i915_gem_get_aperture aperture; + + if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) != 0) + return 0; + + return aperture.aper_size; +} + static int brw_query_renderer_integer(__DRIscreen *dri_screen, int param, unsigned int *value) @@ -864,13 +1050,8 @@ brw_query_renderer_integer(__DRIscreen *dri_screen, * assume that there's some fragmentation, and we start doing extra * flushing, etc. That's the big cliff apps will care about. */ - size_t aper_size; - size_t mappable_size; - - drm_intel_get_aperture_sizes(dri_screen->fd, &mappable_size, &aper_size); - const unsigned gpu_mappable_megabytes = - (aper_size / (1024 * 1024)) * 3 / 4; + screen->aperture_threshold / (1024 * 1024); const long system_memory_pages = sysconf(_SC_PHYS_PAGES); const long system_page_size = sysconf(_SC_PAGE_SIZE); @@ -995,7 +1176,7 @@ intelDestroyScreen(__DRIscreen * sPriv) { struct intel_screen *screen = sPriv->driverPrivate; - dri_bufmgr_destroy(screen->bufmgr); + brw_bufmgr_destroy(screen->bufmgr); driDestroyOptionInfo(&screen->optionCache); ralloc_free(screen); @@ -1004,7 +1185,11 @@ intelDestroyScreen(__DRIscreen * sPriv) /** - * This is called when we need to set up GL rendering to a new X window. + * Create a gl_framebuffer and attach it to __DRIdrawable::driverPrivate. + * + *_This implements driDriverAPI::createNewDrawable, which the DRI layer calls + * when creating a EGLSurface, GLXDrawable, or GLXPixmap. Despite the name, + * this does not allocate GPU memory. */ static GLboolean intelCreateBuffer(__DRIscreen *dri_screen, @@ -1017,12 +1202,11 @@ intelCreateBuffer(__DRIscreen *dri_screen, mesa_format rgbFormat; unsigned num_samples = intel_quantize_num_samples(screen, mesaVis->samples); - struct gl_framebuffer *fb; if (isPixmap) return false; - fb = CALLOC_STRUCT(gl_framebuffer); + struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer); if (!fb) return false; @@ -1049,12 +1233,12 @@ intelCreateBuffer(__DRIscreen *dri_screen, } /* setup the hardware-based renderbuffers */ - rb = intel_create_renderbuffer(rgbFormat, num_samples); - _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base.Base); + rb = intel_create_winsys_renderbuffer(rgbFormat, num_samples); + _mesa_attach_and_own_rb(fb, BUFFER_FRONT_LEFT, &rb->Base.Base); if (mesaVis->doubleBufferMode) { - rb = intel_create_renderbuffer(rgbFormat, num_samples); - _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base.Base); + rb = intel_create_winsys_renderbuffer(rgbFormat, num_samples); + _mesa_attach_and_own_rb(fb, BUFFER_BACK_LEFT, &rb->Base.Base); } /* @@ -1068,10 +1252,10 @@ intelCreateBuffer(__DRIscreen *dri_screen, if (screen->devinfo.has_hiz_and_separate_stencil) { rb = intel_create_private_renderbuffer(MESA_FORMAT_Z24_UNORM_X8_UINT, num_samples); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); rb = intel_create_private_renderbuffer(MESA_FORMAT_S_UINT8, num_samples); - _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base); + _mesa_attach_and_own_rb(fb, BUFFER_STENCIL, &rb->Base.Base); } else { /* * Use combined depth/stencil. Note that the renderbuffer is @@ -1079,15 +1263,15 @@ intelCreateBuffer(__DRIscreen *dri_screen, */ rb = intel_create_private_renderbuffer(MESA_FORMAT_Z24_UNORM_S8_UINT, num_samples); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); - _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base); + _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); + _mesa_attach_and_reference_rb(fb, BUFFER_STENCIL, &rb->Base.Base); } } else if (mesaVis->depthBits == 16) { assert(mesaVis->stencilBits == 0); rb = intel_create_private_renderbuffer(MESA_FORMAT_Z_UNORM16, num_samples); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); } else { assert(mesaVis->depthBits == 0); @@ -1155,19 +1339,18 @@ intel_init_bufmgr(struct intel_screen *screen) { __DRIscreen *dri_screen = screen->driScrnPriv; - screen->no_hw = getenv("INTEL_NO_HW") != NULL; + if (getenv("INTEL_NO_HW") != NULL) + screen->no_hw = true; - screen->bufmgr = intel_bufmgr_gem_init(dri_screen->fd, BATCH_SZ); + screen->bufmgr = brw_bufmgr_init(&screen->devinfo, dri_screen->fd, BATCH_SZ); if (screen->bufmgr == NULL) { fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", __func__, __LINE__); return false; } - drm_intel_bufmgr_gem_enable_fenced_relocs(screen->bufmgr); - - if (!intel_get_boolean(screen, I915_PARAM_HAS_RELAXED_DELTA)) { - fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__); + if (!intel_get_boolean(screen, I915_PARAM_HAS_WAIT_TIMEOUT)) { + fprintf(stderr, "[%s: %u] Kernel 3.6 required.\n", __func__, __LINE__); return false; } @@ -1177,20 +1360,19 @@ intel_init_bufmgr(struct intel_screen *screen) static bool intel_detect_swizzling(struct intel_screen *screen) { - drm_intel_bo *buffer; - unsigned long flags = 0; - unsigned long aligned_pitch; + struct brw_bo *buffer; + unsigned flags = 0; + uint32_t aligned_pitch; uint32_t tiling = I915_TILING_X; uint32_t swizzle_mode = 0; - buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test", - 64, 64, 4, - &tiling, &aligned_pitch, flags); + buffer = brw_bo_alloc_tiled(screen->bufmgr, "swizzle test", + 64, 64, 4, tiling, &aligned_pitch, flags); if (buffer == NULL) return false; - drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode); - drm_intel_bo_unreference(buffer); + brw_bo_get_tiling(buffer, &tiling, &swizzle_mode); + brw_bo_unreference(buffer); if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE) return false; @@ -1210,13 +1392,13 @@ intel_detect_timestamp(struct intel_screen *screen) * More recent kernels offer an interface to read the full 36bits * everywhere. */ - if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0) + if (brw_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0) return 3; /* Determine if we have a 32bit or 64bit kernel by inspecting the * upper 32bits for a rapidly changing timestamp. */ - if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &last)) + if (brw_reg_read(screen->bufmgr, TIMESTAMP, &last)) return 0; upper = lower = 0; @@ -1224,7 +1406,7 @@ intel_detect_timestamp(struct intel_screen *screen) /* The TIMESTAMP should change every 80ns, so several round trips * through the kernel should be enough to advance it. */ - if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &dummy)) + if (brw_reg_read(screen->bufmgr, TIMESTAMP, &dummy)) return 0; upper += (dummy >> 32) != (last >> 32); @@ -1253,24 +1435,29 @@ static bool intel_detect_pipelined_register(struct intel_screen *screen, int reg, uint32_t expected_value, bool reset) { - drm_intel_bo *results, *bo; + if (screen->no_hw) + return false; + + struct brw_bo *results, *bo; uint32_t *batch; uint32_t offset = 0; + void *map; bool success = false; /* Create a zero'ed temporary buffer for reading our results */ - results = drm_intel_bo_alloc(screen->bufmgr, "registers", 4096, 0); + results = brw_bo_alloc(screen->bufmgr, "registers", 4096, 0); if (results == NULL) goto err; - bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0); + bo = brw_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0); if (bo == NULL) goto err_results; - if (drm_intel_bo_map(bo, 1)) + map = brw_bo_map(NULL, bo, MAP_WRITE); + if (!map) goto err_batch; - batch = bo->virtual; + batch = map; /* Write the register. */ *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); @@ -1280,11 +1467,14 @@ intel_detect_pipelined_register(struct intel_screen *screen, /* Save the register's value back to the buffer. */ *batch++ = MI_STORE_REGISTER_MEM | (3 - 2); *batch++ = reg; - drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual, - results, offset*sizeof(uint32_t), - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION); - *batch++ = results->offset + offset*sizeof(uint32_t); + struct drm_i915_gem_relocation_entry reloc = { + .offset = (char *) batch - (char *) map, + .delta = offset * sizeof(uint32_t), + .target_handle = results->gem_handle, + .read_domains = I915_GEM_DOMAIN_INSTRUCTION, + .write_domain = I915_GEM_DOMAIN_INSTRUCTION, + }; + *batch++ = reloc.presumed_offset + reloc.delta; /* And afterwards clear the register */ if (reset) { @@ -1295,20 +1485,41 @@ intel_detect_pipelined_register(struct intel_screen *screen, *batch++ = MI_BATCH_BUFFER_END; - drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8), - NULL, 0, 0, - I915_EXEC_RENDER); + struct drm_i915_gem_exec_object2 exec_objects[2] = { + { + .handle = results->gem_handle, + }, + { + .handle = bo->gem_handle, + .relocation_count = 1, + .relocs_ptr = (uintptr_t) &reloc, + } + }; + + struct drm_i915_gem_execbuffer2 execbuf = { + .buffers_ptr = (uintptr_t) exec_objects, + .buffer_count = 2, + .batch_len = ALIGN((char *) batch - (char *) map, 8), + .flags = I915_EXEC_RENDER, + }; + + /* Don't bother with error checking - if the execbuf fails, the + * value won't be written and we'll just report that there's no access. + */ + __DRIscreen *dri_screen = screen->driScrnPriv; + drmIoctl(dri_screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); /* Check whether the value got written. */ - if (drm_intel_bo_map(results, false) == 0) { - success = *((uint32_t *)results->virtual + offset) == expected_value; - drm_intel_bo_unmap(results); + void *results_map = brw_bo_map(NULL, results, MAP_READ); + if (results_map) { + success = *((uint32_t *)results_map + offset) == expected_value; + brw_bo_unmap(results); } err_batch: - drm_intel_bo_unreference(bo); + brw_bo_unreference(bo); err_results: - drm_intel_bo_unreference(results); + brw_bo_unreference(results); err: return success; } @@ -1316,13 +1527,19 @@ err: static bool intel_detect_pipelined_so(struct intel_screen *screen) { + const struct gen_device_info *devinfo = &screen->devinfo; + /* Supposedly, Broadwell just works. */ - if (screen->devinfo.gen >= 8) + if (devinfo->gen >= 8) return true; - if (screen->devinfo.gen <= 6) + if (devinfo->gen <= 6) return false; + /* See the big explanation about command parser versions below */ + if (screen->cmd_parser_version >= (devinfo->is_haswell ? 7 : 2)) + return true; + /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the * statistics registers), and we already reset it to zero before using it. */ @@ -1509,8 +1726,14 @@ set_max_gl_versions(struct intel_screen *screen) dri_screen->max_gl_es2_version = has_astc ? 32 : 31; break; case 7: - dri_screen->max_gl_core_version = screen->devinfo.is_haswell && - can_do_pipelined_register_writes(screen) ? 45 : 33; + dri_screen->max_gl_core_version = 33; + if (can_do_pipelined_register_writes(screen)) { + dri_screen->max_gl_core_version = 42; + if (screen->devinfo.is_haswell && can_do_compute_dispatch(screen)) + dri_screen->max_gl_core_version = 43; + if (screen->devinfo.is_haswell && can_do_mi_math_and_lrr(screen)) + dri_screen->max_gl_core_version = 45; + } dri_screen->max_gl_compat_version = 30; dri_screen->max_gl_es1_version = 11; dri_screen->max_gl_es2_version = screen->devinfo.is_haswell ? 31 : 30; @@ -1600,6 +1823,53 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) va_end(args); } +static int +parse_devid_override(const char *devid_override) +{ + static const struct { + const char *name; + int pci_id; + } name_map[] = { + { "brw", 0x2a02 }, + { "g4x", 0x2a42 }, + { "ilk", 0x0042 }, + { "snb", 0x0126 }, + { "ivb", 0x016a }, + { "hsw", 0x0d2e }, + { "byt", 0x0f33 }, + { "bdw", 0x162e }, + { "skl", 0x1912 }, + { "kbl", 0x5912 }, + }; + + for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { + if (!strcmp(name_map[i].name, devid_override)) + return name_map[i].pci_id; + } + + return strtod(devid_override, NULL); +} + +/** + * Get the PCI ID for the device. This can be overridden by setting the + * INTEL_DEVID_OVERRIDE environment variable to the desired ID. + * + * Returns -1 on ioctl failure. + */ +static int +get_pci_device_id(struct intel_screen *screen) +{ + if (geteuid() == getuid()) { + char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + screen->no_hw = true; + return parse_devid_override(devid_override); + } + } + + return intel_get_integer(screen, I915_PARAM_CHIPSET_ID); +} + /** * This is the driver specific part of the createNewScreen entry point. * Called when using DRI2. @@ -1632,20 +1902,18 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) screen->driScrnPriv = dri_screen; dri_screen->driverPrivate = (void *) screen; - if (!intel_init_bufmgr(screen)) - return NULL; + screen->deviceID = get_pci_device_id(screen); - screen->deviceID = drm_intel_bufmgr_gem_get_devid(screen->bufmgr); if (!gen_get_device_info(screen->deviceID, &screen->devinfo)) return NULL; + if (!intel_init_bufmgr(screen)) + return NULL; + const struct gen_device_info *devinfo = &screen->devinfo; brw_process_intel_debug_variable(); - if (INTEL_DEBUG & DEBUG_BUFMGR) - dri_bufmgr_set_debug(screen->bufmgr, true); - if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && devinfo->gen < 7) { fprintf(stderr, "shader_time debugging requires gen7 (Ivybridge) or better.\n"); @@ -1658,7 +1926,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) * Currently the entire (global) address space for all GTT maps is * limited to 64bits. That is all objects on the system that are * setup for GTT mmapping must fit within 64bits. An attempt to use - * one that exceeds the limit with fail in drm_intel_bo_map_gtt(). + * one that exceeds the limit with fail in brw_bo_map_gtt(). * * Long before we hit that limit, we will be practically limited by * that any single object must fit in physical memory (RAM). The upper @@ -1686,6 +1954,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) screen->max_gtt_map_object_size = gtt_size / 4; } + screen->aperture_threshold = get_aperture_size(dri_screen->fd) * 3 / 4; + screen->hw_has_swizzling = intel_detect_swizzling(screen); screen->hw_has_timestamp = intel_detect_timestamp(screen); @@ -1696,8 +1966,147 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) screen->subslice_total = 1 << (devinfo->gt - 1); } - if (intel_detect_pipelined_so(screen)) + /* Gen7-7.5 kernel requirements / command parser saga: + * + * - pre-v3.16: + * Haswell and Baytrail cannot use any privileged batchbuffer features. + * + * Ivybridge has aliasing PPGTT on by default, which accidentally marks + * all batches secure, allowing them to use any feature with no checking. + * This is effectively equivalent to a command parser version of + * \infinity - everything is possible. + * + * The command parser does not exist, and querying the version will + * return -EINVAL. + * + * - v3.16: + * The kernel enables the command parser by default, for systems with + * aliasing PPGTT enabled (Ivybridge and Haswell). However, the + * hardware checker is still enabled, so Haswell and Baytrail cannot + * do anything. + * + * Ivybridge goes from "everything is possible" to "only what the + * command parser allows" (if the user boots with i915.cmd_parser=0, + * then everything is possible again). We can only safely use features + * allowed by the supported command parser version. + * + * Annoyingly, I915_PARAM_CMD_PARSER_VERSION reports the static version + * implemented by the kernel, even if it's turned off. So, checking + * for version > 0 does not mean that you can write registers. We have + * to try it and see. The version does, however, indicate the age of + * the kernel. + * + * Instead of matching the hardware checker's behavior of converting + * privileged commands to MI_NOOP, it makes execbuf2 start returning + * -EINVAL, making it dangerous to try and use privileged features. + * + * Effective command parser versions: + * - Haswell: 0 (reporting 1, writes don't work) + * - Baytrail: 0 (reporting 1, writes don't work) + * - Ivybridge: 1 (enabled) or infinite (disabled) + * + * - v3.17: + * Baytrail aliasing PPGTT is enabled, making it like Ivybridge: + * effectively version 1 (enabled) or infinite (disabled). + * + * - v3.19: f1f55cc0556031c8ee3fe99dae7251e78b9b653b + * Command parser v2 supports predicate writes. + * + * - Haswell: 0 (reporting 1, writes don't work) + * - Baytrail: 2 (enabled) or infinite (disabled) + * - Ivybridge: 2 (enabled) or infinite (disabled) + * + * So version >= 2 is enough to know that Ivybridge and Baytrail + * will work. Haswell still can't do anything. + * + * - v4.0: Version 3 happened. Largely not relevant. + * + * - v4.1: 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b + * L3 config registers are properly saved and restored as part + * of the hardware context. We can approximately detect this point + * in time by checking if I915_PARAM_REVISION is recognized - it + * landed in a later commit, but in the same release cycle. + * + * - v4.2: 245054a1fe33c06ad233e0d58a27ec7b64db9284 + * Command parser finally gains secure batch promotion. On Haswell, + * the hardware checker gets disabled, which finally allows it to do + * privileged commands. + * + * I915_PARAM_CMD_PARSER_VERSION reports 3. Effective versions: + * - Haswell: 3 (enabled) or 0 (disabled) + * - Baytrail: 3 (enabled) or infinite (disabled) + * - Ivybridge: 3 (enabled) or infinite (disabled) + * + * Unfortunately, detecting this point in time is tricky, because + * no version bump happened when this important change occurred. + * On Haswell, if we can write any register, then the kernel is at + * least this new, and we can start trusting the version number. + * + * - v4.4: 2bbe6bbb0dc94fd4ce287bdac9e1bd184e23057b and + * Command parser reaches version 4, allowing access to Haswell + * atomic scratch and chicken3 registers. If version >= 4, we know + * the kernel is new enough to support privileged features on all + * hardware. However, the user might have disabled it...and the + * kernel will still report version 4. So we still have to guess + * and check. + * + * - v4.4: 7b9748cb513a6bef4af87b79f0da3ff7e8b56cd8 + * Command parser v5 whitelists indirect compute shader dispatch + * registers, needed for OpenGL 4.3 and later. + * + * - v4.8: + * Command parser v7 lets us use MI_MATH on Haswell. + * + * Additionally, the kernel begins reporting version 0 when + * the command parser is disabled, allowing us to skip the + * guess-and-check step on Haswell. Unfortunately, this also + * means that we can no longer use it as an indicator of the + * age of the kernel. + */ + if (intel_get_param(screen, I915_PARAM_CMD_PARSER_VERSION, + &screen->cmd_parser_version) < 0) { + /* Command parser does not exist - getparam is unrecognized */ + screen->cmd_parser_version = 0; + } + + /* Kernel 4.13 retuired for exec object capture */ +#ifndef I915_PARAM_HAS_EXEC_CAPTURE +#define I915_PARAM_HAS_EXEC_CAPTURE 45 +#endif + if (intel_get_boolean(screen, I915_PARAM_HAS_EXEC_CAPTURE)) { + screen->kernel_features |= KERNEL_ALLOWS_EXEC_CAPTURE; + } + + if (!intel_detect_pipelined_so(screen)) { + /* We can't do anything, so the effective version is 0. */ + screen->cmd_parser_version = 0; + } else { screen->kernel_features |= KERNEL_ALLOWS_SOL_OFFSET_WRITES; + } + + if (devinfo->gen >= 8 || screen->cmd_parser_version >= 2) + screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES; + + /* Haswell requires command parser version 4 in order to have L3 + * atomic scratch1 and chicken3 bits + */ + if (devinfo->is_haswell && screen->cmd_parser_version >= 4) { + screen->kernel_features |= + KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3; + } + + /* Haswell requires command parser version 6 in order to write to the + * MI_MATH GPR registers, and version 7 in order to use + * MI_LOAD_REGISTER_REG (which all users of MI_MATH use). + */ + if (devinfo->gen >= 8 || + (devinfo->is_haswell && screen->cmd_parser_version >= 7)) { + screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR; + } + + /* Gen7 needs at least command parser version 5 to support compute */ + if (devinfo->gen >= 8 || screen->cmd_parser_version >= 5) + screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH; const char *force_msaa = getenv("INTEL_FORCE_MSAA"); if (force_msaa) { @@ -1730,35 +2139,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) (ret != -1 || errno != EINVAL); } - if (intel_get_param(screen, I915_PARAM_CMD_PARSER_VERSION, - &screen->cmd_parser_version) < 0) { - screen->cmd_parser_version = 0; - } - - if (devinfo->gen >= 8 || screen->cmd_parser_version >= 2) - screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES; - - /* Haswell requires command parser version 4 in order to have L3 - * atomic scratch1 and chicken3 bits - */ - if (devinfo->is_haswell && screen->cmd_parser_version >= 4) { - screen->kernel_features |= - KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3; - } - - /* Haswell requires command parser version 6 in order to write to the - * MI_MATH GPR registers, and version 7 in order to use - * MI_LOAD_REGISTER_REG (which all users of MI_MATH use). - */ - if (devinfo->gen >= 8 || - (devinfo->is_haswell && screen->cmd_parser_version >= 7)) { - screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR; - } - - /* Gen7 needs at least command parser version 5 to support compute */ - if (devinfo->gen >= 8 || screen->cmd_parser_version >= 5) - screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH; - dri_screen->extensions = !screen->has_context_reset_notification ? screenExtensions : intelRobustScreenExtensions; @@ -1775,7 +2155,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) struct intel_buffer { __DRIbuffer base; - drm_intel_bo *bo; + struct brw_bo *bo; }; static __DRIbuffer * @@ -1793,24 +2173,25 @@ intelAllocateBuffer(__DRIscreen *dri_screen, if (intelBuffer == NULL) return NULL; - /* The front and back buffers are color buffers, which are X tiled. */ - uint32_t tiling = I915_TILING_X; - unsigned long pitch; + /* The front and back buffers are color buffers, which are X tiled. GEN9+ + * supports Y tiled and compressed buffers, but there is no way to plumb that + * through to here. */ + uint32_t pitch; int cpp = format / 8; - intelBuffer->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, - "intelAllocateBuffer", - width, - height, - cpp, - &tiling, &pitch, - BO_ALLOC_FOR_RENDER); + intelBuffer->bo = brw_bo_alloc_tiled(screen->bufmgr, + "intelAllocateBuffer", + width, + height, + cpp, + I915_TILING_X, &pitch, + BO_ALLOC_FOR_RENDER); if (intelBuffer->bo == NULL) { free(intelBuffer); return NULL; } - drm_intel_bo_flink(intelBuffer->bo, &intelBuffer->base.name); + brw_bo_flink(intelBuffer->bo, &intelBuffer->base.name); intelBuffer->base.attachment = attachment; intelBuffer->base.cpp = cpp; @@ -1824,7 +2205,7 @@ intelReleaseBuffer(__DRIscreen *dri_screen, __DRIbuffer *buffer) { struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer; - drm_intel_bo_unreference(intelBuffer->bo); + brw_bo_unreference(intelBuffer->bo); free(intelBuffer); }