X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm_surface_state.c;h=4566696fa98dafda8da740770422174476b03ad6;hb=e989acb03ba802737f762627dd16ac1d0b9f0d13;hp=6ebe6481c32cf723e7a0387788403f319cc16eb9;hpb=958fc04dc51a2561c8598f42df59e3d9139e56a7;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 6ebe6481c32..39e898243db 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -30,13 +30,19 @@ */ +#include "compiler/nir/nir.h" #include "main/context.h" #include "main/blend.h" #include "main/mtypes.h" #include "main/samplerobj.h" #include "main/shaderimage.h" +#include "main/teximage.h" #include "program/prog_parameter.h" +#include "program/prog_instruction.h" #include "main/framebuffer.h" +#include "main/shaderapi.h" + +#include "isl/isl.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" @@ -49,6 +55,217 @@ #include "brw_defines.h" #include "brw_wm.h" +uint32_t wb_mocs[] = { + [7] = GEN7_MOCS_L3, + [8] = BDW_MOCS_WB, + [9] = SKL_MOCS_WB, + [10] = CNL_MOCS_WB, + [11] = ICL_MOCS_WB, +}; + +uint32_t pte_mocs[] = { + [7] = GEN7_MOCS_L3, + [8] = BDW_MOCS_PTE, + [9] = SKL_MOCS_PTE, + [10] = CNL_MOCS_PTE, + [11] = ICL_MOCS_PTE, +}; + +uint32_t +brw_get_bo_mocs(const struct gen_device_info *devinfo, struct brw_bo *bo) +{ + return (bo && bo->external ? 
pte_mocs : wb_mocs)[devinfo->gen]; +} + +static void +get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, + GLenum target, struct isl_view *view, + uint32_t *tile_x, uint32_t *tile_y, + uint32_t *offset, struct isl_surf *surf) +{ + *surf = mt->surf; + + const struct gen_device_info *devinfo = &brw->screen->devinfo; + const enum isl_dim_layout dim_layout = + get_isl_dim_layout(devinfo, mt->surf.tiling, target); + + surf->dim = get_isl_surf_dim(target); + + if (surf->dim_layout == dim_layout) + return; + + /* The layout of the specified texture target is not compatible with the + * actual layout of the miptree structure in memory -- You're entering + * dangerous territory, this can only possibly work if you only intended + * to access a single level and slice of the texture, and the hardware + * supports the tile offset feature in order to allow non-tile-aligned + * base offsets, since we'll have to point the hardware to the first + * texel of the level instead of relying on the usual base level/layer + * controls. + */ + assert(devinfo->has_surface_tile_offset); + assert(view->levels == 1 && view->array_len == 1); + assert(*tile_x == 0 && *tile_y == 0); + + *offset += intel_miptree_get_tile_offsets(mt, view->base_level, + view->base_array_layer, + tile_x, tile_y); + + /* Minify the logical dimensions of the texture. */ + const unsigned l = view->base_level - mt->first_level; + surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l); + surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 : + minify(surf->logical_level0_px.height, l); + surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 : + minify(surf->logical_level0_px.depth, l); + + /* Only the base level and layer can be addressed with the overridden + * layout. + */ + surf->logical_level0_px.array_len = 1; + surf->levels = 1; + surf->dim_layout = dim_layout; + + /* The requested slice of the texture is now at the base level and + * layer. 
+ */ + view->base_level = 0; + view->base_array_layer = 0; +} + +static void +brw_emit_surface_state(struct brw_context *brw, + struct intel_mipmap_tree *mt, + GLenum target, struct isl_view view, + enum isl_aux_usage aux_usage, + uint32_t *surf_offset, int surf_index, + unsigned reloc_flags) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + uint32_t tile_x = mt->level[0].level_x; + uint32_t tile_y = mt->level[0].level_y; + uint32_t offset = mt->offset; + + struct isl_surf surf; + + get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf); + + union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; + + struct brw_bo *aux_bo = NULL; + struct isl_surf *aux_surf = NULL; + uint64_t aux_offset = 0; + struct brw_bo *clear_bo = NULL; + uint32_t clear_offset = 0; + + if (aux_usage != ISL_AUX_USAGE_NONE) { + aux_surf = &mt->aux_buf->surf; + aux_bo = mt->aux_buf->bo; + aux_offset = mt->aux_buf->offset; + + /* We only really need a clear color if we also have an auxiliary + * surface. Without one, it does nothing. + */ + clear_color = + intel_miptree_get_clear_color(devinfo, mt, view.format, + view.usage & ISL_SURF_USAGE_TEXTURE_BIT, + &clear_bo, &clear_offset); + } + + void *state = brw_state_batch(brw, + brw->isl_dev.ss.size, + brw->isl_dev.ss.align, + surf_offset); + + isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view, + .address = brw_state_reloc(&brw->batch, + *surf_offset + brw->isl_dev.ss.addr_offset, + mt->bo, offset, reloc_flags), + .aux_surf = aux_surf, .aux_usage = aux_usage, + .aux_address = aux_offset, + .mocs = brw_get_bo_mocs(devinfo, mt->bo), + .clear_color = clear_color, + .use_clear_address = clear_bo != NULL, + .clear_address = clear_offset, + .x_offset_sa = tile_x, .y_offset_sa = tile_y); + if (aux_surf) { + /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the + * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits + * contain other control information. 
Since buffer addresses are always + * on 4k boundaries (and thus have their lower 12 bits zero), we can use + * an ordinary reloc to do the necessary address translation. + * + * FIXME: move to the point of assignment. + */ + assert((aux_offset & 0xfff) == 0); + + if (devinfo->gen >= 8) { + uint64_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; + *aux_addr = brw_state_reloc(&brw->batch, + *surf_offset + + brw->isl_dev.ss.aux_addr_offset, + aux_bo, *aux_addr, + reloc_flags); + } else { + uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; + *aux_addr = brw_state_reloc(&brw->batch, + *surf_offset + + brw->isl_dev.ss.aux_addr_offset, + aux_bo, *aux_addr, + reloc_flags); + + } + } + + if (clear_bo != NULL) { + /* Make sure the offset is aligned with a cacheline. */ + assert((clear_offset & 0x3f) == 0); + uint32_t *clear_address = + state + brw->isl_dev.ss.clear_color_state_offset; + *clear_address = brw_state_reloc(&brw->batch, + *surf_offset + + brw->isl_dev.ss.clear_color_state_offset, + clear_bo, *clear_address, reloc_flags); + } +} + +static uint32_t +gen6_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned unit, + uint32_t surf_index) +{ + struct gl_context *ctx = &brw->ctx; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_mipmap_tree *mt = irb->mt; + + assert(brw_render_target_supported(brw, rb)); + + mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); + if (unlikely(!brw->mesa_format_supports_render[rb_format])) { + _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", + __func__, _mesa_get_format_name(rb_format)); + } + enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format]; + + struct isl_view view = { + .format = isl_format, + .base_level = irb->mt_level - irb->mt->first_level, + .levels = 1, + .base_array_layer = irb->mt_layer, + .array_len = MAX2(irb->layer_count, 1), + .swizzle = ISL_SWIZZLE_IDENTITY, + .usage = 
ISL_SURF_USAGE_RENDER_TARGET_BIT, + }; + + uint32_t offset; + brw_emit_surface_state(brw, mt, mt->target, view, + brw->draw_aux_usage[unit], + &offset, surf_index, + RELOC_WRITE); + return offset; +} + GLuint translate_tex_target(GLenum target) { @@ -80,12 +297,12 @@ translate_tex_target(GLenum target) } uint32_t -brw_get_surface_tiling_bits(uint32_t tiling) +brw_get_surface_tiling_bits(enum isl_tiling tiling) { switch (tiling) { - case I915_TILING_X: + case ISL_TILING_X: return BRW_SURFACE_TILED; - case I915_TILING_Y: + case ISL_TILING_Y0: return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; default: return 0; @@ -102,31 +319,6 @@ brw_get_surface_num_multisamples(unsigned num_samples) return BRW_SURFACE_MULTISAMPLECOUNT_1; } -void -brw_configure_w_tiled(const struct intel_mipmap_tree *mt, - bool is_render_target, - unsigned *width, unsigned *height, - unsigned *pitch, uint32_t *tiling, unsigned *format) -{ - static const unsigned halign_stencil = 8; - - /* In Y-tiling row is twice as wide as in W-tiling, and subsequently - * there are half as many rows. - * In addition, mip-levels are accessed manually by the program and - * therefore the surface is setup to cover all the mip-levels for one slice. - * (Hardware is still used to access individual slices). - */ - *tiling = I915_TILING_Y; - *pitch = mt->pitch * 2; - *width = ALIGN(mt->total_width, halign_stencil) * 2; - *height = (mt->total_height / mt->physical_depth0) / 2; - - if (is_render_target) { - *format = BRW_SURFACEFORMAT_R8_UINT; - } -} - - /** * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle * swizzling. 
@@ -230,7 +422,9 @@ brw_get_texture_swizzle(const struct gl_context *ctx, case GL_RED: case GL_RG: case GL_RGB: - if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0) + if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 || + img->TexFormat == MESA_FORMAT_RGB_DXT1 || + img->TexFormat == MESA_FORMAT_SRGB_DXT1) swizzles[3] = SWIZZLE_ONE; break; } @@ -241,197 +435,271 @@ brw_get_texture_swizzle(const struct gl_context *ctx, swizzles[GET_SWZ(t->_Swizzle, 3)]); } -static void -gen4_emit_buffer_surface_state(struct brw_context *brw, - uint32_t *out_offset, - drm_intel_bo *bo, - unsigned buffer_offset, - unsigned surface_format, - unsigned buffer_size, - unsigned pitch, - bool rw) +/** + * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+ + * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are + * + * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE + * 0 1 2 3 4 5 + * 4 5 6 7 0 1 + * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE + * + * which is simply adding 4 then modding by 8 (or anding with 7). + * + * We then may need to apply workarounds for textureGather hardware bugs. + */ +static unsigned +swizzle_to_scs(GLenum swizzle, bool need_green_to_blue) { - uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, out_offset); - memset(surf, 0, 6 * 4); + unsigned scs = (swizzle + 4) & 7; - surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | - surface_format << BRW_SURFACE_FORMAT_SHIFT | - (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0); - surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */ - surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT | - ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT; - surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | - (pitch - 1) << BRW_SURFACE_PITCH_SHIFT; - - /* Emit relocation to surface contents. 
The 965 PRM, Volume 4, section - * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate - * physical cache. It is mapped in hardware to the sampler cache." - */ - if (bo) { - drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4, - bo, buffer_offset, - I915_GEM_DOMAIN_SAMPLER, - (rw ? I915_GEM_DOMAIN_SAMPLER : 0)); - } + return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs; } -void -brw_update_buffer_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_buffer_object *intel_obj = - intel_buffer_object(tObj->BufferObject); - uint32_t size = tObj->BufferSize; - drm_intel_bo *bo = NULL; - mesa_format format = tObj->_BufferObjectFormat; - uint32_t brw_format = brw_format_for_mesa_format(format); - int texel_size = _mesa_get_format_bytes(format); - - if (intel_obj) { - size = MIN2(size, intel_obj->Base.Size); - bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size); - } - - if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) { - _mesa_problem(NULL, "bad format %s for texture buffer\n", - _mesa_get_format_name(format)); - } - - brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo, - tObj->BufferOffset, - brw_format, - size / texel_size, - texel_size, - false /* rw */); -} - -static void -brw_update_texture_surface(struct gl_context *ctx, +static void brw_update_texture_surface(struct gl_context *ctx, unsigned unit, uint32_t *surf_offset, - bool for_gather) + bool for_gather, + bool for_txf, + uint32_t plane) { struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct intel_mipmap_tree *mt = intelObj->mt; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - uint32_t *surf; + const struct 
gen_device_info *devinfo = &brw->screen->devinfo; + struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; - /* BRW_NEW_TEXTURE_BUFFER */ - if (tObj->Target == GL_TEXTURE_BUFFER) { + if (obj->Target == GL_TEXTURE_BUFFER) { brw_update_buffer_texture_surface(ctx, unit, surf_offset); - return; - } - surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, surf_offset); + } else { + struct intel_texture_object *intel_obj = intel_texture_object(obj); + struct intel_mipmap_tree *mt = intel_obj->mt; - uint32_t tex_format = translate_tex_format(brw, mt->format, - sampler->sRGBDecode); + if (plane > 0) { + if (mt->plane[plane - 1] == NULL) + return; + mt = mt->plane[plane - 1]; + } - if (for_gather) { - /* Sandybridge's gather4 message is broken for integer formats. - * To work around this, we pretend the surface is UNORM for - * 8 or 16-bit formats, and emit shader instructions to recover - * the real INT/UINT value. For 32-bit formats, we pretend - * the surface is FLOAT, and simply reinterpret the resulting - * bits. + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + /* If this is a view with restricted NumLayers, then our effective depth + * is not just the miptree depth. */ - switch (tex_format) { - case BRW_SURFACEFORMAT_R8_SINT: - case BRW_SURFACEFORMAT_R8_UINT: - tex_format = BRW_SURFACEFORMAT_R8_UNORM; - break; - - case BRW_SURFACEFORMAT_R16_SINT: - case BRW_SURFACEFORMAT_R16_UINT: - tex_format = BRW_SURFACEFORMAT_R16_UNORM; - break; - - case BRW_SURFACEFORMAT_R32_SINT: - case BRW_SURFACEFORMAT_R32_UINT: - tex_format = BRW_SURFACEFORMAT_R32_FLOAT; - break; - - default: - break; + unsigned view_num_layers; + if (obj->Immutable && obj->Target != GL_TEXTURE_3D) { + view_num_layers = obj->NumLayers; + } else { + view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ? 
+ mt->surf.logical_level0_px.depth : + mt->surf.logical_level0_px.array_len; } - } - surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | - BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | - BRW_SURFACE_CUBEFACE_ENABLES | - tex_format << BRW_SURFACE_FORMAT_SHIFT); - - surf[1] = mt->bo->offset64 + mt->offset; /* reloc */ + /* Handling GL_ALPHA as a surface format override breaks 1.30+ style + * texturing functions that return a float, as our code generation always + * selects the .x channel (which would always be 0). + */ + struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel]; + const bool alpha_depth = obj->DepthMode == GL_ALPHA && + (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || + firstImage->_BaseFormat == GL_DEPTH_STENCIL); + const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW : + brw_get_texture_swizzle(&brw->ctx, obj)); + + mesa_format mesa_fmt; + if (firstImage->_BaseFormat == GL_DEPTH_STENCIL || + firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + /* The format from intel_obj may be a combined depth stencil format + * when we just want depth. Pull it from the miptree instead. This + * is safe because texture views aren't allowed on depth/stencil. + */ + mesa_fmt = mt->format; + } else if (mt->etc_format != MESA_FORMAT_NONE) { + mesa_fmt = mt->format; + } else if (plane > 0) { + mesa_fmt = mt->format; + } else { + mesa_fmt = intel_obj->_Format; + } + enum isl_format format = translate_tex_format(brw, mesa_fmt, + for_txf ? GL_DECODE_EXT : + sampler->sRGBDecode); + + /* Implement gen6 and gen7 gather work-around */ + bool need_green_to_blue = false; + if (for_gather) { + if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT || + format == ISL_FORMAT_R32G32_SINT || + format == ISL_FORMAT_R32G32_UINT)) { + format = ISL_FORMAT_R32G32_FLOAT_LD; + need_green_to_blue = devinfo->is_haswell; + } else if (devinfo->gen == 6) { + /* Sandybridge's gather4 message is broken for integer formats. 
+ * To work around this, we pretend the surface is UNORM for + * 8 or 16-bit formats, and emit shader instructions to recover + * the real INT/UINT value. For 32-bit formats, we pretend + * the surface is FLOAT, and simply reinterpret the resulting + * bits. + */ + switch (format) { + case ISL_FORMAT_R8_SINT: + case ISL_FORMAT_R8_UINT: + format = ISL_FORMAT_R8_UNORM; + break; + + case ISL_FORMAT_R16_SINT: + case ISL_FORMAT_R16_UINT: + format = ISL_FORMAT_R16_UNORM; + break; + + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_UINT: + format = ISL_FORMAT_R32_FLOAT; + break; + + default: + break; + } + } + } - surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT | - (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT | - (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT); + if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) { + if (devinfo->gen <= 7) { + assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update); + mt = mt->r8stencil_mt; + } else { + mt = mt->stencil_mt; + } + format = ISL_FORMAT_R8_UINT; + } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) { + assert(mt->r8stencil_mt && !mt->r8stencil_needs_update); + mt = mt->r8stencil_mt; + format = ISL_FORMAT_R8_UINT; + } - surf[3] = (brw_get_surface_tiling_bits(mt->tiling) | - (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT | - (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); + const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; + + struct isl_view view = { + .format = format, + .base_level = obj->MinLevel + obj->BaseLevel, + .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1, + .base_array_layer = obj->MinLayer, + .array_len = view_num_layers, + .swizzle = { + .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue), + .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue), + .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue), + .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue), + }, + .usage 
= ISL_SURF_USAGE_TEXTURE_BIT, + }; + + /* On Ivy Bridge and earlier, we handle texture swizzle with shader + * code. The actual surface swizzle should be identity. + */ + if (devinfo->gen <= 7 && !devinfo->is_haswell) + view.swizzle = ISL_SWIZZLE_IDENTITY; - surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) | - SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD)); + if (obj->Target == GL_TEXTURE_CUBE_MAP || + obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) + view.usage |= ISL_SURF_USAGE_CUBE_BIT; - surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0; + enum isl_aux_usage aux_usage = + intel_miptree_texture_aux_usage(brw, mt, format); - /* Emit relocation to surface contents */ - drm_intel_bo_emit_reloc(brw->batch.bo, - *surf_offset + 4, - mt->bo, - surf[1] - mt->bo->offset64, - I915_GEM_DOMAIN_SAMPLER, 0); + brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, + surf_offset, surf_index, + 0); + } } -/** - * Create the constant buffer surface. Vertex/fragment shader constants will be - * read from this buffer with Data Port Read instructions/messages. - */ void -brw_create_constant_surface(struct brw_context *brw, - drm_intel_bo *bo, - uint32_t offset, - uint32_t size, - uint32_t *out_offset, - bool dword_pitch) +brw_emit_buffer_surface_state(struct brw_context *brw, + uint32_t *out_offset, + struct brw_bo *bo, + unsigned buffer_offset, + unsigned surface_format, + unsigned buffer_size, + unsigned pitch, + unsigned reloc_flags) { - uint32_t stride = dword_pitch ? 4 : 16; - uint32_t elements = ALIGN(size, stride) / stride; + const struct gen_device_info *devinfo = &brw->screen->devinfo; + uint32_t *dw = brw_state_batch(brw, + brw->isl_dev.ss.size, + brw->isl_dev.ss.align, + out_offset); + + isl_buffer_fill_state(&brw->isl_dev, dw, + .address = !bo ? 
buffer_offset : + brw_state_reloc(&brw->batch, + *out_offset + brw->isl_dev.ss.addr_offset, + bo, buffer_offset, + reloc_flags), + .size = buffer_size, + .format = surface_format, + .stride = pitch, + .mocs = brw_get_bo_mocs(devinfo, bo)); +} + +static unsigned +buffer_texture_range_size(struct brw_context *brw, + struct gl_texture_object *obj) +{ + assert(obj->Target == GL_TEXTURE_BUFFER); + const unsigned texel_size = _mesa_get_format_bytes(obj->_BufferObjectFormat); + const unsigned buffer_size = (!obj->BufferObject ? 0 : + obj->BufferObject->Size); + const unsigned buffer_offset = MIN2(buffer_size, obj->BufferOffset); - brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset, - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT, - elements, stride, false); + /* The ARB_texture_buffer_object specification says: + * + * "The number of texels in the buffer texture's texel array is given by + * + * floor(<buffer_size> / (<components> * sizeof(<base_type>)), + * + * where <buffer_size> is the size of the buffer object, in basic + * machine units and <components> and <base_type> are the element count + * and base data type for elements, as specified in Table X.1. The + * number of texels in the texel array is then clamped to the + * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB." + * + * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride, + * so that when ISL divides by stride to obtain the number of texels, that + * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE. + */ + return MIN3((unsigned)obj->BufferSize, + buffer_size - buffer_offset, + brw->ctx.Const.MaxTextureBufferSize * texel_size); } -/** - * Create the buffer surface. Shader buffer variables will be - * read from / write to this buffer with Data Port Read/Write - * instructions/messages. 
- */ void -brw_create_buffer_surface(struct brw_context *brw, - drm_intel_bo *bo, - uint32_t offset, - uint32_t size, - uint32_t *out_offset, - bool dword_pitch) +brw_update_buffer_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *surf_offset) { - /* Use a raw surface so we can reuse existing untyped read/write/atomic - * messages. We need these specifically for the fragment shader since they - * include a pixel mask header that we need to ensure correct behavior - * with helper invocations, which cannot write to the buffer. - */ - brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset, - BRW_SURFACEFORMAT_RAW, - size, 1, true); + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; + struct intel_buffer_object *intel_obj = + intel_buffer_object(tObj->BufferObject); + const unsigned size = buffer_texture_range_size(brw, tObj); + struct brw_bo *bo = NULL; + mesa_format format = tObj->_BufferObjectFormat; + const enum isl_format isl_format = brw_isl_format_for_mesa_format(format); + int texel_size = _mesa_get_format_bytes(format); + + if (intel_obj) + bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size, + false); + + if (isl_format == ISL_FORMAT_UNSUPPORTED) { + _mesa_problem(NULL, "bad format %s for texture buffer\n", + _mesa_get_format_name(format)); + } + + brw_emit_buffer_surface_state(brw, surf_offset, bo, + tObj->BufferOffset, + isl_format, + size, + texel_size, + 0); } /** @@ -448,11 +716,11 @@ brw_update_sol_surface(struct brw_context *brw, { struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj); uint32_t offset_bytes = 4 * offset_dwords; - drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, + struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo, offset_bytes, - buffer_obj->Size - offset_bytes); - uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, - out_offset); + buffer_obj->Size - offset_bytes, + true); + 
uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset); uint32_t pitch_minus_1 = 4*stride_dwords - 1; size_t size_dwords = buffer_obj->Size / 4; uint32_t buffer_size_minus_1, width, height, depth, surface_format; @@ -485,16 +753,16 @@ brw_update_sol_surface(struct brw_context *brw, switch (num_vector_components) { case 1: - surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + surface_format = ISL_FORMAT_R32_FLOAT; break; case 2: - surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT; + surface_format = ISL_FORMAT_R32G32_FLOAT; break; case 3: - surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; + surface_format = ISL_FORMAT_R32G32B32_FLOAT; break; case 4: - surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + surface_format = ISL_FORMAT_R32G32B32A32_FLOAT; break; default: unreachable("Invalid vector size for transform feedback output"); @@ -504,19 +772,14 @@ brw_update_sol_surface(struct brw_context *brw, BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | surface_format << BRW_SURFACE_FORMAT_SHIFT | BRW_SURFACE_RC_READ_WRITE; - surf[1] = bo->offset64 + offset_bytes; /* reloc */ + surf[1] = brw_state_reloc(&brw->batch, + *out_offset + 4, bo, offset_bytes, RELOC_WRITE); surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | height << BRW_SURFACE_HEIGHT_SHIFT); surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); surf[4] = 0; surf[5] = 0; - - /* Emit relocation to surface contents. 
*/ - drm_intel_bo_emit_reloc(brw->batch.bo, - *out_offset + 4, - bo, offset_bytes, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } /* Creates a new WM constant buffer reflecting the current fragment program's @@ -530,14 +793,16 @@ brw_upload_wm_pull_constants(struct brw_context *brw) { struct brw_stage_state *stage_state = &brw->wm.base; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; + struct brw_program *fp = + (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; + /* BRW_NEW_FS_PROG_DATA */ - struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base; + struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; + _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT); /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base, - stage_state, prog_data, true); + brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program, + stage_state, prog_data); } const struct brw_tracked_state brw_wm_pull_constants = { @@ -559,72 +824,53 @@ const struct brw_tracked_state brw_wm_pull_constants = { * hardware discard the target 0 color output.. */ static void -brw_emit_null_surface_state(struct brw_context *brw, - unsigned width, - unsigned height, - unsigned samples, - uint32_t *out_offset) +emit_null_surface_state(struct brw_context *brw, + const struct gl_framebuffer *fb, + uint32_t *out_offset) { - /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming - * Notes): - * - * A null surface will be used in instances where an actual surface is - * not bound. When a write message is generated to a null surface, no - * actual surface is written to. When a read message (including any - * sampling engine message) is generated to a null surface, the result - * is all zeros. Note that a null surface type is allowed to be used - * with all messages, even if it is not specificially indicated as - * supported. 
All of the remaining fields in surface state are ignored - * for null surfaces, with the following exceptions: + const struct gen_device_info *devinfo = &brw->screen->devinfo; + uint32_t *surf = brw_state_batch(brw, + brw->isl_dev.ss.size, + brw->isl_dev.ss.align, + out_offset); + + /* Use the fb dimensions or 1x1x1 */ + const unsigned width = fb ? _mesa_geometric_width(fb) : 1; + const unsigned height = fb ? _mesa_geometric_height(fb) : 1; + const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1; + + if (devinfo->gen != 6 || samples <= 1) { + isl_null_fill_state(&brw->isl_dev, surf, + isl_extent3d(width, height, 1)); + return; + } + + /* On Gen6, null render targets seem to cause GPU hangs when multisampling. + * So work around this problem by rendering into dummy color buffer. * - * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the - * depth buffer’s corresponding state for all render target surfaces, - * including null. + * To decrease the amount of memory needed by the workaround buffer, we + * set its pitch to 128 bytes (the width of a Y tile). This means that + * the amount of memory needed for the workaround buffer is + * (width_in_tiles + height_in_tiles - 1) tiles. * - * - Surface Format must be R8G8B8A8_UNORM. + * Note that since the workaround buffer will be interpreted by the + * hardware as an interleaved multisampled buffer, we need to compute + * width_in_tiles and height_in_tiles by dividing the width and height + * by 16 rather than the normal Y-tile size of 32. 
*/ - unsigned surface_type = BRW_SURFACE_NULL; - drm_intel_bo *bo = NULL; - unsigned pitch_minus_1 = 0; - uint32_t multisampling_state = 0; - uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, - out_offset); + unsigned width_in_tiles = ALIGN(width, 16) / 16; + unsigned height_in_tiles = ALIGN(height, 16) / 16; + unsigned pitch_minus_1 = 127; + unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; + brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo, + size_needed); - if (samples > 1) { - /* On Gen6, null render targets seem to cause GPU hangs when - * multisampling. So work around this problem by rendering into dummy - * color buffer. - * - * To decrease the amount of memory needed by the workaround buffer, we - * set its pitch to 128 bytes (the width of a Y tile). This means that - * the amount of memory needed for the workaround buffer is - * (width_in_tiles + height_in_tiles - 1) tiles. - * - * Note that since the workaround buffer will be interpreted by the - * hardware as an interleaved multisampled buffer, we need to compute - * width_in_tiles and height_in_tiles by dividing the width and height - * by 16 rather than the normal Y-tile size of 32. 
- */ - unsigned width_in_tiles = ALIGN(width, 16) / 16; - unsigned height_in_tiles = ALIGN(height, 16) / 16; - unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; - brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo, - size_needed); - bo = brw->wm.multisampled_null_render_target_bo; - surface_type = BRW_SURFACE_2D; - pitch_minus_1 = 127; - multisampling_state = brw_get_surface_num_multisamples(samples); - } + surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | + ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); + surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4, + brw->wm.multisampled_null_render_target_bo, + 0, RELOC_WRITE); - surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT | - BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); - if (brw->gen < 6) { - surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT | - 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT | - 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT | - 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); - } - surf[1] = bo ? bo->offset64 : 0; surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT | (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); @@ -635,15 +881,8 @@ brw_emit_null_surface_state(struct brw_context *brw, */ surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); - surf[4] = multisampling_state; + surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4; surf[5] = 0; - - if (bo) { - drm_intel_bo_emit_reloc(brw->batch.bo, - *out_offset + 4, - bo, 0, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - } } /** @@ -652,25 +891,24 @@ brw_emit_null_surface_state(struct brw_context *brw, * usable for further buffers when doing ARB_draw_buffer support. 
*/ static uint32_t -brw_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - bool layered, unsigned unit, - uint32_t surf_index) +gen4_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned unit, + uint32_t surf_index) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; uint32_t *surf; uint32_t tile_x, tile_y; - uint32_t format = 0; + enum isl_format format; uint32_t offset; /* _NEW_BUFFERS */ mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); /* BRW_NEW_FS_PROG_DATA */ - assert(!layered); - - if (rb->TexImage && !brw->has_surface_tile_offset) { + if (rb->TexImage && !devinfo->has_surface_tile_offset) { intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y); if (tile_x != 0 || tile_y != 0) { @@ -681,16 +919,15 @@ brw_update_renderbuffer_surface(struct brw_context *brw, * miptree and render into that. 
*/ intel_renderbuffer_move_to_temp(brw, irb, false); - mt = irb->mt; + assert(irb->align_wa_mt); + mt = irb->align_wa_mt; } } - intel_miptree_used_for_rendering(irb->mt); + surf = brw_state_batch(brw, 6 * 4, 32, &offset); - surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset); - - format = brw->render_target_format[rb_format]; - if (unlikely(!brw->format_supported_as_render_target[rb_format])) { + format = brw->mesa_to_isl_render_format[rb_format]; + if (unlikely(!brw->mesa_format_supports_render[rb_format])) { _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", __func__, _mesa_get_format_name(rb_format)); } @@ -700,18 +937,22 @@ brw_update_renderbuffer_surface(struct brw_context *brw, /* reloc */ assert(mt->offset % mt->cpp == 0); - surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) + - mt->bo->offset64 + mt->offset); + surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo, + mt->offset + + intel_renderbuffer_get_tile_offsets(irb, + &tile_x, + &tile_y), + RELOC_WRITE); surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - surf[3] = (brw_get_surface_tiling_bits(mt->tiling) | - (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); + surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) | + (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT); - surf[4] = brw_get_surface_num_multisamples(mt->num_samples); + surf[4] = brw_get_surface_num_multisamples(mt->surf.samples); - assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); + assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); /* Note that the low bits of these fields are missing, so * there's the possibility of getting in trouble. */ @@ -719,87 +960,79 @@ brw_update_renderbuffer_surface(struct brw_context *brw, assert(tile_y % 2 == 0); surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | - (mt->valign == 4 ? 
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); + (mt->surf.image_alignment_el.height == 4 ? + BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); - if (brw->gen < 6) { + if (devinfo->gen < 6) { /* _NEW_COLOR */ - if (!ctx->Color.ColorLogicOpEnabled && - (ctx->Color.BlendEnabled & (1 << unit))) + if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode && + (ctx->Color.BlendEnabled & (1 << unit))) surf[0] |= BRW_SURFACE_BLEND_ENABLED; - if (!ctx->Color.ColorMask[unit][0]) + if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 0)) surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT; - if (!ctx->Color.ColorMask[unit][1]) + if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 1)) surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT; - if (!ctx->Color.ColorMask[unit][2]) + if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 2)) surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT; /* As mentioned above, disable writes to the alpha component when the * renderbuffer is XRGB. */ if (ctx->DrawBuffer->Visual.alphaBits == 0 || - !ctx->Color.ColorMask[unit][3]) { + !GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 3)) { surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT; } } - drm_intel_bo_emit_reloc(brw->batch.bo, - offset + 4, - mt->bo, - surf[1] - mt->bo->offset64, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - return offset; } -/** - * Construct SURFACE_STATE objects for renderbuffers/draw buffers. 
- */ -void -brw_update_renderbuffer_surfaces(struct brw_context *brw, - const struct gl_framebuffer *fb, - uint32_t render_target_start, - uint32_t *surf_offset) +static void +update_renderbuffer_surfaces(struct brw_context *brw) { - GLuint i; - const unsigned int w = _mesa_geometric_width(fb); - const unsigned int h = _mesa_geometric_height(fb); - const unsigned int s = _mesa_geometric_samples(fb); + const struct gen_device_info *devinfo = &brw->screen->devinfo; + const struct gl_context *ctx = &brw->ctx; + + /* _NEW_BUFFERS | _NEW_COLOR */ + const struct gl_framebuffer *fb = ctx->DrawBuffer; + + /* Render targets always start at binding table index 0. */ + const unsigned rt_start = 0; + + uint32_t *surf_offsets = brw->wm.base.surf_offset; /* Update surfaces for drawing buffers */ if (fb->_NumColorDrawBuffers >= 1) { - for (i = 0; i < fb->_NumColorDrawBuffers; i++) { - const uint32_t surf_index = render_target_start + i; - - if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) { - surf_offset[surf_index] = - brw->vtbl.update_renderbuffer_surface( - brw, fb->_ColorDrawBuffers[i], - _mesa_geometric_layers(fb) > 0, i, surf_index); + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; + + if (intel_renderbuffer(rb)) { + surf_offsets[rt_start + i] = devinfo->gen >= 6 ? 
+ gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) : + gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i); } else { - brw->vtbl.emit_null_surface_state(brw, w, h, s, - &surf_offset[surf_index]); + emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]); } } } else { - const uint32_t surf_index = render_target_start; - brw->vtbl.emit_null_surface_state(brw, w, h, s, - &surf_offset[surf_index]); + emit_null_surface_state(brw, fb, &surf_offsets[rt_start]); } -} -static void -update_renderbuffer_surfaces(struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; + /* The PIPE_CONTROL command description says: + * + * "Whenever a Binding Table Index (BTI) used by a Render Taget Message + * points to a different RENDER_SURFACE_STATE, SW must issue a Render + * Target Cache Flush by enabling this bit. When render target flush + * is set due to new association of BTI, PS Scoreboard Stall bit must + * be set in this packet." + */ + if (devinfo->gen >= 11) { + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + } - /* _NEW_BUFFERS | _NEW_COLOR */ - const struct gl_framebuffer *fb = ctx->DrawBuffer; - brw_update_renderbuffer_surfaces( - brw, fb, - brw->wm.prog_data->binding_table.render_target_start, - brw->wm.base.surf_offset); brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } @@ -807,8 +1040,7 @@ const struct brw_tracked_state brw_renderbuffer_surfaces = { .dirty = { .mesa = _NEW_BUFFERS | _NEW_COLOR, - .brw = BRW_NEW_BATCH | - BRW_NEW_FS_PROG_DATA, + .brw = BRW_NEW_BATCH, }, .emit = update_renderbuffer_surfaces, }; @@ -816,17 +1048,105 @@ const struct brw_tracked_state brw_renderbuffer_surfaces = { const struct brw_tracked_state gen6_renderbuffer_surfaces = { .dirty = { .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_AUX_STATE, }, .emit = update_renderbuffer_surfaces, }; +static void +update_renderbuffer_read_surfaces(struct brw_context *brw) +{ + 
const struct gl_context *ctx = &brw->ctx; + + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_wm_prog_data *wm_prog_data = + brw_wm_prog_data(brw->wm.base.prog_data); + + if (wm_prog_data->has_render_target_reads && + !ctx->Extensions.EXT_shader_framebuffer_fetch) { + /* _NEW_BUFFERS */ + const struct gl_framebuffer *fb = ctx->DrawBuffer; + + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; + const struct intel_renderbuffer *irb = intel_renderbuffer(rb); + const unsigned surf_index = + wm_prog_data->binding_table.render_target_read_start + i; + uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index]; + + if (irb) { + const enum isl_format format = brw->mesa_to_isl_render_format[ + _mesa_get_render_format(ctx, intel_rb_format(irb))]; + assert(isl_format_supports_sampling(&brw->screen->devinfo, + format)); + + /* Override the target of the texture if the render buffer is a + * single slice of a 3D texture (since the minimum array element + * field of the surface state structure is ignored by the sampler + * unit for 3D textures on some hardware), or if the render buffer + * is a 1D array (since shaders always provide the array index + * coordinate at the Z component to avoid state-dependent + * recompiles when changing the texture target of the + * framebuffer). + */ + const GLenum target = + (irb->mt->target == GL_TEXTURE_3D && + irb->layer_count == 1) ? GL_TEXTURE_2D : + irb->mt->target == GL_TEXTURE_1D_ARRAY ? 
GL_TEXTURE_2D_ARRAY : + irb->mt->target; + + const struct isl_view view = { + .format = format, + .base_level = irb->mt_level - irb->mt->first_level, + .levels = 1, + .base_array_layer = irb->mt_layer, + .array_len = irb->layer_count, + .swizzle = ISL_SWIZZLE_IDENTITY, + .usage = ISL_SURF_USAGE_TEXTURE_BIT, + }; + + enum isl_aux_usage aux_usage = + intel_miptree_texture_aux_usage(brw, irb->mt, format); + if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE) + aux_usage = ISL_AUX_USAGE_NONE; + + brw_emit_surface_state(brw, irb->mt, target, view, aux_usage, + surf_offset, surf_index, + 0); + + } else { + emit_null_surface_state(brw, fb, surf_offset); + } + } + + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + } +} + +const struct brw_tracked_state brw_renderbuffer_read_surfaces = { + .dirty = { + .mesa = _NEW_BUFFERS, + .brw = BRW_NEW_BATCH | + BRW_NEW_AUX_STATE | + BRW_NEW_FS_PROG_DATA, + }, + .emit = update_renderbuffer_read_surfaces, +}; + +static bool +is_depth_texture(struct intel_texture_object *iobj) +{ + GLenum base_format = _mesa_get_format_base_format(iobj->_Format); + return base_format == GL_DEPTH_COMPONENT || + (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling); +} static void update_stage_texture_surfaces(struct brw_context *brw, const struct gl_program *prog, struct brw_stage_state *stage_state, - bool for_gather) + bool for_gather, uint32_t plane) { if (!prog) return; @@ -839,18 +1159,43 @@ update_stage_texture_surfaces(struct brw_context *brw, if (for_gather) surf_offset += stage_state->prog_data->binding_table.gather_texture_start; else - surf_offset += stage_state->prog_data->binding_table.texture_start; + surf_offset += stage_state->prog_data->binding_table.plane_start[plane]; - unsigned num_samplers = _mesa_fls(prog->SamplersUsed); + unsigned num_samplers = util_last_bit(prog->SamplersUsed); for (unsigned s = 0; s < num_samplers; s++) { surf_offset[s] = 0; if (prog->SamplersUsed & (1 << s)) { const unsigned unit = prog->SamplerUnits[s]; 
+ const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s); + struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; + struct intel_texture_object *iobj = intel_texture_object(obj); /* _NEW_TEXTURE */ - if (ctx->Texture.Unit[unit]._Current) { - brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather); + if (!obj) + continue; + + if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) { + /* A programming note for the sample_c message says: + * + * "The Surface Format of the associated surface must be + * indicated as supporting shadow mapping as indicated in the + * surface format table." + * + * Accessing non-depth textures via a sampler*Shadow type is + * undefined. GLSL 4.50 page 162 says: + * + * "If a shadow texture call is made to a sampler that does not + * represent a depth texture, then results are undefined." + * + * We give them a null surface (zeros) for undefined. We've seen + * GPU hangs with color buffers and sample_c, so we try and avoid + * those with this hack. 
+ */ + emit_null_surface_state(brw, NULL, surf_offset + s); + } else { + brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, + used_by_txf, plane); } } } @@ -863,36 +1208,47 @@ update_stage_texture_surfaces(struct brw_context *brw, static void brw_update_texture_surfaces(struct brw_context *brw) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; + /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX]; + + /* BRW_NEW_TESS_PROGRAMS */ + struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL]; + struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL]; /* BRW_NEW_GEOMETRY_PROGRAM */ - struct gl_program *gs = (struct gl_program *) brw->geometry_program; + struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY]; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_program *fs = (struct gl_program *) brw->fragment_program; - - /* BRW_NEW_COMPUTE_PROGRAM */ - struct gl_program *cs = (struct gl_program *) brw->compute_program; + struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT]; /* _NEW_TEXTURE */ - update_stage_texture_surfaces(brw, vs, &brw->vs.base, false); - update_stage_texture_surfaces(brw, gs, &brw->gs.base, false); - update_stage_texture_surfaces(brw, fs, &brw->wm.base, false); - update_stage_texture_surfaces(brw, cs, &brw->cs.base, false); + update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0); + update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0); + update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0); + update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0); + update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0); /* emit alternate set of surface state for gather. this * allows the surface format to be overriden for only the * gather4 messages. 
*/ - if (brw->gen < 8) { - if (vs && vs->UsesGather) - update_stage_texture_surfaces(brw, vs, &brw->vs.base, true); - if (gs && gs->UsesGather) - update_stage_texture_surfaces(brw, gs, &brw->gs.base, true); - if (fs && fs->UsesGather) - update_stage_texture_surfaces(brw, fs, &brw->wm.base, true); - if (cs && cs->UsesGather) - update_stage_texture_surfaces(brw, cs, &brw->cs.base, true); + if (devinfo->gen < 8) { + if (vs && vs->info.uses_texture_gather) + update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0); + if (tcs && tcs->info.uses_texture_gather) + update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0); + if (tes && tes->info.uses_texture_gather) + update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0); + if (gs && gs->info.uses_texture_gather) + update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0); + if (fs && fs->info.uses_texture_gather) + update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0); + } + + if (fs) { + update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1); + update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2); } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; @@ -902,11 +1258,14 @@ const struct brw_tracked_state brw_texture_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | - BRW_NEW_COMPUTE_PROGRAM | + BRW_NEW_AUX_STATE | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_GS_PROG_DATA | + BRW_NEW_TESS_PROGRAMS | + BRW_NEW_TCS_PROG_DATA | + BRW_NEW_TES_PROG_DATA | BRW_NEW_TEXTURE_BUFFER | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_VS_PROG_DATA, @@ -914,66 +1273,109 @@ const struct brw_tracked_state brw_texture_surfaces = { .emit = brw_update_texture_surfaces, }; +static void +brw_update_cs_texture_surfaces(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + /* BRW_NEW_COMPUTE_PROGRAM */ + struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE]; + + /* _NEW_TEXTURE */ + 
update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0); + + /* emit alternate set of surface state for gather. this + * allows the surface format to be overriden for only the + * gather4 messages. + */ + if (devinfo->gen < 8) { + if (cs && cs->info.uses_texture_gather) + update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0); + } + + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; +} + +const struct brw_tracked_state brw_cs_texture_surfaces = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = BRW_NEW_BATCH | + BRW_NEW_COMPUTE_PROGRAM | + BRW_NEW_AUX_STATE, + }, + .emit = brw_update_cs_texture_surfaces, +}; + +static void +upload_buffer_surface(struct brw_context *brw, + struct gl_buffer_binding *binding, + uint32_t *out_offset, + enum isl_format format, + unsigned reloc_flags) +{ + struct gl_context *ctx = &brw->ctx; + + if (binding->BufferObject == ctx->Shared->NullBufferObj) { + emit_null_surface_state(brw, NULL, out_offset); + } else { + ptrdiff_t size = binding->BufferObject->Size - binding->Offset; + if (!binding->AutomaticSize) + size = MIN2(size, binding->Size); + + struct intel_buffer_object *iobj = + intel_buffer_object(binding->BufferObject); + struct brw_bo *bo = + intel_bufferobj_buffer(brw, iobj, binding->Offset, size, + (reloc_flags & RELOC_WRITE) != 0); + + brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset, + format, size, 1, reloc_flags); + } +} + void -brw_upload_ubo_surfaces(struct brw_context *brw, - struct gl_shader *shader, +brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog, struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data, - bool dword_pitch) + struct brw_stage_prog_data *prog_data) { struct gl_context *ctx = &brw->ctx; - if (!shader) + if (!prog || (prog->info.num_ubos == 0 && + prog->info.num_ssbos == 0 && + prog->info.num_abos == 0)) return; uint32_t *ubo_surf_offsets = &stage_state->surf_offset[prog_data->binding_table.ubo_start]; - for (int i = 0; i < 
shader->NumUniformBlocks; i++) { - struct gl_uniform_buffer_binding *binding = - &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding]; - - if (binding->BufferObject == ctx->Shared->NullBufferObj) { - brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]); - } else { - struct intel_buffer_object *intel_bo = - intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = - intel_bufferobj_buffer(brw, intel_bo, - binding->Offset, - binding->BufferObject->Size - binding->Offset); - brw_create_constant_surface(brw, bo, binding->Offset, - binding->BufferObject->Size - binding->Offset, - &ubo_surf_offsets[i], - dword_pitch); - } + for (int i = 0; i < prog->info.num_ubos; i++) { + struct gl_buffer_binding *binding = + &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding]; + upload_buffer_surface(brw, binding, &ubo_surf_offsets[i], + ISL_FORMAT_R32G32B32A32_FLOAT, 0); } - uint32_t *ssbo_surf_offsets = + uint32_t *abo_surf_offsets = &stage_state->surf_offset[prog_data->binding_table.ssbo_start]; + uint32_t *ssbo_surf_offsets = abo_surf_offsets + prog->info.num_abos; - for (int i = 0; i < shader->NumShaderStorageBlocks; i++) { - struct gl_shader_storage_buffer_binding *binding = - &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding]; + for (int i = 0; i < prog->info.num_abos; i++) { + struct gl_buffer_binding *binding = + &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding]; + upload_buffer_surface(brw, binding, &abo_surf_offsets[i], + ISL_FORMAT_RAW, RELOC_WRITE); + } - if (binding->BufferObject == ctx->Shared->NullBufferObj) { - brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]); - } else { - struct intel_buffer_object *intel_bo = - intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = - intel_bufferobj_buffer(brw, intel_bo, - binding->Offset, - binding->BufferObject->Size - binding->Offset); - brw_create_buffer_surface(brw, bo, binding->Offset, - binding->BufferObject->Size 
- binding->Offset, - &ssbo_surf_offsets[i], - dword_pitch); - } + for (int i = 0; i < prog->info.num_ssbos; i++) { + struct gl_buffer_binding *binding = + &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding]; + + upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i], + ISL_FORMAT_RAW, RELOC_WRITE); } - if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks) - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + stage_state->push_constants_dirty = true; + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } static void @@ -981,14 +1383,10 @@ brw_upload_wm_ubo_surfaces(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* _NEW_PROGRAM */ - struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram; - - if (!prog) - return; + struct gl_program *prog = ctx->FragmentProgram._Current; /* BRW_NEW_FS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], - &brw->wm.base, &brw->wm.prog_data->base, true); + brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data); } const struct brw_tracked_state brw_wm_ubo_surfaces = { @@ -1006,15 +1404,11 @@ brw_upload_cs_ubo_surfaces(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* _NEW_PROGRAM */ - struct gl_shader_program *prog = + struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - if (!prog) - return; - /* BRW_NEW_CS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], - &brw->cs.base, &brw->cs.prog_data->base, true); + brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data); } const struct brw_tracked_state brw_cs_ubo_surfaces = { @@ -1027,94 +1421,16 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = { .emit = brw_upload_cs_ubo_surfaces, }; -void -brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data) -{ - struct gl_context *ctx = 
&brw->ctx; - uint32_t *surf_offsets = - &stage_state->surf_offset[prog_data->binding_table.abo_start]; - - for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) { - struct gl_atomic_buffer_binding *binding = - &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding]; - struct intel_buffer_object *intel_bo = - intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = intel_bufferobj_buffer( - brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); - - brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, - binding->Offset, BRW_SURFACEFORMAT_RAW, - bo->size - binding->Offset, 1, true); - } - - if (prog->NumAtomicBuffers) - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -static void -brw_upload_wm_abo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; - - if (prog) { - /* BRW_NEW_FS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->wm.base, - &brw->wm.prog_data->base); - } -} - -const struct brw_tracked_state brw_wm_abo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_ATOMIC_BUFFER | - BRW_NEW_BATCH | - BRW_NEW_FS_PROG_DATA, - }, - .emit = brw_upload_wm_abo_surfaces, -}; - -static void -brw_upload_cs_abo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_shader_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - - if (prog) { - /* BRW_NEW_CS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->cs.base, - &brw->cs.prog_data->base); - } -} - -const struct brw_tracked_state brw_cs_abo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_ATOMIC_BUFFER | - BRW_NEW_BATCH | - BRW_NEW_CS_PROG_DATA, - }, - .emit = brw_upload_cs_abo_surfaces, -}; - static void brw_upload_cs_image_surfaces(struct brw_context *brw) { - struct gl_context *ctx = &brw->ctx; /* _NEW_PROGRAM */ - struct gl_shader_program *prog = - 
ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE]; - if (prog) { + if (cp) { /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], - &brw->cs.base, &brw->cs.prog_data->base); + brw_upload_image_surfaces(brw, cp, &brw->cs.base, + brw->cs.base.prog_data); } } @@ -1123,6 +1439,7 @@ const struct brw_tracked_state brw_cs_image_surfaces = { .mesa = _NEW_TEXTURE | _NEW_PROGRAM, .brw = BRW_NEW_BATCH | BRW_NEW_CS_PROG_DATA | + BRW_NEW_AUX_STATE | BRW_NEW_IMAGE_UNITS }, .emit = brw_upload_cs_image_surfaces, @@ -1131,20 +1448,21 @@ const struct brw_tracked_state brw_cs_image_surfaces = { static uint32_t get_image_format(struct brw_context *brw, mesa_format format, GLenum access) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; + enum isl_format hw_format = brw_isl_format_for_mesa_format(format); if (access == GL_WRITE_ONLY) { - return brw_format_for_mesa_format(format); - } else { + return hw_format; + } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) { /* Typed surface reads support a very limited subset of the shader * image formats. Translate it into the closest format the * hardware supports. */ - if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) || - (_mesa_get_format_bytes(format) >= 8 && - (brw->gen == 7 && !brw->is_haswell))) - return BRW_SURFACEFORMAT_RAW; - else - return brw_format_for_mesa_format( - brw_lower_mesa_image_format(brw->intelScreen->devinfo, format)); + return isl_lower_storage_image_format(devinfo, hw_format); + } else { + /* The hardware doesn't actually support a typed format that we can use + * so we have to fall back to untyped read/write messages. 
+ */ + return ISL_FORMAT_RAW; } } @@ -1170,79 +1488,23 @@ update_buffer_image_param(struct brw_context *brw, unsigned surface_idx, struct brw_image_param *param) { - struct gl_buffer_object *obj = u->TexObj->BufferObject; - + const unsigned size = buffer_texture_range_size(brw, u->TexObj); update_default_image_param(brw, u, surface_idx, param); - param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat); + param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat); param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); } -static void -update_texture_image_param(struct brw_context *brw, - struct gl_image_unit *u, - unsigned surface_idx, - struct brw_image_param *param) +static unsigned +get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target, + unsigned level) { - struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt; - - update_default_image_param(brw, u, surface_idx, param); - - param->size[0] = minify(mt->logical_width0, u->Level); - param->size[1] = minify(mt->logical_height0, u->Level); - param->size[2] = (!u->Layered ? 1 : - u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 : - u->TexObj->Target == GL_TEXTURE_3D ? - minify(mt->logical_depth0, u->Level) : - mt->logical_depth0); - - intel_miptree_get_image_offset(mt, u->Level, u->_Layer, - &param->offset[0], - &param->offset[1]); - - param->stride[0] = mt->cpp; - param->stride[1] = mt->pitch / mt->cpp; - param->stride[2] = - brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level); - param->stride[3] = - brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level); - - if (mt->tiling == I915_TILING_X) { - /* An X tile is a rectangular block of 512x8 bytes. */ - param->tiling[0] = _mesa_logbase2(512 / mt->cpp); - param->tiling[1] = _mesa_logbase2(8); - - if (brw->has_swizzling) { - /* Right shifts required to swizzle bits 9 and 10 of the memory - * address with bit 6.
- */ - param->swizzling[0] = 3; - param->swizzling[1] = 4; - } - } else if (mt->tiling == I915_TILING_Y) { - /* The layout of a Y-tiled surface in memory isn't really fundamentally - * different to the layout of an X-tiled surface, we simply pretend that - * the surface is broken up in a number of smaller 16Bx32 tiles, each - * one arranged in X-major order just like is the case for X-tiling. - */ - param->tiling[0] = _mesa_logbase2(16 / mt->cpp); - param->tiling[1] = _mesa_logbase2(32); - - if (brw->has_swizzling) { - /* Right shift required to swizzle bit 9 of the memory address with - * bit 6. - */ - param->swizzling[0] = 3; - } - } + if (target == GL_TEXTURE_CUBE_MAP) + return 6; - /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The - * address calculation algorithm (emit_address_calculation() in - * brw_fs_surface_builder.cpp) handles this as a sort of tiling with - * modulus equal to the LOD. - */ - param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level : - 0); + return target == GL_TEXTURE_3D ? + minify(mt->surf.logical_level0_px.depth, level) : + mt->surf.logical_level0_px.array_len; } static void @@ -1260,87 +1522,100 @@ update_image_surface(struct brw_context *brw, if (obj->Target == GL_TEXTURE_BUFFER) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj->BufferObject); - const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 : + const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 : _mesa_get_format_bytes(u->_ActualFormat)); + const unsigned buffer_size = buffer_texture_range_size(brw, obj); - brw->vtbl.emit_buffer_surface_state( + brw_emit_buffer_surface_state( brw, surf_offset, intel_obj->buffer, obj->BufferOffset, - format, intel_obj->Base.Size / texel_size, texel_size, - access != GL_READ_ONLY); + format, buffer_size, texel_size, + access != GL_READ_ONLY ? 
RELOC_WRITE : 0); update_buffer_image_param(brw, u, surface_idx, param); } else { struct intel_texture_object *intel_obj = intel_texture_object(obj); struct intel_mipmap_tree *mt = intel_obj->mt; - - if (format == BRW_SURFACEFORMAT_RAW) { - brw->vtbl.emit_buffer_surface_state( + const unsigned num_layers = u->Layered ? + get_image_num_layers(mt, obj->Target, u->Level) : 1; + + struct isl_view view = { + .format = format, + .base_level = obj->MinLevel + u->Level, + .levels = 1, + .base_array_layer = obj->MinLayer + u->_Layer, + .array_len = num_layers, + .swizzle = ISL_SWIZZLE_IDENTITY, + .usage = ISL_SURF_USAGE_STORAGE_BIT, + }; + + if (format == ISL_FORMAT_RAW) { + brw_emit_buffer_surface_state( brw, surf_offset, mt->bo, mt->offset, format, mt->bo->size - mt->offset, 1 /* pitch */, - access != GL_READ_ONLY); + access != GL_READ_ONLY ? RELOC_WRITE : 0); } else { - const unsigned min_layer = obj->MinLayer + u->_Layer; - const unsigned min_level = obj->MinLevel + u->Level; - const unsigned num_layers = (!u->Layered ? 1 : - obj->Target == GL_TEXTURE_CUBE_MAP ? 6 : - mt->logical_depth0); - const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP || - obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ? - GL_TEXTURE_2D_ARRAY : obj->Target); - - brw->vtbl.emit_texture_surface_state( - brw, mt, target, - min_layer, min_layer + num_layers, - min_level, min_level + 1, - format, SWIZZLE_XYZW, - surf_offset, access != GL_READ_ONLY, false); + const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; + assert(!intel_miptree_has_color_unresolved(mt, + view.base_level, 1, + view.base_array_layer, + view.array_len)); + brw_emit_surface_state(brw, mt, mt->target, view, + ISL_AUX_USAGE_NONE, + surf_offset, surf_index, + access == GL_READ_ONLY ? 
0 : RELOC_WRITE); } - update_texture_image_param(brw, u, surface_idx, param); + isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view); + param->surface_idx = surface_idx; } } else { - brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset); + emit_null_surface_state(brw, NULL, surf_offset); update_default_image_param(brw, u, surface_idx, param); } } void brw_upload_image_surfaces(struct brw_context *brw, - struct gl_shader *shader, + const struct gl_program *prog, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data) { + assert(prog); struct gl_context *ctx = &brw->ctx; - if (shader && shader->NumImages) { - for (unsigned i = 0; i < shader->NumImages; i++) { - struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]]; + if (prog->info.num_images) { + for (unsigned i = 0; i < prog->info.num_images; i++) { + struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]]; const unsigned surf_idx = prog_data->binding_table.image_start + i; - update_image_surface(brw, u, shader->ImageAccess[i], + update_image_surface(brw, u, prog->sh.ImageAccess[i], surf_idx, &stage_state->surf_offset[surf_idx], - &prog_data->image_param[i]); + &stage_state->image_param[i]); } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + /* This may have changed the image metadata dependent on the context + * image unit state and passed to the program as uniforms, make sure + * that push and pull constants are reuploaded. 
+ */ + brw->NewGLState |= _NEW_PROGRAM_CONSTANTS; } } static void brw_upload_wm_image_surfaces(struct brw_context *brw) { - struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; + const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT]; - if (prog) { + if (wm) { /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], - &brw->wm.base, &brw->wm.prog_data->base); + brw_upload_image_surfaces(brw, wm, &brw->wm.base, + brw->wm.base.prog_data); } } @@ -1348,6 +1623,7 @@ const struct brw_tracked_state brw_wm_image_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | + BRW_NEW_AUX_STATE | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | BRW_NEW_IMAGE_UNITS @@ -1355,54 +1631,50 @@ const struct brw_tracked_state brw_wm_image_surfaces = { .emit = brw_upload_wm_image_surfaces, }; -void -gen4_init_vtable_surface_functions(struct brw_context *brw) -{ - brw->vtbl.update_texture_surface = brw_update_texture_surface; - brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface; - brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state; - brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state; -} - static void brw_upload_cs_work_groups_surface(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* _NEW_PROGRAM */ - struct gl_shader_program *prog = + struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + /* BRW_NEW_CS_PROG_DATA */ + const struct brw_cs_prog_data *cs_prog_data = + brw_cs_prog_data(brw->cs.base.prog_data); - if (prog && brw->cs.prog_data->uses_num_work_groups) { + if (prog && cs_prog_data->uses_num_work_groups) { const unsigned surf_idx = - brw->cs.prog_data->binding_table.work_groups_start; + cs_prog_data->binding_table.work_groups_start; uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; - 
drm_intel_bo *bo; + struct brw_bo *bo; uint32_t bo_offset; if (brw->compute.num_work_groups_bo == NULL) { bo = NULL; - intel_upload_data(brw, - (void *)brw->compute.num_work_groups, - 3 * sizeof(GLuint), - sizeof(GLuint), - &bo, - &bo_offset); + brw_upload_data(&brw->upload, + (void *)brw->compute.num_work_groups, + 3 * sizeof(GLuint), + sizeof(GLuint), + &bo, + &bo_offset); } else { bo = brw->compute.num_work_groups_bo; bo_offset = brw->compute.num_work_groups_offset; } - brw->vtbl.emit_buffer_surface_state(brw, surf_offset, - bo, bo_offset, - BRW_SURFACEFORMAT_RAW, - 3 * sizeof(GLuint), 1, true); + brw_emit_buffer_surface_state(brw, surf_offset, + bo, bo_offset, + ISL_FORMAT_RAW, + 3 * sizeof(GLuint), 1, + RELOC_WRITE); brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } } const struct brw_tracked_state brw_cs_work_groups_surface = { .dirty = { - .brw = BRW_NEW_CS_WORK_GROUPS + .brw = BRW_NEW_CS_PROG_DATA | + BRW_NEW_CS_WORK_GROUPS }, .emit = brw_upload_cs_work_groups_surface, };