GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers)
GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi)
GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware)
- GL_ARB_enhanced_layouts not started
+ GL_ARB_enhanced_layouts in progress (Timothy)
+ - compile-time constant expressions in progress
+ - explicit byte offsets for blocks in progress
+ - forced alignment within blocks in progress
+ - specified vec4-slot component numbers in progress
+ - specified transform/feedback layout in progress
+ - input/output block locations in progress
GL_ARB_multi_bind DONE (all drivers)
GL_ARB_query_buffer_object not started
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
<ul>
<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
<li>GL_ARB_shader_storage_buffer_object on i965</li>
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
<li>GL_ARB_texture_query_lod on softpipe</li>
-<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
+<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
+<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
</ul>
<h2>Bug fixes</h2>
{
struct timespec abs_time;
int rt;
+
assert(mtx != NULL);
assert(cond != NULL);
+ assert(xt != NULL);
+
+ abs_time.tv_sec = xt->sec;
+ abs_time.tv_nsec = xt->nsec;
+
rt = pthread_cond_timedwait(cond, mtx, &abs_time);
if (rt == ETIMEDOUT)
return thrd_busy;
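The fix above initializes abs_time from the caller-supplied xtime before
waiting. For reference, a minimal caller looks roughly like this (a sketch
against the C11-draft xtime API this wrapper implements; `ready`, `cond` and
`mtx` are illustrative names):

    xtime xt;
    xtime_get(&xt, TIME_UTC);        /* deadline base: now */
    xt.sec += 2;                     /* wait at most two seconds */

    mtx_lock(&mtx);
    while (!ready) {
       if (cnd_timedwait(&cond, &mtx, &xt) == thrd_busy)
          break;                     /* deadline passed */
    }
    mtx_unlock(&mtx);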
dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
EGLenum colorspace)
{
- if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR)
- return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config :
- conf->dri_srgb_single_config;
- else
- return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config :
- conf->dri_single_config;
+ const bool srgb = colorspace == EGL_GL_COLORSPACE_SRGB_KHR;
+
+ return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config[srgb] :
+ conf->dri_single_config[srgb];
}
static EGLBoolean
if (num_configs == 1) {
conf = (struct dri2_egl_config *) matching_config;
- if (double_buffer && srgb && !conf->dri_srgb_double_config)
- conf->dri_srgb_double_config = dri_config;
- else if (double_buffer && !srgb && !conf->dri_double_config)
- conf->dri_double_config = dri_config;
- else if (!double_buffer && srgb && !conf->dri_srgb_single_config)
- conf->dri_srgb_single_config = dri_config;
- else if (!double_buffer && !srgb && !conf->dri_single_config)
- conf->dri_single_config = dri_config;
+ if (double_buffer && !conf->dri_double_config[srgb])
+ conf->dri_double_config[srgb] = dri_config;
+ else if (!double_buffer && !conf->dri_single_config[srgb])
+ conf->dri_single_config[srgb] = dri_config;
else
/* a similar config type is already added (unlikely) => discard */
return NULL;
if (conf == NULL)
return NULL;
+ if (double_buffer)
+ conf->dri_double_config[srgb] = dri_config;
+ else
+ conf->dri_single_config[srgb] = dri_config;
+
memcpy(&conf->base, &base, sizeof base);
- if (double_buffer) {
- if (srgb)
- conf->dri_srgb_double_config = dri_config;
- else
- conf->dri_double_config = dri_config;
- } else {
- if (srgb)
- conf->dri_srgb_single_config = dri_config;
- else
- conf->dri_single_config = dri_config;
- }
+ conf->base.SurfaceType = 0;
conf->base.ConfigID = config_id;
_eglLinkConfig(&conf->base);
* doubleBufferMode check in
* src/mesa/main/context.c:check_compatible()
*/
- if (dri2_config->dri_double_config)
- dri_config = dri2_config->dri_double_config;
+ if (dri2_config->dri_double_config[0])
+ dri_config = dri2_config->dri_double_config[0];
else
- dri_config = dri2_config->dri_single_config;
+ dri_config = dri2_config->dri_single_config[0];
/* EGL_WINDOW_BIT is set only when there is a dri_double_config. This
* makes sure the back buffer will always be used.
unsigned wait_flags = 0;
EGLint ret = EGL_CONDITION_SATISFIED_KHR;
- if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
+ /* The EGL_KHR_fence_sync spec states:
+ *
+ * "If no context is current for the bound API,
+ * the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored."
+ */
+ if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;
/* the sync object should take a reference while waiting */
dri2_egl_ref_sync(dri2_sync);
- if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
+ if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
dri2_sync->fence, wait_flags,
timeout))
dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
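An application-side illustration of the case this fixes (hypothetical
snippet, not part of the patch): waiting on a fence from a thread with no
current context, where the flush bit must be ignored instead of
dereferencing a NULL context:

    eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
    EGLint r = eglClientWaitSyncKHR(dpy, sync,
                                    EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
                                    EGL_FOREVER_KHR);
    /* r is EGL_CONDITION_SATISFIED_KHR once the fence signals */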
struct dri2_egl_config
{
_EGLConfig base;
- const __DRIconfig *dri_single_config;
- const __DRIconfig *dri_double_config;
- const __DRIconfig *dri_srgb_single_config;
- const __DRIconfig *dri_srgb_double_config;
+ const __DRIconfig *dri_single_config[2];
+ const __DRIconfig *dri_double_config[2];
};
struct dri2_egl_image
struct dri2_egl_surface *dri2_surf;
struct gbm_surface *window = native_window;
struct gbm_dri_surface *surf;
+ const __DRIconfig *config;
(void) drv;
goto cleanup_surf;
}
- if (dri2_dpy->dri2) {
- const __DRIconfig *config =
- dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
- dri2_surf->base.GLColorspace);
+ config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+ dri2_surf->base.GLColorspace);
+ if (dri2_dpy->dri2) {
dri2_surf->dri_drawable =
(*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
dri2_surf->gbm_surf);
} else {
assert(dri2_dpy->swrast != NULL);
+
dri2_surf->dri_drawable =
- (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
- dri2_conf->dri_double_config,
- dri2_surf->gbm_surf);
+ (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+ dri2_surf->gbm_surf);
}
if (dri2_surf->dri_drawable == NULL) {
struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
struct wl_egl_window *window = native_window;
struct dri2_egl_surface *dri2_surf;
+ const __DRIconfig *config;
(void) drv;
dri2_surf->base.Width = -1;
dri2_surf->base.Height = -1;
+ config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+ dri2_surf->base.GLColorspace);
+
dri2_surf->dri_drawable =
- (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
- dri2_conf->dri_double_config,
- dri2_surf);
+ (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen,
+ config, dri2_surf);
if (dri2_surf->dri_drawable == NULL) {
_eglError(EGL_BAD_ALLOC, "swrast->createNewDrawable");
goto cleanup_dri_drawable;
xcb_generic_error_t *error;
xcb_drawable_t drawable;
xcb_screen_t *screen;
+ const __DRIconfig *config;
STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
drawable = (uintptr_t) native_surface;
dri2_surf->drawable = drawable;
}
- if (dri2_dpy->dri2) {
- const __DRIconfig *config =
- dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace);
+ config = dri2_get_dri_config(dri2_conf, type,
+ dri2_surf->base.GLColorspace);
+ if (dri2_dpy->dri2) {
dri2_surf->dri_drawable =
(*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
dri2_surf);
} else {
assert(dri2_dpy->swrast);
dri2_surf->dri_drawable =
- (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
- dri2_conf->dri_double_config,
- dri2_surf);
+ (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+ dri2_surf);
}
if (dri2_surf->dri_drawable == NULL) {
break;
}
+ if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
+ unsigned input;
+
+ if (src0->Register.Indirect && src0->Indirect.ArrayID)
+ input = info->input_array_first[src0->Indirect.ArrayID];
+ else
+ input = src0->Register.Index;
+
+ /* For the INTERP opcodes, the interpolation is always
+ * PERSPECTIVE unless LINEAR is specified.
+ */
+ switch (info->input_interpolate[input]) {
+ case TGSI_INTERPOLATE_COLOR:
+ case TGSI_INTERPOLATE_CONSTANT:
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_INTERP_CENTROID:
+ info->uses_persp_opcode_interp_centroid = true;
+ break;
+ case TGSI_OPCODE_INTERP_OFFSET:
+ info->uses_persp_opcode_interp_offset = true;
+ break;
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ info->uses_persp_opcode_interp_sample = true;
+ break;
+ }
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_INTERP_CENTROID:
+ info->uses_linear_opcode_interp_centroid = true;
+ break;
+ case TGSI_OPCODE_INTERP_OFFSET:
+ info->uses_linear_opcode_interp_offset = true;
+ break;
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ info->uses_linear_opcode_interp_sample = true;
+ break;
+ }
+ break;
+ }
+ }
+
if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
info->uses_doubles = true;
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
info->num_inputs++;
- if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
- info->uses_centroid = TRUE;
+ /* Only interpolated varyings. Don't include POSITION.
+ * Don't include integer varyings, because they are not
+ * interpolated.
+ */
+ if (semName == TGSI_SEMANTIC_GENERIC ||
+ semName == TGSI_SEMANTIC_TEXCOORD ||
+ semName == TGSI_SEMANTIC_COLOR ||
+ semName == TGSI_SEMANTIC_BCOLOR ||
+ semName == TGSI_SEMANTIC_FOG ||
+ semName == TGSI_SEMANTIC_CLIPDIST ||
+ semName == TGSI_SEMANTIC_CULLDIST) {
+ switch (fulldecl->Interp.Interpolate) {
+ case TGSI_INTERPOLATE_COLOR:
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ switch (fulldecl->Interp.Location) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ info->uses_persp_center = true;
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ info->uses_persp_centroid = true;
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ info->uses_persp_sample = true;
+ break;
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (fulldecl->Interp.Location) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ info->uses_linear_center = true;
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ info->uses_linear_centroid = true;
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ info->uses_linear_sample = true;
+ break;
+ }
+ break;
+ /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
+ }
+ }
if (semName == TGSI_SEMANTIC_PRIMID)
info->uses_primid = TRUE;
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KILL or KILL_IF instruction used? */
- boolean uses_centroid;
+ boolean uses_persp_center;
+ boolean uses_persp_centroid;
+ boolean uses_persp_sample;
+ boolean uses_linear_center;
+ boolean uses_linear_centroid;
+ boolean uses_linear_sample;
+ boolean uses_persp_opcode_interp_centroid;
+ boolean uses_persp_opcode_interp_offset;
+ boolean uses_persp_opcode_interp_sample;
+ boolean uses_linear_opcode_interp_centroid;
+ boolean uses_linear_opcode_interp_offset;
+ boolean uses_linear_opcode_interp_sample;
boolean uses_instanceid;
boolean uses_vertexid;
boolean uses_vertexid_nobase;
bounds_max states of pipe_depth_stencil_alpha_state behave according
to the GL_EXT_depth_bounds_test specification.
* ``PIPE_CAP_TGSI_TXQS``: Whether the `TXQS` opcode is supported
+* ``PIPE_CAP_FORCE_PERSAMPLE_INTERP``: Whether the driver can force per-sample
+  interpolation for all fragment shader inputs when
+ pipe_rasterizer_state::force_persample_interp is set. This is only used
+ by GL3-level sample shading (ARB_sample_shading). GL4-level sample shading
+ (ARB_gpu_shader5) doesn't use this. While GL3 hardware has a state for it,
+ GL4 hardware will likely need to emulate it with a shader variant, or by
+ selecting the interpolation weights with a conditional assignment
+ in the shader.
+
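A sketch of the GL4-level fallback described above, as hypothetical driver
code (``key.force_persample_interp`` is an illustrative shader-key field)::

   if (screen->get_param(screen, PIPE_CAP_FORCE_PERSAMPLE_INTERP)) {
      /* GL3-level: flip the rasterizer bit, no shader recompile */
      rs_state.force_persample_interp = enable;
   } else {
      /* GL4-level: bake the choice into a fragment shader variant */
      key.force_persample_interp = enable;
   }
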
.. _pipe_capf:
C_SOURCES := \
- dd_pipe.h \
- dd_public.h \
dd_context.c \
dd_draw.c \
- dd_screen.c
+ dd_pipe.h \
+ dd_public.h \
+ dd_screen.c \
+ dd_util.h
#include "util/u_dump.h"
#include "util/u_format.h"
#include "tgsi/tgsi_scan.h"
-#include "os/os_process.h"
-#include <errno.h>
-#include <sys/stat.h>
enum call_type
} info;
};
-
static FILE *
dd_get_file_stream(struct dd_context *dctx)
{
struct pipe_screen *screen = dctx->pipe->screen;
- static unsigned index;
- char proc_name[128], dir[256], name[512];
- FILE *f;
-
- if (!os_get_process_name(proc_name, sizeof(proc_name))) {
- fprintf(stderr, "dd: can't get the process name\n");
- return NULL;
- }
-
- snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
-
- if (mkdir(dir, 0774) && errno != EEXIST) {
- fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
- return NULL;
- }
-
- snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++);
- f = fopen(name, "w");
- if (!f) {
- fprintf(stderr, "dd: can't open file %s\n", name);
+ FILE *f = dd_get_debug_file();
+ if (!f)
return NULL;
- }
fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_screen.h"
-
-/* name of the directory in home */
-#define DD_DIR "ddebug_dumps"
+#include "dd_util.h"
enum dd_mode {
DD_DETECT_HANGS,
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DD_UTIL_H
+#define DD_UTIL_H
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "os/os_process.h"
+#include "util/u_debug.h"
+
+/* name of the directory in home */
+#define DD_DIR "ddebug_dumps"
+
+static inline FILE *
+dd_get_debug_file(void)
+{
+ static unsigned index;
+ char proc_name[128], dir[256], name[512];
+ FILE *f;
+
+ if (!os_get_process_name(proc_name, sizeof(proc_name))) {
+ fprintf(stderr, "dd: can't get the process name\n");
+ return NULL;
+ }
+
+ snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
+
+ if (mkdir(dir, 0774) && errno != EEXIST) {
+ fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
+ return NULL;
+ }
+
+ snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++);
+ f = fopen(name, "w");
+ if (!f) {
+ fprintf(stderr, "dd: can't open file %s\n", name);
+ return NULL;
+ }
+
+ return f;
+}
+
+#endif /* DD_UTIL_H */
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
case PIPE_CAP_VENDOR_ID:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
}
/* should only get here on unhandled cases */
static inline bool
PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
{
+ /* Provide a buffer so that fences always have room to be emitted */
+ size += 8;
if (PUSH_AVAIL(push) < size)
return nouveau_pushbuf_space(push, size, 0, 0) == 0;
return true;
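/* Illustrative sketch (not part of the change): because PUSH_SPACE pads by
 * eight dwords, a later fence emit can append its few dwords without a
 * second space check. A caller pattern this enables: */
static inline void
example_emit_state_then_fence(struct nouveau_pushbuf *push)
{
   if (!PUSH_SPACE(push, 16))   /* actually reserves 16 + 8 dwords */
      return;
   /* ... emit up to 16 dwords of state ... */
   /* fence emission can now rely on the slack; the nv30/nv50/nvc0 hunks
    * below assert PUSH_AVAIL(push) >= 3 or >= 5 accordingly. */
}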
_(B4G4R4X4_UNORM , S___),
_(B4G4R4A4_UNORM , S___),
_(B5G6R5_UNORM , SB__),
- _(B8G8R8X8_UNORM , SB__),
- _(B8G8R8X8_SRGB , S___),
- _(B8G8R8A8_UNORM , SB__),
- _(B8G8R8A8_SRGB , S___),
+ _(BGRX8888_UNORM , SB__),
+ _(BGRX8888_SRGB , S___),
+ _(BGRA8888_UNORM , SB__),
+ _(BGRA8888_SRGB , S___),
_(R8G8B8A8_UNORM , __V_),
- _(R8G8B8A8_SNORM , S___),
+ _(RGBA8888_SNORM , S___),
_(DXT1_RGB , S___),
_(DXT1_SRGB , S___),
_(DXT1_RGBA , S___),
nv30_format_table[PIPE_FORMAT_COUNT] = {
R_(B5G5R5X1_UNORM , X1R5G5B5 ),
R_(B5G6R5_UNORM , R5G6B5 ),
- R_(B8G8R8X8_UNORM , X8R8G8B8 ),
- R_(B8G8R8A8_UNORM , A8R8G8B8 ),
+ R_(BGRX8888_UNORM , X8R8G8B8 ),
+ R_(BGRA8888_UNORM , A8R8G8B8 ),
Z_(Z16_UNORM , Z16 ),
Z_(X8Z24_UNORM , Z24S8 ),
Z_(S8_UINT_Z24_UNORM , Z24S8 ),
_(B4G4R4X4_UNORM , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
_(B4G4R4A4_UNORM , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
_(B5G6R5_UNORM , R5G6B5 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
- _(B8G8R8X8_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
- _(B8G8R8X8_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
- _(B8G8R8A8_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
- _(B8G8R8A8_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
- _(R8G8B8A8_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
+ _(BGRX8888_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
+ _(BGRX8888_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
+ _(BGRA8888_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
+ _(BGRA8888_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
+ _(RGBA8888_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
_(DXT1_RGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
_(DXT1_SRGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
_(DXT1_RGBA , DXT1 , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
case PIPE_CAP_VENDOR_ID:
*sequence = ++screen->base.fence.sequence;
- BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2);
+ assert(PUSH_AVAIL(push) >= 3);
+ PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
+ (2 /* size */ << 18) | (7 /* subchan */ << 13));
PUSH_DATA (push, 0);
PUSH_DATA (push, *sequence);
}
if (!nv30->vertex || nv30->draw_flags)
return;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ if (1) { /* Figure out where the buffers are getting messed up */
+#else
if (unlikely(vertex->need_conversion)) {
+#endif
nv30->vbo_fifo = ~0;
nv30->vbo_user = 0;
} else {
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
case PIPE_CAP_VENDOR_ID:
/* we need to do it after possible flush in MARK_RING */
*sequence = ++screen->base.fence.sequence;
+ assert(PUSH_AVAIL(push) >= 5);
PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
PUSH_DATAh(push, screen->fence.bo->offset);
PUSH_DATA (push, screen->fence.bo->offset);
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
case PIPE_CAP_VENDOR_ID:
/* we need to do it after possible flush in MARK_RING */
*sequence = ++screen->base.fence.sequence;
- BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+ assert(PUSH_AVAIL(push) >= 5);
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
PUSH_DATAh(push, screen->fence.bo->offset);
PUSH_DATA (push, screen->fence.bo->offset);
PUSH_DATA (push, *sequence);
/**
- * Writing relocations.
+ * Writing buffers.
*/
#define OUT_CS_RELOC(r) do { \
assert((r)); \
assert((r)->cs_buf); \
OUT_CS(0xc0001000); /* PKT3_NOP */ \
- OUT_CS(cs_winsys->cs_get_reloc(cs_copy, (r)->cs_buf) * 4); \
+ OUT_CS(cs_winsys->cs_lookup_buffer(cs_copy, (r)->cs_buf) * 4); \
} while (0)
assert(r300->vbo_cs);
OUT_CS(0xc0001000); /* PKT3_NOP */
- OUT_CS(r300->rws->cs_get_reloc(r300->cs, r300->vbo_cs) * 4);
+ OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo_cs) * 4);
END_CS;
}
continue;
tex = r300_resource(fb->cbufs[i]->texture);
assert(tex && tex->buf && "cbuf is marked, but NULL!");
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
RADEON_USAGE_READWRITE,
r300_surface(fb->cbufs[i])->domain,
tex->b.b.nr_samples > 1 ?
if (fb->zsbuf) {
tex = r300_resource(fb->zsbuf->texture);
assert(tex && tex->buf && "zsbuf is marked, but NULL!");
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
RADEON_USAGE_READWRITE,
r300_surface(fb->zsbuf)->domain,
tex->b.b.nr_samples > 1 ?
/* The AA resolve buffer. */
if (r300->aa_state.dirty) {
if (aa->dest) {
- r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, aa->dest->cs_buf,
RADEON_USAGE_WRITE,
aa->dest->domain,
RADEON_PRIO_COLOR_BUFFER);
}
tex = r300_resource(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
- tex->domain, RADEON_PRIO_SHADER_TEXTURE_RO);
+ r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
+ tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
}
}
/* ...occlusion query buffer... */
if (r300->query_current)
- r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buf,
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo_cs)
- r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs,
+ r300->rws->cs_add_buffer(r300->cs, r300->vbo_cs,
RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_VERTEX_BUFFER);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
if (!buf)
continue;
- r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->cs_buf,
RADEON_USAGE_READ,
r300_resource(buf)->domain,
- RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_PRIO_SAMPLER_BUFFER);
}
}
/* ...and index buffer for HWTCL path. */
if (index_buffer)
- r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->cs_buf,
RADEON_USAGE_READ,
r300_resource(index_buffer)->domain,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_INDEX_BUFFER);
/* Now do the validation (flush is called inside cs_validate on failure). */
if (!r300->rws->cs_validate(r300->cs)) {
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
/* SWTCL-only features. */
unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
(struct r600_resource*)cb->base.texture,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_SHADER_RW_BUFFER);
radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
code_bo, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA));
+ RADEON_PRIO_USER_SHADER));
}
static void evergreen_launch_grid(
csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
cs->buf[cs->cdw++] = src_offset & 0xffffffff;
/* This must be done after r600_need_cs_space. */
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_CP_DMA);
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, clear_value); /* DATA [31:0] */
va = tmp->resource.gpu_address;
+ if (state->format == PIPE_FORMAT_X24S8_UINT ||
+ state->format == PIPE_FORMAT_S8X24_UINT ||
+ state->format == PIPE_FORMAT_X32_S8X24_UINT ||
+ state->format == PIPE_FORMAT_S8_UINT)
+ view->is_stencil_sampler = true;
+
view->tex_resource = &tmp->resource;
view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
S_030000_PITCH((pitch / 8) - 1) |
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_COLOR_META);
+ RADEON_PRIO_CMASK);
} else {
cmask_reloc = reloc;
}
radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
} else {
S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
S_028000_COPY_CENTROID(1) |
S_028000_COPY_SAMPLE(a->copy_sample);
- } else if (a->flush_depthstencil_in_place) {
- db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) |
- S_028000_STENCIL_COMPRESS_DISABLE(1);
+ } else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
+ db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
+ S_028000_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
}
if (a->htile_clear) {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
}
state->dirty_mask = 0;
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
dirty_mask &= ~(1 << buffer_index);
}
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
RADEON_USAGE_READ,
- rview->tex_resource->b.b.nr_samples > 1 ?
- RADEON_PRIO_SHADER_TEXTURE_MSAA :
- RADEON_PRIO_SHADER_TEXTURE_RO);
+ r600_get_sampler_view_priority(rview->tex_resource));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
(shader->buffer->gpu_address + shader->offset) >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INTERNAL_SHADER));
}
static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
state->esgs_ring.buffer_size >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
state->gsvs_ring.buffer_size >> 8);
} else {
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
cs->buf[cs->cdw++] = base >> 8;
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
}
if (src->format != dst->format || src_box->depth > 1 ||
- rdst->dirty_level_mask != 0) {
+ (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
goto fallback;
}
- if (rsrc->dirty_level_mask) {
+ if (rsrc->dirty_level_mask & (1 << src_level)) {
ctx->flush_resource(ctx, src);
}
static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
struct r600_texture *texture,
+ bool is_stencil_sampler,
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer)
{
struct pipe_surface *zsurf, surf_tmpl = {{0}};
unsigned layer, max_layer, checked_last_layer, level;
+ unsigned *dirty_level_mask;
/* Enable decompression in DB_RENDER_CONTROL */
- rctx->db_misc_state.flush_depthstencil_in_place = true;
+ if (is_stencil_sampler) {
+ rctx->db_misc_state.flush_stencil_inplace = true;
+ dirty_level_mask = &texture->stencil_dirty_level_mask;
+ } else {
+ rctx->db_misc_state.flush_depth_inplace = true;
+ dirty_level_mask = &texture->dirty_level_mask;
+ }
r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
surf_tmpl.format = texture->resource.b.b.format;
for (level = first_level; level <= last_level; level++) {
- if (!(texture->dirty_level_mask & (1 << level)))
+ if (!(*dirty_level_mask & (1 << level)))
continue;
surf_tmpl.u.tex.level = level;
/* The texture will always be dirty if some layers or samples aren't flushed.
* I don't think this case occurs often though. */
if (first_layer == 0 && last_layer == max_layer) {
- texture->dirty_level_mask &= ~(1 << level);
+ *dirty_level_mask &= ~(1 << level);
}
}
/* Disable decompression in DB_RENDER_CONTROL */
- rctx->db_misc_state.flush_depthstencil_in_place = false;
+ rctx->db_misc_state.flush_depth_inplace = false;
+ rctx->db_misc_state.flush_stencil_inplace = false;
r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
while (depth_texture_mask) {
struct pipe_sampler_view *view;
+ struct r600_pipe_sampler_view *rview;
struct r600_texture *tex;
i = u_bit_scan(&depth_texture_mask);
view = &textures->views[i]->base;
assert(view);
+ rview = (struct r600_pipe_sampler_view*)view;
tex = (struct r600_texture *)view->texture;
assert(tex->is_depth && !tex->is_flushing_texture);
if (rctx->b.chip_class >= EVERGREEN ||
r600_can_read_depth(tex)) {
r600_blit_decompress_depth_in_place(rctx, tex,
+ rview->is_stencil_sampler,
view->u.tex.first_level, view->u.tex.last_level,
0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
} else {
if (rtex->is_depth && !rtex->is_flushing_texture) {
if (rctx->b.chip_class >= EVERGREEN ||
r600_can_read_depth(rtex)) {
- r600_blit_decompress_depth_in_place(rctx, rtex,
+ r600_blit_decompress_depth_in_place(rctx, rtex, false,
level, level,
first_layer, last_layer);
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+ r600_blit_decompress_depth_in_place(rctx, rtex, true,
+ level, level,
+ first_layer, last_layer);
+ }
} else {
if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
return false; /* error */
/* This must be done after r600_need_cs_space. */
src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */
csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
cs->buf[cs->cdw++] = src_offset & 0xfffffffc;
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
/* Stream output. */
struct r600_atom atom;
bool occlusion_query_enabled;
bool flush_depthstencil_through_cb;
- bool flush_depthstencil_in_place;
+ bool flush_depth_inplace;
+ bool flush_stencil_inplace;
bool copy_depth, copy_stencil;
unsigned copy_sample;
unsigned log_samples;
struct r600_resource *tex_resource;
uint32_t tex_resource_words[8];
bool skip_mip_address_reloc;
+ bool is_stencil_sampler;
};
struct r600_rasterizer_state {
break;
}
+ if (state->format == PIPE_FORMAT_X24S8_UINT ||
+ state->format == PIPE_FORMAT_S8X24_UINT ||
+ state->format == PIPE_FORMAT_X32_S8X24_UINT ||
+ state->format == PIPE_FORMAT_S8_UINT)
+ view->is_stencil_sampler = true;
+
view->tex_resource = &tmp->resource;
view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
S_038000_TILE_MODE(array_mode) |
radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
} else {
if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 ||
rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RV635)
db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
- } else if (a->flush_depthstencil_in_place) {
- db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) |
- S_028D0C_STENCIL_COMPRESS_DISABLE(1);
+ } else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
+ db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
+ S_028D0C_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
}
if (a->htile_clear) {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
}
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
dirty_mask &= ~(1 << buffer_index);
}
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
RADEON_USAGE_READ,
- rview->tex_resource->b.b.nr_samples > 1 ?
- RADEON_PRIO_SHADER_TEXTURE_MSAA :
- RADEON_PRIO_SHADER_TEXTURE_RO);
+ r600_get_sampler_view_priority(rview->tex_resource));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INTERNAL_SHADER));
}
static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
state->esgs_ring.buffer_size >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
state->gsvs_ring.buffer_size >> 8);
} else {
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_TEXTURE);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
cs->buf[cs->cdw++] = base >> 8;
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)info.indirect,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_DRAW_INDIRECT);
}
if (info.indexed) {
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)ib.buffer,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INDEX_BUFFER);
}
else {
uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)ib.buffer,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INDEX_BUFFER);
cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = max_size;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SO_FILLED_SIZE);
}
if (likely(!info.indirect)) {
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+ rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (rctx->framebuffer.compressed_cb_mask) {
struct pipe_surface *surf;
r600_emit_command_buffer(cs, &shader->command_buffer);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+ RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER));
}
unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
va = rscreen->b.trace_bo->gpu_address;
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
- RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
radeon_emit(cs, va & 0xFFFFFFFFUL);
radeon_emit(cs, (va >> 32UL) & 0xFFUL);
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
}
}
- return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
+ return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage,
rbo->domains, priority) * 4;
}
{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
+ { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
DEBUG_NAMED_VALUE_END /* must be last */
};
#define DBG_PRECOMPILE (1llu << 39)
#define DBG_INFO (1llu << 40)
#define DBG_NO_WC (1llu << 41)
+#define DBG_CHECK_VM (1llu << 42)
#define R600_MAP_BUFFER_ALIGNMENT 64
unsigned pitch_override;
bool is_depth;
unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
+ unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
struct r600_texture *flushed_depth_texture;
boolean is_flushing_texture;
struct radeon_surf surface;
}
}
+static inline enum radeon_bo_priority
+r600_get_sampler_view_priority(struct r600_resource *res)
+{
+ if (res->b.b.target == PIPE_BUFFER)
+ return RADEON_PRIO_SAMPLER_BUFFER;
+
+ if (res->b.b.nr_samples > 1)
+ return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
+
+ return RADEON_PRIO_SAMPLER_TEXTURE;
+}
+
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
if (r600_is_timer_query(query->type))
ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
query->buffer.results_end += query->result_size;
radeon_emit(cs, va + results_base);
radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
results_base += query->result_size;
/* set CONTINUE bit for all packets except the first */
radeon_emit(cs, buffer->gpu_address);
radeon_emit(cs, buffer->gpu_address >> 32);
- r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ r600_emit_reloc(ctx, &ctx->rings.gfx, buffer,
+ RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
/* analyze results */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
radeon_emit(cs, va >> 8); /* BUFFER_BASE */
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
/* R7xx requires this packet after updating BUFFER_BASE.
* Without this, R7xx locks up. */
radeon_emit(cs, va >> 8);
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
}
}
radeon_emit(cs, va >> 32); /* src address hi */
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
} else {
/* Start from the beginning. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, 0); /* unused */
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
/* Zero the buffer size. The counters (primitives generated,
* primitives emitted) may be enabled even if there is not
{
int reloc_idx;
- reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
- RADEON_PRIO_MIN);
+ reloc_idx = dec->ws->cs_add_buffer(dec->cs, cs_buf, usage, domain,
+ RADEON_PRIO_UVD);
if (!dec->use_legacy) {
uint64_t addr;
addr = dec->ws->buffer_get_virtual_address(cs_buf);
{
int reloc_idx;
- reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN);
+ reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
if (enc->use_vm) {
uint64_t addr;
addr = enc->ws->buffer_get_virtual_address(buf);
RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
};
+/* Each group of four has the same priority. */
enum radeon_bo_priority {
- RADEON_PRIO_MIN,
- RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
- RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
- RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
- RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
- RADEON_PRIO_COLOR_BUFFER,
- RADEON_PRIO_DEPTH_BUFFER,
- RADEON_PRIO_SHADER_TEXTURE_MSAA,
- RADEON_PRIO_COLOR_BUFFER_MSAA,
- RADEON_PRIO_DEPTH_BUFFER_MSAA,
- RADEON_PRIO_COLOR_META,
- RADEON_PRIO_DEPTH_META,
- RADEON_PRIO_MAX /* must be <= 15 */
+ RADEON_PRIO_FENCE = 0,
+ RADEON_PRIO_TRACE,
+ RADEON_PRIO_SO_FILLED_SIZE,
+ RADEON_PRIO_QUERY,
+
+ RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
+ RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
+ RADEON_PRIO_DRAW_INDIRECT,
+ RADEON_PRIO_INDEX_BUFFER,
+
+ RADEON_PRIO_CP_DMA = 8,
+
+ RADEON_PRIO_VCE = 12,
+ RADEON_PRIO_UVD,
+ RADEON_PRIO_SDMA_BUFFER,
+ RADEON_PRIO_SDMA_TEXTURE,
+
+ RADEON_PRIO_USER_SHADER = 16,
+ RADEON_PRIO_INTERNAL_SHADER, /* fetch shader, etc. */
+
+ /* gap: 20 */
+
+ RADEON_PRIO_CONST_BUFFER = 24,
+ RADEON_PRIO_DESCRIPTORS,
+ RADEON_PRIO_BORDER_COLORS,
+
+ RADEON_PRIO_SAMPLER_BUFFER = 28,
+ RADEON_PRIO_VERTEX_BUFFER,
+
+ RADEON_PRIO_SHADER_RW_BUFFER = 32,
+ RADEON_PRIO_RINGS_STREAMOUT,
+ RADEON_PRIO_SCRATCH_BUFFER,
+ RADEON_PRIO_COMPUTE_GLOBAL,
+
+ RADEON_PRIO_SAMPLER_TEXTURE = 36,
+ RADEON_PRIO_SHADER_RW_IMAGE,
+
+ RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 40,
+
+ RADEON_PRIO_COLOR_BUFFER = 44,
+
+ RADEON_PRIO_DEPTH_BUFFER = 48,
+
+ RADEON_PRIO_COLOR_BUFFER_MSAA = 52,
+
+ RADEON_PRIO_DEPTH_BUFFER_MSAA = 56,
+
+ RADEON_PRIO_CMASK = 60,
+ RADEON_PRIO_DCC,
+ RADEON_PRIO_HTILE,
+ /* 63 is the maximum value */
};
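/* Illustrative aside (an assumption, not part of the patch): with groups of
 * four sharing one effective priority and 63 as the ceiling, a value folds
 * into a classic 4-bit (0..15) hardware priority like this: */
static inline unsigned radeon_bo_hw_priority(enum radeon_bo_priority prio)
{
   return (unsigned)prio / 4;   /* e.g. RADEON_PRIO_HTILE (62) -> 15 */
}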
struct winsys_handle;
uint32_t num_banks;
};
+struct radeon_bo_list_item {
+ struct pb_buffer *buf;
+ uint64_t vm_address;
+ uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
+};
+
struct radeon_winsys {
/**
* The screen object this winsys was created for
void (*cs_destroy)(struct radeon_winsys_cs *cs);
/**
- * Add a new buffer relocation. Every relocation must first be added
- * before it can be written.
+ * Add a buffer. Each buffer used by a CS must be added using this function.
*
- * \param cs A command stream to add buffer for validation against.
- * \param buf A winsys buffer to validate.
+ * \param cs Command stream
+ * \param buf Buffer
* \param usage Whether the buffer is used for read and/or write.
* \param domain Bitmask of the RADEON_DOMAIN_* flags.
* \param priority A higher number means a greater chance of being
* placed in the requested domain. 63 is the maximum.
- * \return Relocation index.
+ * \return Buffer index.
*/
- unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
+ unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domain,
* \param buf Buffer
* \return The buffer index, or -1 if the buffer has not been added.
*/
- int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
- struct radeon_winsys_cs_handle *buf);
+ int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs,
+ struct radeon_winsys_cs_handle *buf);
/**
- * Return TRUE if there is enough memory in VRAM and GTT for the relocs
- * added so far. If the validation fails, all the relocations which have
+ * Return TRUE if there is enough memory in VRAM and GTT for the buffers
+ * added so far. If the validation fails, all buffers which have
* been added since the last call of cs_validate will be removed and
- * the CS will be flushed (provided there are still any relocations).
+ * the CS will be flushed (provided there are still any buffers).
*
* \param cs A command stream to validate.
*/
boolean (*cs_validate)(struct radeon_winsys_cs *cs);
/**
- * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+ * Return TRUE if there is enough memory in VRAM and GTT for the buffers
* added so far.
*
* \param cs A command stream to validate.
*/
boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
+ /**
+ * Return the buffer list.
+ *
+ * \param cs Command stream
+ * \param list Returned buffer list. Set to NULL to query the count only.
+ * \return The buffer count.
+ */
+ unsigned (*cs_get_buffer_list)(struct radeon_winsys_cs *cs,
+ struct radeon_bo_list_item *list);
+
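    /* Two-call usage pattern implied by the comment above (an illustrative
     * sketch; the reference drop mirrors what the radeonsi debug code
     * further down does):
     *
     *    unsigned i, count = ws->cs_get_buffer_list(cs, NULL);
     *    struct radeon_bo_list_item *list = malloc(count * sizeof(*list));
     *
     *    if (list && ws->cs_get_buffer_list(cs, list) == count) {
     *       ... inspect list[i].vm_address / list[i].priority_usage ...
     *       for (i = 0; i < count; i++)
     *          pb_reference(&list[i].buf, NULL);
     *    }
     *    free(list);
     */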
/**
* Flush a command stream.
*
r600_need_dma_space(&ctx->b, ncopy * 7);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
for (i = 0; i < ncopy; i++) {
csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
r600_need_dma_space(&ctx->b, ncopy * 12);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
copy_height = size * 4 / pitch;
for (i = 0; i < ncopy; i++) {
if (src->format != dst->format ||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
- rdst->dirty_level_mask & (1 << dst_level)) {
+ (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
goto fallback;
}
static void si_blit_decompress_depth_in_place(struct si_context *sctx,
struct r600_texture *texture,
+ bool is_stencil_sampler,
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer)
{
struct pipe_surface *zsurf, surf_tmpl = {{0}};
unsigned layer, max_layer, checked_last_layer, level;
-
- sctx->db_inplace_flush_enabled = true;
+ unsigned *dirty_level_mask;
+
+ if (is_stencil_sampler) {
+ sctx->db_flush_stencil_inplace = true;
+ dirty_level_mask = &texture->stencil_dirty_level_mask;
+ } else {
+ sctx->db_flush_depth_inplace = true;
+ dirty_level_mask = &texture->dirty_level_mask;
+ }
si_mark_atom_dirty(sctx, &sctx->db_render_state);
surf_tmpl.format = texture->resource.b.b.format;
for (level = first_level; level <= last_level; level++) {
- if (!(texture->dirty_level_mask & (1 << level)))
+ if (!(*dirty_level_mask & (1 << level)))
continue;
surf_tmpl.u.tex.level = level;
/* The texture will always be dirty if some layers aren't flushed.
* I don't think this case occurs often though. */
if (first_layer == 0 && last_layer == max_layer) {
- texture->dirty_level_mask &= ~(1 << level);
+ *dirty_level_mask &= ~(1 << level);
}
}
- sctx->db_inplace_flush_enabled = false;
+ sctx->db_flush_depth_inplace = false;
+ sctx->db_flush_stencil_inplace = false;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
while (mask) {
struct pipe_sampler_view *view;
+ struct si_sampler_view *sview;
struct r600_texture *tex;
i = u_bit_scan(&mask);
view = textures->views.views[i];
assert(view);
+ sview = (struct si_sampler_view*)view;
tex = (struct r600_texture *)view->texture;
assert(tex->is_depth && !tex->is_flushing_texture);
si_blit_decompress_depth_in_place(sctx, tex,
+ sview->is_stencil_sampler,
view->u.tex.first_level, view->u.tex.last_level,
0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
}
struct r600_texture *rtex = (struct r600_texture*)tex;
if (rtex->is_depth && !rtex->is_flushing_texture) {
- si_blit_decompress_depth_in_place(sctx, rtex,
+ si_blit_decompress_depth_in_place(sctx, rtex, false,
level, level,
first_layer, last_layer);
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+ si_blit_decompress_depth_in_place(sctx, rtex, true,
+ level, level,
+ first_layer, last_layer);
} else if (rtex->fmask.size || rtex->cmask.size) {
si_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
shader->scratch_bo,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_SCRATCH_BUFFER);
scratch_buffer_va = shader->scratch_bo->gpu_address;
}
kernel_args_va += kernel_args_offset;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4,
               S_008F04_BASE_ADDRESS_HI(kernel_args_va >> 32) |
               S_008F04_STRIDE(0));
}
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_COMPUTE_GLOBAL);
}
/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
shader_va += pc;
#endif
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
/* This must be done after need_cs_space. */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_CP_DMA);
/* Flush the caches for the first copy only.
* Also wait for the previous CP DMA operations. */
/* This must be done after r600_need_cs_space. */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
#include "si_shader.h"
#include "sid.h"
#include "sid_tables.h"
+#include "ddebug/dd_util.h"
static void si_dump_shader(struct si_shader_selector *sel, const char *name,
fprintf(f, "\n");
}
+static void si_dump_last_ib(struct si_context *sctx, FILE *f)
+{
+ int last_trace_id = -1;
+
+ if (!sctx->last_ib)
+ return;
+
+ if (sctx->last_trace_buf) {
+ /* We are expecting that the ddebug pipe has already
+ * waited for the context, so this buffer should be idle.
+ * If the GPU is hung, there is no point in waiting for it.
+ */
+ uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
+ NULL,
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_READ);
+ if (map)
+ last_trace_id = *map;
+ }
+
+ si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
+ last_trace_id);
+ free(sctx->last_ib); /* dump only once */
+ sctx->last_ib = NULL;
+ r600_resource_reference(&sctx->last_trace_buf, NULL);
+}
+
+static const char *priority_to_string(enum radeon_bo_priority priority)
+{
+#define ITEM(x) [RADEON_PRIO_##x] = #x
+ static const char *table[64] = {
+ ITEM(FENCE),
+ ITEM(TRACE),
+ ITEM(SO_FILLED_SIZE),
+ ITEM(QUERY),
+ ITEM(IB1),
+ ITEM(IB2),
+ ITEM(DRAW_INDIRECT),
+ ITEM(INDEX_BUFFER),
+ ITEM(CP_DMA),
+ ITEM(VCE),
+ ITEM(UVD),
+ ITEM(SDMA_BUFFER),
+ ITEM(SDMA_TEXTURE),
+ ITEM(USER_SHADER),
+ ITEM(INTERNAL_SHADER),
+ ITEM(CONST_BUFFER),
+ ITEM(DESCRIPTORS),
+ ITEM(BORDER_COLORS),
+ ITEM(SAMPLER_BUFFER),
+ ITEM(VERTEX_BUFFER),
+ ITEM(SHADER_RW_BUFFER),
+ ITEM(RINGS_STREAMOUT),
+ ITEM(SCRATCH_BUFFER),
+ ITEM(COMPUTE_GLOBAL),
+ ITEM(SAMPLER_TEXTURE),
+ ITEM(SHADER_RW_IMAGE),
+ ITEM(SAMPLER_TEXTURE_MSAA),
+ ITEM(COLOR_BUFFER),
+ ITEM(DEPTH_BUFFER),
+ ITEM(COLOR_BUFFER_MSAA),
+ ITEM(DEPTH_BUFFER_MSAA),
+ ITEM(CMASK),
+ ITEM(DCC),
+ ITEM(HTILE),
+ };
+#undef ITEM
+
+ assert(priority < ARRAY_SIZE(table));
+ return table[priority];
+}
+
+static int bo_list_compare_va(const struct radeon_bo_list_item *a,
+ const struct radeon_bo_list_item *b)
+{
+ return a->vm_address < b->vm_address ? -1 :
+ a->vm_address > b->vm_address ? 1 : 0;
+}
+
+static void si_dump_last_bo_list(struct si_context *sctx, FILE *f)
+{
+ unsigned i, j;
+
+ if (!sctx->last_bo_list)
+ return;
+
+ /* Sort the list according to VM addresses first. */
+ qsort(sctx->last_bo_list, sctx->last_bo_count,
+ sizeof(sctx->last_bo_list[0]), (void*)bo_list_compare_va);
+
+ fprintf(f, "Buffer list (in units of pages = 4kB):\n"
+ COLOR_YELLOW " Size VM start page "
+ "VM end page Usage" COLOR_RESET "\n");
+
+ for (i = 0; i < sctx->last_bo_count; i++) {
+ /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
+ const unsigned page_size = 4096;
+ uint64_t va = sctx->last_bo_list[i].vm_address;
+ uint64_t size = sctx->last_bo_list[i].buf->size;
+ bool hit = false;
+
+ /* If there's unused virtual memory between 2 buffers, print it. */
+ if (i) {
+ uint64_t previous_va_end = sctx->last_bo_list[i-1].vm_address +
+ sctx->last_bo_list[i-1].buf->size;
+
+ if (va > previous_va_end) {
+ fprintf(f, " %10"PRIu64" -- hole --\n",
+ (va - previous_va_end) / page_size);
+ }
+ }
+
+ /* Print the buffer. */
+ fprintf(f, " %10"PRIu64" 0x%013"PRIx64" 0x%013"PRIx64" ",
+ size / page_size, va / page_size, (va + size) / page_size);
+
+ /* Print the usage. */
+ for (j = 0; j < 64; j++) {
+ if (!(sctx->last_bo_list[i].priority_usage & (1llu << j)))
+ continue;
+
+ fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
+ hit = true;
+ }
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
+ " Other buffers can still be allocated there.\n\n");
+
+ for (i = 0; i < sctx->last_bo_count; i++)
+ pb_reference(&sctx->last_bo_list[i].buf, NULL);
+ free(sctx->last_bo_list);
+ sctx->last_bo_list = NULL;
+}
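+/* Illustrative output (all numbers made up):
+ *
+ *   Buffer list (in units of pages = 4kB):
+ *         Size VM start page    VM end page    Usage
+ *           32 0x0000000400000  0x0000000400020  DESCRIPTORS
+ *            8           -- hole --
+ *           64 0x0000000400028  0x0000000400068  VERTEX_BUFFER, CONST_BUFFER
+ */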
+
static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
unsigned flags)
{
si_dump_shader(sctx->gs_shader, "Geometry", f);
si_dump_shader(sctx->ps_shader, "Fragment", f);
- if (sctx->last_ib) {
- int last_trace_id = -1;
+ si_dump_last_bo_list(sctx, f);
+ si_dump_last_ib(sctx, f);
- if (sctx->last_trace_buf) {
- /* We are expecting that the ddebug pipe has already
- * waited for the context, so this buffer should be idle.
- * If the GPU is hung, there is no point in waiting for it.
- */
- uint32_t *map =
- sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
- NULL,
- PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_READ);
- if (map)
- last_trace_id = *map;
+ fprintf(f, "Done.\n");
+}
+
+static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
+{
+ char line[2000];
+ unsigned sec, usec;
+ int progress = 0;
+ uint64_t timestamp = 0;
+ bool fault = false;
+
+ FILE *p = popen("dmesg", "r");
+ if (!p)
+ return false;
+
+ while (fgets(line, sizeof(line), p)) {
+ char *msg;
+ int len;
+
+ /* Get the timestamp. */
+ if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
+ assert(0);
+ continue;
}
+ timestamp = sec * 1000000llu + usec;
+
+ /* If we're only updating the timestamp, skip the rest. */
+ if (!out_addr)
+ continue;
+
+ /* Process messages only if the timestamp is newer. */
+ if (timestamp <= sctx->dmesg_timestamp)
+ continue;
+
+ /* Only process the first VM fault. */
+ if (fault)
+ continue;
+
+ /* Remove trailing \n */
+ len = strlen(line);
+ if (len && line[len-1] == '\n')
+ line[len-1] = 0;
+
+ /* Get the message part. */
+ msg = strchr(line, ']');
+ if (!msg) {
+ assert(0);
+ continue;
+ }
+ msg++;
- si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
- last_trace_id);
- free(sctx->last_ib); /* dump only once */
- sctx->last_ib = NULL;
- r600_resource_reference(&sctx->last_trace_buf, NULL);
+ switch (progress) {
+ case 0:
+ if (strstr(msg, "GPU fault detected:"))
+ progress = 1;
+ break;
+ case 1:
+ msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
+ if (msg) {
+ msg = strstr(msg, "0x");
+ if (msg) {
+ msg += 2;
+ if (sscanf(msg, "%X", out_addr) == 1)
+ fault = true;
+ }
+ }
+ progress = 0;
+ break;
+ default:
+ progress = 0;
+ }
}
+ pclose(p);
- fprintf(f, "Done.\n");
+ if (timestamp > sctx->dmesg_timestamp)
+ sctx->dmesg_timestamp = timestamp;
+ return fault;
+}
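+/* For reference, the parser above matches kernel messages of this general
+ * shape (illustrative, not an exact kernel log):
+ *   [  123.456789] radeon 0000:01:00.0: GPU fault detected: 147 0x0e7c14
+ *   [  123.456789]   VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x0014f8c1
+ * The hex value after VM_CONTEXT1_PROTECTION_FAULT_ADDR is the faulting
+ * VM page returned to the caller via out_addr. */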
+
+void si_check_vm_faults(struct si_context *sctx)
+{
+ struct pipe_screen *screen = sctx->b.b.screen;
+ FILE *f;
+ uint32_t addr;
+
+ /* Use a conservative timeout of 800 ms, after which we stop
+ * waiting and assume the GPU is hung.
+ */
+ screen->fence_finish(screen, sctx->last_gfx_fence, 800*1000*1000);
+
+ if (!si_vm_fault_occured(sctx, &addr))
+ return;
+
+ f = dd_get_debug_file();
+ if (!f)
+ return;
+
+ fprintf(f, "VM fault report.\n\n");
+ fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
+ fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
+ fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
+ fprintf(f, "Failing VM page: 0x%08x\n\n", addr);
+
+ si_dump_last_bo_list(sctx, f);
+ si_dump_last_ib(sctx, f);
+ fclose(f);
+
+ fprintf(stderr, "Detected a VM fault, exiting...\n");
+ exit(0);
}
void si_init_debug_functions(struct si_context *sctx)
{
sctx->b.b.dump_debug_state = si_dump_debug_state;
+
+ /* Set the initial dmesg timestamp for this context, so that
+ * only new messages will be checked for VM faults.
+ */
+ if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
+ si_vm_fault_occured(sctx, NULL);
}
util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
desc->list_dirty = false;
desc->pointer_dirty = true;
si_release_descriptors(&views->desc);
}
-static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res)
-{
- if (res->b.b.target == PIPE_BUFFER)
- return RADEON_PRIO_SHADER_BUFFER_RO;
-
- if (res->b.b.nr_samples > 1)
- return RADEON_PRIO_SHADER_TEXTURE_MSAA;
-
- return RADEON_PRIO_SHADER_TEXTURE_RO;
-}
-
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
struct si_sampler_views *views)
{
uint64_t mask = views->desc.enabled_mask;
- /* Add relocations to the CS. */
+ /* Add buffers to the CS. */
while (mask) {
int i = u_bit_scan64(&mask);
struct si_sampler_view *rview =
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rview->resource, RADEON_USAGE_READ,
- si_get_resource_ro_priority(rview->resource));
+ r600_get_sampler_view_priority(rview->resource));
}
if (!views->desc.buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
}
static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
if (rview->resource)
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rview->resource, RADEON_USAGE_READ,
- si_get_resource_ro_priority(rview->resource));
+ r600_get_sampler_view_priority(rview->resource));
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(views->desc.list + slot*8, view_desc, 8*4);
if (!states->desc.buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
}
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
{
uint64_t mask = buffers->desc.enabled_mask;
- /* Add relocations to the CS. */
+ /* Add buffers to the CS. */
while (mask) {
int i = u_bit_scan64(&mask);
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
buffers->desc.buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_DESCRIPTORS);
}
/* VERTEX BUFFERS */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
}
if (!desc->buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_DESCRIPTORS);
}
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_DESCRIPTORS);
assert(count <= SI_NUM_VERTEX_BUFFERS);
if (!bound[ve->vertex_buffer_index]) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)vb->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
bound[ve->vertex_buffer_index] = true;
}
}
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rbuffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_PRIO_SAMPLER_BUFFER);
}
}
}
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_init_buffer_resources(&sctx->const_buffers[i],
SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
si_init_buffer_resources(&sctx->rw_buffers[i],
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
si_init_descriptors(&sctx->samplers[i].views.desc,
SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
r600_need_dma_space(&ctx->b, ncopy * 5);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
for (i = 0; i < ncopy; i++) {
csize = size < max_csize ? size : max_csize;
r600_need_dma_space(&ctx->b, ncopy * 9);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
for (i = 0; i < ncopy; i++) {
cheight = copy_height;
goto fallback;
if (src->format != dst->format || src_box->depth > 1 ||
- rdst->dirty_level_mask != 0 ||
+ (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
rdst->cmask.size || rdst->fmask.size ||
rsrc->cmask.size || rsrc->fmask.size) {
goto fallback;
}
- if (rsrc->dirty_level_mask) {
+ if (rsrc->dirty_level_mask & (1 << src_level)) {
ctx->flush_resource(ctx, src);
}
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
/* There are two memory usage counters in the winsys for all buffers
- * that have been added (cs_add_reloc) and two counters in the pipe
+ * that have been added (cs_add_buffer) and two counters in the pipe
* driver for those that haven't been added yet.
*/
if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
if (ctx->trace_buf)
si_trace_emit(ctx);
- /* Save the IB for debug contexts. */
if (ctx->is_debug) {
+ unsigned i;
+
+ /* Save the IB for debug contexts. */
free(ctx->last_ib);
ctx->last_ib_dw_size = cs->cdw;
ctx->last_ib = malloc(cs->cdw * 4);
memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
r600_resource_reference(&ctx->trace_buf, NULL);
+
+ /* Save the buffer list. */
+ if (ctx->last_bo_list) {
+ for (i = 0; i < ctx->last_bo_count; i++)
+ pb_reference(&ctx->last_bo_list[i].buf, NULL);
+ free(ctx->last_bo_list);
+ }
+ ctx->last_bo_count = ws->cs_get_buffer_list(cs, NULL);
+ ctx->last_bo_list = calloc(ctx->last_bo_count,
+ sizeof(ctx->last_bo_list[0]));
+ ws->cs_get_buffer_list(cs, ctx->last_bo_list);
}
/* Flush the CS. */
if (fence)
ws->fence_reference(fence, ctx->last_gfx_fence);
+ /* Check VM faults if needed. */
+ if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
+ si_check_vm_faults(ctx);
+
si_begin_new_cs(ctx);
}
si_mark_atom_dirty(ctx, &ctx->db_render_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
si_mark_atom_dirty(ctx, &ctx->spi_map);
+ si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_all_descriptors_begin_new_cs(ctx);
r600_resource_reference(&sctx->trace_buf, NULL);
r600_resource_reference(&sctx->last_trace_buf, NULL);
free(sctx->last_ib);
+ if (sctx->last_bo_list) {
+ for (i = 0; i < sctx->last_bo_count; i++)
+ pb_reference(&sctx->last_bo_list[i].buf, NULL);
+ free(sctx->last_bo_list);
+ }
FREE(sctx);
}
if (sctx == NULL)
return NULL;
+ if (sscreen->b.debug_flags & DBG_CHECK_VM)
+ flags |= PIPE_CONTEXT_DEBUG;
+
sctx->b.b.screen = screen; /* this must be set first */
sctx->b.b.priv = priv;
sctx->b.b.destroy = si_destroy_context;
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
* [4..7] = buffer descriptor */
uint32_t state[8];
uint32_t fmask_state[8];
+ bool is_stencil_sampler;
};
struct si_sampler_state {
struct si_viewports viewports;
struct si_stencil_ref stencil_ref;
struct r600_atom spi_map;
+ struct r600_atom spi_ps_input;
/* Precomputed states. */
struct si_pm4_state *init_config;
+ bool init_config_has_vgt_flush;
struct si_pm4_state *vgt_shader_config[4];
/* With rasterizer discard, there doesn't have to be a pixel shader.
* In that case, we bind this one: */
struct si_vertex_element *vertex_elements;
unsigned sprite_coord_enable;
bool flatshade;
+ bool force_persample_interp;
/* shader descriptors */
struct si_descriptors vertex_buffers;
bool dbcb_depth_copy_enabled;
bool dbcb_stencil_copy_enabled;
unsigned dbcb_copy_sample;
- bool db_inplace_flush_enabled;
+ bool db_flush_depth_inplace;
+ bool db_flush_stencil_inplace;
bool db_depth_clear;
bool db_depth_disable_expclear;
unsigned ps_db_shader_control;
struct r600_resource *last_trace_buf;
struct r600_resource *trace_buf;
unsigned trace_id;
+ uint64_t dmesg_timestamp;
+ unsigned last_bo_count;
+ struct radeon_bo_list_item *last_bo_list;
};
/* cik_sdma.c */
/* si_debug.c */
void si_init_debug_functions(struct si_context *sctx);
+void si_check_vm_faults(struct si_context *sctx);
/* si_dma.c */
void si_dma_copy(struct pipe_context *ctx,
struct r600_resource *ib = state->indirect_buffer;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_IB2);
radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
radeon_emit(cs, ib->gpu_address);
}
}
+/* This shouldn't be used by explicit INTERP opcodes. */
+static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
+ unsigned param)
+{
+ struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+ unsigned sample_param = 0;
+ LLVMValueRef default_ij, sample_ij, force_sample;
+
+ default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
+
+ /* If the shader doesn't use center/centroid, just return the parameter.
+ *
+ * If the shader only uses one set of (i,j), "si_emit_spi_ps_input" can
+ * switch between center/centroid and sample without shader changes.
+ */
+ switch (param) {
+ case SI_PARAM_PERSP_CENTROID:
+ case SI_PARAM_PERSP_CENTER:
+ if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
+ return default_ij;
+
+ sample_param = SI_PARAM_PERSP_SAMPLE;
+ break;
+
+ case SI_PARAM_LINEAR_CENTROID:
+ case SI_PARAM_LINEAR_CENTER:
+ if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
+ return default_ij;
+
+ sample_param = SI_PARAM_LINEAR_SAMPLE;
+ break;
+
+ default:
+ return default_ij;
+ }
+
+ /* Otherwise, we have to select (i,j) based on a user data SGPR. */
+ sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
+
+ /* TODO: this can be done more efficiently by switching between
+ * 2 prologs.
+ */
+ force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_PS_STATE_BITS);
+ force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
+ LLVMInt1TypeInContext(gallivm->context), "");
+ return LLVMBuildSelect(gallivm->builder, force_sample,
+ sample_ij, default_ij, "");
+}
+
static void declare_input_fs(
struct radeon_llvm_context *radeon_bld,
unsigned input_index,
if (interp_param_idx == -1)
return;
else if (interp_param_idx)
- interp_param = LLVMGetParam(main_fn, interp_param_idx);
+ interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
/* fs.constant returns the param from the middle vertex, so it's not
* really useful for flat shading. It's meant to be used for custom
case TGSI_PROCESSOR_FRAGMENT:
params[SI_PARAM_ALPHA_REF] = f32;
+ params[SI_PARAM_PS_STATE_BITS] = i32;
params[SI_PARAM_PRIM_MASK] = i32;
last_sgpr = SI_PARAM_PRIM_MASK;
params[SI_PARAM_PERSP_SAMPLE] = v2i32;
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
#define SI_SGPR_ALPHA_REF 8 /* PS only */
+#define SI_SGPR_PS_STATE_BITS 9 /* PS only */
#define SI_VS_NUM_USER_SGPR 12
#define SI_LS_NUM_USER_SGPR 13
#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
#define SI_GSCOPY_NUM_USER_SGPR 4
-#define SI_PS_NUM_USER_SGPR 9
+#define SI_PS_NUM_USER_SGPR 10
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
/* PS only parameters */
#define SI_PARAM_ALPHA_REF 4
-#define SI_PARAM_PRIM_MASK 5
-#define SI_PARAM_PERSP_SAMPLE 6
-#define SI_PARAM_PERSP_CENTER 7
-#define SI_PARAM_PERSP_CENTROID 8
-#define SI_PARAM_PERSP_PULL_MODEL 9
-#define SI_PARAM_LINEAR_SAMPLE 10
-#define SI_PARAM_LINEAR_CENTER 11
-#define SI_PARAM_LINEAR_CENTROID 12
-#define SI_PARAM_LINE_STIPPLE_TEX 13
-#define SI_PARAM_POS_X_FLOAT 14
-#define SI_PARAM_POS_Y_FLOAT 15
-#define SI_PARAM_POS_Z_FLOAT 16
-#define SI_PARAM_POS_W_FLOAT 17
-#define SI_PARAM_FRONT_FACE 18
-#define SI_PARAM_ANCILLARY 19
-#define SI_PARAM_SAMPLE_COVERAGE 20
-#define SI_PARAM_POS_FIXED_PT 21
+/* Bits:
+ * 0: force_persample_interp
+ */
+#define SI_PARAM_PS_STATE_BITS 5
+#define SI_PARAM_PRIM_MASK 6
+#define SI_PARAM_PERSP_SAMPLE 7
+#define SI_PARAM_PERSP_CENTER 8
+#define SI_PARAM_PERSP_CENTROID 9
+#define SI_PARAM_PERSP_PULL_MODEL 10
+#define SI_PARAM_LINEAR_SAMPLE 11
+#define SI_PARAM_LINEAR_CENTER 12
+#define SI_PARAM_LINEAR_CENTROID 13
+#define SI_PARAM_LINE_STIPPLE_TEX 14
+#define SI_PARAM_POS_X_FLOAT 15
+#define SI_PARAM_POS_Y_FLOAT 16
+#define SI_PARAM_POS_Z_FLOAT 17
+#define SI_PARAM_POS_W_FLOAT 18
+#define SI_PARAM_FRONT_FACE 19
+#define SI_PARAM_ANCILLARY 20
+#define SI_PARAM_SAMPLE_COVERAGE 21
+#define SI_PARAM_POS_FIXED_PT 22
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
+ /* Whether the shader has to use a conditional assignment to
+ * choose between weights when emulating
+ * pipe_rasterizer_state::force_persample_interp.
+ * If false, "si_emit_spi_ps_input" will take care of it instead.
+ */
+ bool forces_persample_interp_for_persp;
+ bool forces_persample_interp_for_linear;
+
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
unsigned gs_num_invocations;
rs->two_side = state->light_twoside;
rs->multisample_enable = state->multisample;
+ rs->force_persample_interp = state->force_persample_interp;
rs->clip_plane_enable = state->clip_plane_enable;
rs->line_stipple_enable = state->line_stipple_enable;
rs->poly_stipple_enable = state->poly_stipple_enable;
S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
S_028000_COPY_CENTROID(1) |
S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
- } else if (sctx->db_inplace_flush_enabled) {
+ } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
radeon_emit(cs,
- S_028000_DEPTH_COMPRESS_DISABLE(1) |
- S_028000_STENCIL_COMPRESS_DISABLE(1));
+ S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
+ S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
} else if (sctx->db_depth_clear) {
radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
} else {
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_COLOR_META);
+ RADEON_PRIO_CMASK);
}
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
if (zb->db_htile_data_base) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rtex->htile_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_DEPTH_META);
+ RADEON_PRIO_HTILE);
}
radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
pipe_resource_reference(&view->base.texture, texture);
view->resource = &tmp->resource;
+ if (state->format == PIPE_FORMAT_X24S8_UINT ||
+ state->format == PIPE_FORMAT_S8X24_UINT ||
+ state->format == PIPE_FORMAT_X32_S8X24_UINT ||
+ state->format == PIPE_FORMAT_S8_UINT)
+ view->is_stencil_sampler = true;
+
/* Buffer resource. */
if (texture->target == PIPE_BUFFER) {
unsigned stride, num_records;
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_BORDER_COLORS);
si_pm4_upload_indirect_buffer(sctx, pm4);
sctx->init_config = pm4;
bool flatshade;
bool two_side;
bool multisample_enable;
+ bool force_persample_interp;
bool line_stipple_enable;
unsigned sprite_coord_enable;
unsigned pa_sc_line_stipple;
struct r600_atom *viewports;
struct r600_atom *stencil_ref;
struct r600_atom *spi_map;
+ struct r600_atom *spi_ps_input;
} s;
struct r600_atom *array[0];
};
if (sctx->scratch_buffer) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
sctx->scratch_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_SCRATCH_BUFFER);
}
sctx->emit_scratch_reloc = false;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SO_FILLED_SIZE);
}
/* draw packet */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource *)info->indirect,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
}
if (info->indexed) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource *)ib->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
if (info->indirect) {
uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
}
}
- /* TODO: VI should read index buffers through TC, so this shouldn't be
- * needed on VI. */
- if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
+ /* VI reads index buffers through TC L2. */
+ if (info->indexed && sctx->b.chip_class <= CIK &&
+ r600_resource(ib.buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
}
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+ rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (sctx->framebuffer.compressed_cb_mask) {
struct pipe_surface *surf;
sctx->trace_id++;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
- RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
S_370_WR_CONFIRM(1) |
return;
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
/* We need at least 2 components for LS.
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
return;
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
num_sgprs = shader->num_sgprs;
return;
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
S_028B90_ENABLE(gs_num_invocations > 0));
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
if (shader->is_gs_copy_shader) {
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
struct si_pm4_state *pm4;
unsigned i, spi_ps_in_control;
unsigned num_sgprs, num_user_sgprs;
- unsigned spi_baryc_cntl = 0, spi_ps_input_ena;
+ unsigned spi_baryc_cntl = 0;
uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
S_0286D8_BC_OPTIMIZE_DISABLE(1);
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
- spi_ps_input_ena = shader->spi_ps_input_ena;
- /* we need to enable at least one of them, otherwise we hang the GPU */
- assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) ||
- G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) ||
- G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) ||
- G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) ||
- G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) ||
- G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) ||
- G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) ||
- G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena));
-
- si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
- si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format);
si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
tgsi_scan_shader(state->tokens, &sel->info);
p_atomic_inc(&sscreen->b.num_shaders_created);
+ /* First set which opcode uses which (i,j) pair. */
+ if (sel->info.uses_persp_opcode_interp_centroid)
+ sel->info.uses_persp_centroid = true;
+
+ if (sel->info.uses_linear_opcode_interp_centroid)
+ sel->info.uses_linear_centroid = true;
+
+ if (sel->info.uses_persp_opcode_interp_offset ||
+ sel->info.uses_persp_opcode_interp_sample)
+ sel->info.uses_persp_center = true;
+
+ if (sel->info.uses_linear_opcode_interp_offset ||
+ sel->info.uses_linear_opcode_interp_sample)
+ sel->info.uses_linear_center = true;
+
+ /* Determine if the shader has to use a conditional assignment when
+ * emulating force_persample_interp.
+ */
+ sel->forces_persample_interp_for_persp =
+ sel->info.uses_persp_center +
+ sel->info.uses_persp_centroid +
+ sel->info.uses_persp_sample >= 2;
+
+ sel->forces_persample_interp_for_linear =
+ sel->info.uses_linear_center +
+ sel->info.uses_linear_centroid +
+ sel->info.uses_linear_sample >= 2;
+
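+ /* The sums above count how many distinct (i,j) interpolation sets the
+ * shader uses. With only one set, "si_emit_spi_ps_input" can remap it to
+ * SAMPLE via SPI_PS_INPUT_ENA alone; with two or more, the shader itself
+ * must select the sample coordinates at run time. */
+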
switch (pipe_shader_type) {
case PIPE_SHADER_GEOMETRY:
sel->gs_output_prim =
assert(ps->nparam == num_written);
}
+static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct si_shader *ps = sctx->ps_shader->current;
+ unsigned input_ena = ps->spi_ps_input_ena;
+
+ /* we need to enable at least one of them, otherwise we hang the GPU */
+ assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
+ G_0286CC_PERSP_CENTER_ENA(input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
+ G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
+ G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
+ G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
+ G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
+
+ if (sctx->force_persample_interp) {
+ unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
+ G_0286CC_PERSP_CENTER_ENA(input_ena) +
+ G_0286CC_PERSP_CENTROID_ENA(input_ena);
+ unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
+ G_0286CC_LINEAR_CENTER_ENA(input_ena) +
+ G_0286CC_LINEAR_CENTROID_ENA(input_ena);
+
+ /* If only one set of (i,j) coordinates is used, we can disable
+ * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates
+ * where CENTER/CENTROID are expected, effectively forcing per-sample
+ * interpolation.
+ */
+ if (num_persp == 1) {
+ input_ena &= C_0286CC_PERSP_CENTER_ENA;
+ input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+ input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1);
+ }
+ if (num_linear == 1) {
+ input_ena &= C_0286CC_LINEAR_CENTER_ENA;
+ input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+ input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1);
+ }
+
+ /* If at least 2 sets of coordinates are used, we can't use this
+ * trick and have to select SAMPLE using a conditional assignment
+ * in the shader with "force_persample_interp" being a shader constant.
+ */
+ }
+
+ radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
+ radeon_emit(cs, input_ena);
+ radeon_emit(cs, input_ena);
+
+ if (ps->selector->forces_persample_interp_for_persp ||
+ ps->selector->forces_persample_interp_for_linear)
+ radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
+ SI_SGPR_PS_STATE_BITS * 4,
+ sctx->force_persample_interp);
+}
+
+/**
+ * Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
+ */
+static void si_init_config_add_vgt_flush(struct si_context *sctx)
+{
+ if (sctx->init_config_has_vgt_flush)
+ return;
+
+ si_pm4_cmd_begin(sctx->init_config, PKT3_EVENT_WRITE);
+ si_pm4_cmd_add(sctx->init_config, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ si_pm4_cmd_end(sctx->init_config, false);
+ sctx->init_config_has_vgt_flush = true;
+}
+
/* Initialize state related to ESGS / GSVS ring buffers */
static void si_init_gs_rings(struct si_context *sctx)
{
return;
}
+ si_init_config_add_vgt_flush(sctx);
+
/* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
if (sctx->b.chip_class >= VI) {
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
+ si_init_config_add_vgt_flush(sctx);
+
/* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
si_mark_atom_dirty(sctx, &sctx->spi_map);
}
+ if (si_pm4_state_changed(sctx, ps) ||
+ sctx->force_persample_interp != rs->force_persample_interp) {
+ sctx->force_persample_interp = rs->force_persample_interp;
+ si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+ }
+
if (si_pm4_state_changed(sctx, ls) ||
si_pm4_state_changed(sctx, hs) ||
si_pm4_state_changed(sctx, es) ||
void si_init_shader_functions(struct si_context *sctx)
{
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
+ si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
sctx->b.b.create_vs_state = si_create_vs_state;
sctx->b.b.create_tcs_state = si_create_tcs_state;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
}
/* should only get here on unhandled cases */
} else {
FREE(transfer);
}
-
+
return map;
}
struct svga_screen *ss = svga_screen(pipe->screen);
struct svga_context *svga = svga_context(pipe);
struct svga_buffer *sbuf = svga_buffer(transfer->resource);
-
+
pipe_mutex_lock(ss->swc_mutex);
-
+
assert(sbuf->map.count);
if (sbuf->map.count) {
--sbuf->map.count;
*/
SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");
-
+
sbuf->dma.flags.discard = TRUE;
svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
struct svga_buffer *sbuf = svga_buffer( buf );
assert(!p_atomic_read(&buf->reference.count));
-
+
assert(!sbuf->dma.pending);
- if(sbuf->handle)
+ if (sbuf->handle)
svga_buffer_destroy_host_surface(ss, sbuf);
-
- if(sbuf->uploaded.buffer)
+
+ if (sbuf->uploaded.buffer)
pipe_resource_reference(&sbuf->uploaded.buffer, NULL);
- if(sbuf->hwbuf)
+ if (sbuf->hwbuf)
svga_buffer_destroy_hw_storage(ss, sbuf);
-
- if(sbuf->swbuf && !sbuf->user)
+
+ if (sbuf->swbuf && !sbuf->user)
align_free(sbuf->swbuf);
-
+
ss->total_resource_bytes -= sbuf->size;
FREE(sbuf);
}
-struct u_resource_vtbl svga_buffer_vtbl =
+struct u_resource_vtbl svga_buffer_vtbl =
{
u_default_resource_get_handle, /* get_handle */
svga_buffer_destroy, /* resource_destroy */
{
struct svga_screen *ss = svga_screen(screen);
struct svga_buffer *sbuf;
-
+
sbuf = CALLOC_STRUCT(svga_buffer);
- if(!sbuf)
+ if (!sbuf)
goto error1;
-
+
sbuf->b.b = *template;
sbuf->b.vtbl = &svga_buffer_vtbl;
pipe_reference_init(&sbuf->b.b.reference, 1);
}
}
- if(svga_buffer_needs_hw_storage(template->bind)) {
+ if (svga_buffer_needs_hw_storage(template->bind)) {
/* If the buffer will be used for vertex/index/stream data, set all
* the flags so that the buffer will be accepted for all those uses.
sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT;
}
- if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
+ if (svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
goto error2;
}
else {
sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64);
- if(!sbuf->swbuf)
+ if (!sbuf->swbuf)
goto error2;
}
-
+
debug_reference(&sbuf->b.b.reference,
(debug_reference_descriptor)debug_describe_resource, 0);
sbuf->size = util_resource_size(&sbuf->b.b);
ss->total_resource_bytes += sbuf->size;
- return &sbuf->b.b;
+ return &sbuf->b.b;
error2:
FREE(sbuf);
return NULL;
}
+
struct pipe_resource *
svga_user_buffer_create(struct pipe_screen *screen,
void *ptr,
unsigned bind)
{
struct svga_buffer *sbuf;
-
+
sbuf = CALLOC_STRUCT(svga_buffer);
- if(!sbuf)
+ if (!sbuf)
goto no_sbuf;
-
+
pipe_reference_init(&sbuf->b.b.reference, 1);
sbuf->b.vtbl = &svga_buffer_vtbl;
sbuf->b.b.screen = screen;
debug_reference(&sbuf->b.b.reference,
(debug_reference_descriptor)debug_describe_resource, 0);
-
- return &sbuf->b.b;
+
+ return &sbuf->b.b;
no_sbuf:
return NULL;
{
char res[128];
debug_describe_resource(res, sv->texture);
- util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod);
+ util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>",
+ res, sv->min_lod, sv->max_lod);
}
+
struct svga_sampler_view *
svga_get_tex_sampler_view(struct pipe_context *pipe,
struct pipe_resource *pt,
{
struct svga_context *svga = svga_context(pipe);
struct svga_screen *ss = svga_screen(pipe->screen);
- struct svga_texture *tex = svga_texture(pt);
+ struct svga_texture *tex = svga_texture(pt);
struct svga_sampler_view *sv = NULL;
SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE;
- SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, PIPE_BIND_SAMPLER_VIEW);
+ SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format,
+ PIPE_BIND_SAMPLER_VIEW);
boolean view = TRUE;
assert(pt);
sv->key.cachable = 0;
sv->handle = tex->handle;
debug_reference(&sv->reference,
- (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
+ (debug_reference_descriptor)
+ svga_debug_describe_sampler_view, 0);
return sv;
}
pipe_mutex_unlock(ss->tex_mutex);
debug_reference(&sv->reference,
- (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
+ (debug_reference_descriptor)
+ svga_debug_describe_sampler_view, 0);
return sv;
}
+
void
-svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v)
+svga_validate_sampler_view(struct svga_context *svga,
+ struct svga_sampler_view *v)
{
struct svga_texture *tex = svga_texture(v->texture);
unsigned numFaces;
age = tex->age;
- if(tex->b.b.target == PIPE_TEXTURE_CUBE)
+ if (tex->b.b.target == PIPE_TEXTURE_CUBE)
numFaces = 6;
else
numFaces = 1;
v->age = age;
}
+
void
svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
{
struct svga_texture *tex = svga_texture(v->texture);
- if(v->handle != tex->handle) {
+ if (v->handle != tex->handle) {
struct svga_screen *ss = svga_screen(v->texture->screen);
SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
svga_screen_surface_destroy(ss, &v->key, &v->handle);
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
}
assert(size == 0);
}
+ if (size % 16 != 0) {
+ /* GL's buffer range sizes can be any number of bytes but the
+ * SVGA3D device requires a multiple of 16 bytes.
+ */
+ const unsigned total_size = buffer->b.b.width0;
+
+ if (offset + align(size, 16) <= total_size) {
+ /* round up size to multiple of 16 */
+ size = align(size, 16);
+ }
+ else {
+ /* round down to a multiple of 16 (this may cause rendering problems
+ * but should avoid a device error).
+ */
+ size &= ~15;
+ }
+ }
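+ /* Example: size = 20 rounds up to align(20, 16) = 32 if the buffer is
+ * large enough, otherwise down to 20 & ~15 = 16. */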
+
assert(size % 16 == 0);
ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
index,
pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
+ pipe_surface_reference(&vc4->color_write, NULL);
+ pipe_surface_reference(&vc4->color_read, NULL);
+
vc4_program_fini(pctx);
ralloc_free(vc4);
/** How many variants of this program were compiled, for shader-db. */
uint32_t compiled_variant_count;
struct pipe_shader_state base;
- const struct tgsi_token *twoside_tokens;
};
struct vc4_ubo_range {
}
const struct tgsi_token *tokens = key->shader_state->base.tokens;
- if (c->fs_key && c->fs_key->light_twoside) {
- if (!key->shader_state->twoside_tokens) {
- const struct tgsi_lowering_config lowering_config = {
- .color_two_side = true,
- };
- struct tgsi_shader_info info;
- key->shader_state->twoside_tokens =
- tgsi_transform_lowering(&lowering_config,
- key->shader_state->base.tokens,
- &info);
-
- /* If no transformation occurred, then NULL is
- * returned and we just use our original tokens.
- */
- if (!key->shader_state->twoside_tokens) {
- key->shader_state->twoside_tokens =
- key->shader_state->base.tokens;
- }
- }
- tokens = key->shader_state->twoside_tokens;
- }
if (vc4_debug & VC4_DEBUG_TGSI) {
fprintf(stderr, "%s prog %d/%d TGSI:\n",
nir_convert_to_ssa(c->s);
if (stage == QSTAGE_FRAG)
vc4_nir_lower_blend(c);
+ if (c->fs_key && c->fs_key->light_twoside)
+ nir_lower_two_sided_color(c->s);
vc4_nir_lower_io(c);
nir_lower_idiv(c->s);
nir_lower_load_const_to_scalar(c->s);
hash_table_foreach(vc4->vs_cache, entry)
delete_from_cache_if_matches(vc4->vs_cache, entry, so);
- if (so->twoside_tokens != so->base.tokens)
- free((void *)so->twoside_tokens);
free((void *)so->base.tokens);
free(so);
}
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 0;
/* Stream output. */
{
for (int i = 0; i < exec->bo_count; i++) {
struct drm_gem_cma_object *obj = exec->bo[i];
- struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
+ struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
+ struct vc4_bo *bo = drm_bo->bo;
memcpy(bo->map, obj->vaddr, bo->size);
+ if (drm_bo->validated_shader) {
+ free(drm_bo->validated_shader->texture_samples);
+ free(drm_bo->validated_shader);
+ }
free(obj);
}
PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
PIPE_CAP_DEPTH_BOUNDS_TEST,
PIPE_CAP_TGSI_TXQS,
+ PIPE_CAP_FORCE_PERSAMPLE_INTERP,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
unsigned point_tri_clip:1; /** large points clipped as tris or points */
unsigned point_size_per_vertex:1; /**< size computed in vertex shader */
unsigned multisample:1; /* XXX maybe more ms state in future */
+ unsigned force_persample_interp:1;
unsigned line_smooth:1;
unsigned line_stipple_enable:1;
unsigned line_last_pixel:1;
* may occur as the stvis->color_format.
*/
switch(format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_BGRA8888_UNORM:
depth = 32;
break;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_BGRX8888_UNORM:
depth = 24;
break;
case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_B5G6R5_UNORM:
image_format = __DRI_IMAGE_FORMAT_RGB565;
break;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_BGRX8888_UNORM:
image_format = __DRI_IMAGE_FORMAT_XRGB8888;
break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_BGRA8888_UNORM:
image_format = __DRI_IMAGE_FORMAT_ARGB8888;
break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_RGBA8888_UNORM:
image_format = __DRI_IMAGE_FORMAT_ABGR8888;
break;
default:
switch (format) {
case 32:
- pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+ pf = PIPE_FORMAT_BGRA8888_UNORM;
break;
case 24:
- pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+ pf = PIPE_FORMAT_BGRX8888_UNORM;
break;
case 16:
pf = PIPE_FORMAT_Z16_UNORM;
pf = PIPE_FORMAT_B5G6R5_UNORM;
break;
case __DRI_IMAGE_FORMAT_XRGB8888:
- pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+ pf = PIPE_FORMAT_BGRX8888_UNORM;
break;
case __DRI_IMAGE_FORMAT_ARGB8888:
- pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+ pf = PIPE_FORMAT_BGRA8888_UNORM;
break;
case __DRI_IMAGE_FORMAT_ABGR8888:
- pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+ pf = PIPE_FORMAT_RGBA8888_UNORM;
break;
default:
pf = PIPE_FORMAT_NONE;
pf = PIPE_FORMAT_B5G6R5_UNORM;
break;
case __DRI_IMAGE_FORMAT_XRGB8888:
- pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+ pf = PIPE_FORMAT_BGRX8888_UNORM;
break;
case __DRI_IMAGE_FORMAT_ARGB8888:
- pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+ pf = PIPE_FORMAT_BGRA8888_UNORM;
break;
case __DRI_IMAGE_FORMAT_ABGR8888:
- pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+ pf = PIPE_FORMAT_RGBA8888_UNORM;
break;
default:
pf = PIPE_FORMAT_NONE;
}
struct dri2_fence {
+ struct dri_screen *driscreen;
struct pipe_fence_handle *pipe_fence;
void *cl_event;
};
return NULL;
}
+ fence->driscreen = dri_screen(_ctx->driScreenPriv);
return fence;
}
return NULL;
}
+ fence->driscreen = driscreen;
return fence;
}
dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
uint64_t timeout)
{
- struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv);
- struct pipe_screen *screen = driscreen->base.screen;
struct dri2_fence *fence = (struct dri2_fence*)_fence;
+ struct dri_screen *driscreen = fence->driscreen;
+ struct pipe_screen *screen = driscreen->base.screen;
/* No need to flush. The context was flushed when the fence was created. */
if (format == __DRI_TEXTURE_FORMAT_RGB) {
/* only need to cover the formats recognized by dri_fill_st_visual */
switch (internal_format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
+ case PIPE_FORMAT_BGRA8888_UNORM:
+ internal_format = PIPE_FORMAT_BGRX8888_UNORM;
break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
+ case PIPE_FORMAT_ARGB8888_UNORM:
+ internal_format = PIPE_FORMAT_XRGB8888_UNORM;
break;
default:
break;
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
-int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
+int amdgpu_lookup_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
{
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
int i = cs->buffer_indices_hashlist[hash];
if (i == -1 || cs->buffers[i].bo == bo)
return i;
- /* Hash collision, look for the BO in the list of relocs linearly. */
+ /* Hash collision, look for the BO in the list of buffers linearly. */
for (i = cs->num_buffers - 1; i >= 0; i--) {
if (cs->buffers[i].bo == bo) {
- /* Put this reloc in the hash list.
+ /* Put this buffer in the hash list.
* This will prevent additional hash collisions if there are
- * several consecutive get_reloc calls for the same buffer.
+ * several consecutive lookup_buffer calls for the same buffer.
*
* Example: Assuming buffers A,B,C collide in the hash list,
- * the following sequence of relocs:
+ * the following sequence of buffers:
* AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
* will collide here: ^ and here: ^,
* meaning that we should get very few collisions in the end. */
return -1;
}
-static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
+static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
struct amdgpu_winsys_bo *bo,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
unsigned priority,
enum radeon_bo_domain *added_domains)
{
- struct amdgpu_cs_buffer *reloc;
+ struct amdgpu_cs_buffer *buffer;
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
int i = -1;
- priority = MIN2(priority, 15);
+ assert(priority < 64);
*added_domains = 0;
- i = amdgpu_get_reloc(cs, bo);
+ i = amdgpu_lookup_buffer(cs, bo);
if (i >= 0) {
- reloc = &cs->buffers[i];
- reloc->usage |= usage;
- *added_domains = domains & ~reloc->domains;
- reloc->domains |= domains;
- cs->flags[i] = MAX2(cs->flags[i], priority);
+ buffer = &cs->buffers[i];
+ buffer->priority_usage |= 1llu << priority;
+ buffer->usage |= usage;
+ *added_domains = domains & ~buffer->domains;
+ buffer->domains |= domains;
+ cs->flags[i] = MAX2(cs->flags[i], priority / 4);
return i;
}
- /* New relocation, check if the backing array is large enough. */
+ /* New buffer, check if the backing array is large enough. */
if (cs->num_buffers >= cs->max_num_buffers) {
uint32_t size;
cs->max_num_buffers += 10;
cs->flags = realloc(cs->flags, cs->max_num_buffers);
}
- /* Initialize the new relocation. */
+ /* Initialize the new buffer. */
cs->buffers[cs->num_buffers].bo = NULL;
amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
cs->handles[cs->num_buffers] = bo->bo;
- cs->flags[cs->num_buffers] = priority;
+ cs->flags[cs->num_buffers] = priority / 4;
p_atomic_inc(&bo->num_cs_references);
- reloc = &cs->buffers[cs->num_buffers];
- reloc->bo = bo;
- reloc->usage = usage;
- reloc->domains = domains;
+ buffer = &cs->buffers[cs->num_buffers];
+ buffer->bo = bo;
+ buffer->priority_usage = 1llu << priority;
+ buffer->usage = usage;
+ buffer->domains = domains;
cs->buffer_indices_hashlist[hash] = cs->num_buffers;
return cs->num_buffers++;
}
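/* Note on the priority handling above: priorities now span [0, 63] (see the
 * assert), so each buffer records every priority it was used with as a bit
 * in the 64-bit priority_usage mask (consumed by the debug buffer-list
 * dump), while cs->flags keeps only the coarser legacy value priority / 4. */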
-static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
+static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
enum radeon_bo_domain added_domains;
- unsigned index = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain,
+ unsigned index = amdgpu_add_buffer(cs, bo, usage, bo->initial_domain,
priority, &added_domains);
if (added_domains & RADEON_DOMAIN_GTT)
return index;
}
-static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
+static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
- return amdgpu_get_reloc(cs, (struct amdgpu_winsys_bo*)buf);
+ return amdgpu_lookup_buffer(cs, (struct amdgpu_winsys_bo*)buf);
}
static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
return status;
}
+static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
+ struct radeon_bo_list_item *list)
+{
+ struct amdgpu_cs *cs = amdgpu_cs(rcs);
+ int i;
+
+ if (list) {
+ for (i = 0; i < cs->num_buffers; i++) {
+ pb_reference(&list[i].buf, &cs->buffers[i].bo->base);
+ list[i].vm_address = cs->buffers[i].bo->va;
+ list[i].priority_usage = cs->buffers[i].priority_usage;
+ }
+ }
+ return cs->num_buffers;
+}
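+/* Callers query the buffer count first with list == NULL, allocate the
+ * array, then call again to fill it; every filled entry holds a pb_buffer
+ * reference that the caller must release with pb_reference(..., NULL). */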
+
static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
struct pipe_fence_handle **out_fence)
{
fprintf(stderr, "amdgpu: command stream overflowed\n");
}
- amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
- RADEON_USAGE_READ, 0, RADEON_PRIO_MIN);
+ amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer,
+ RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
/* If the CS is not empty or overflowed.... */
if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
ws->base.cs_create = amdgpu_cs_create;
ws->base.cs_destroy = amdgpu_cs_destroy;
- ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
- ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
+ ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
+ ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
ws->base.cs_validate = amdgpu_cs_validate;
ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+ ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
ws->base.cs_flush = amdgpu_cs_flush;
ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
struct amdgpu_cs_buffer {
struct amdgpu_winsys_bo *bo;
+ uint64_t priority_usage;
enum radeon_bo_usage usage;
enum radeon_bo_domain domains;
};
struct amdgpu_cs_request request;
struct amdgpu_cs_ib_info ib;
- /* Relocs. */
+ /* Buffers. */
unsigned max_num_buffers;
unsigned num_buffers;
amdgpu_bo_handle *handles;
*rdst = rsrc;
}
-int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
+int amdgpu_lookup_buffer(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_winsys_cs *base)
{
int num_refs = bo->num_cs_references;
return num_refs == bo->rws->num_cs ||
- (num_refs && amdgpu_get_reloc(cs, bo) != -1);
+ (num_refs && amdgpu_lookup_buffer(cs, bo) != -1);
}
static inline boolean
if (!bo->num_cs_references)
return FALSE;
- index = amdgpu_get_reloc(cs, bo);
+ index = amdgpu_lookup_buffer(cs, bo);
if (index == -1)
return FALSE;
/*
This file replaces libdrm's radeon_cs_gem with our own implementation.
It's optimized specifically for Radeon DRM.
- Reloc writes and space checking are faster and simpler than their
+ Adding buffers and space checking are faster and simpler than their
counterparts in libdrm (the time complexity of all the functions
is O(1) in nearly all scenarios, thanks to hashing).
It works like this:
- cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
+ cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
also adds the size of 'buf' to the used_gart and used_vram winsys variables
based on the domains, which are simply or'd for the accounting purposes.
The adding is skipped if the reloc is already present in the list, but it
(done in the pipe driver)
cs_write_reloc(cs, buf) just writes a reloc that has been added using
- cs_add_reloc. The read_domain and write_domain parameters have been removed,
- because we already specify them in cs_add_reloc.
+ cs_add_buffer. The read_domain and write_domain parameters have been removed,
+ because we already specify them in cs_add_buffer.
*/
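/* A minimal driver-side sketch of the API described above (assumes "ws",
   "cs" and the buffer handle "buf" already exist; not taken verbatim from
   any driver):

      unsigned idx = ws->cs_add_buffer(cs, buf, RADEON_USAGE_READ,
                                       RADEON_DOMAIN_VRAM,
                                       RADEON_PRIO_VERTEX_BUFFER);
      ...
      ws->cs_write_reloc(cs, buf);   (domains and priority were already
                                      given to cs_add_buffer)

   Repeated cs_add_buffer calls for the same buffer are O(1) thanks to the
   hash list and just OR in the new usage, domains and priority. */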
#include "radeon_drm_cs.h"
csc->fd = ws->fd;
csc->nrelocs = 512;
- csc->relocs_bo = (struct radeon_bo**)
- CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
+ csc->relocs_bo = (struct radeon_bo_item*)
+ CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
if (!csc->relocs_bo) {
return FALSE;
}
unsigned i;
for (i = 0; i < csc->crelocs; i++) {
- p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
- radeon_bo_reference(&csc->relocs_bo[i], NULL);
+ p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
+ radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
}
csc->crelocs = 0;
reloc->flags = MAX2(reloc->flags, priority);
}
-int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
+int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
int i = csc->reloc_indices_hashlist[hash];
/* not found or found */
- if (i == -1 || csc->relocs_bo[i] == bo)
+ if (i == -1 || csc->relocs_bo[i].bo == bo)
return i;
/* Hash collision, look for the BO in the list of relocs linearly. */
for (i = csc->crelocs - 1; i >= 0; i--) {
- if (csc->relocs_bo[i] == bo) {
+ if (csc->relocs_bo[i].bo == bo) {
/* Put this reloc in the hash list.
* This will prevent additional hash collisions if there are
- * several consecutive get_reloc calls for the same buffer.
+ * several consecutive lookup_buffer calls for the same buffer.
*
* Example: Assuming buffers A,B,C collide in the hash list,
* the following sequence of relocs:
return -1;
}
-static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
+static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
struct radeon_bo *bo,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
int i = -1;
- priority = MIN2(priority, 15);
+ assert(priority < 64);
*added_domains = 0;
- i = radeon_get_reloc(csc, bo);
+ i = radeon_lookup_buffer(csc, bo);
if (i >= 0) {
reloc = &csc->relocs[i];
- update_reloc(reloc, rd, wd, priority, added_domains);
+ update_reloc(reloc, rd, wd, priority / 4, added_domains);
+ csc->relocs_bo[i].priority_usage |= 1llu << priority;
- /* For async DMA, every add_reloc call must add a buffer to the list
+ /* For async DMA, every add_buffer call must add a buffer to the list
* no matter how many duplicates there are. This is due to the fact
* the DMA CS checker doesn't use NOP packets for offset patching,
* but always uses the i-th buffer from the list to patch the i-th
uint32_t size;
csc->nrelocs += 10;
- size = csc->nrelocs * sizeof(struct radeon_bo*);
+ size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
csc->relocs_bo = realloc(csc->relocs_bo, size);
size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
}
/* Initialize the new relocation. */
- csc->relocs_bo[csc->crelocs] = NULL;
- radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
+ csc->relocs_bo[csc->crelocs].bo = NULL;
+ csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
+ radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
p_atomic_inc(&bo->num_cs_references);
reloc = &csc->relocs[csc->crelocs];
reloc->handle = bo->handle;
reloc->read_domains = rd;
reloc->write_domain = wd;
- reloc->flags = priority;
+ reloc->flags = priority / 4;
csc->reloc_indices_hashlist[hash] = csc->crelocs;
return csc->crelocs++;
}
-static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
+static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_bo *bo = (struct radeon_bo*)buf;
enum radeon_bo_domain added_domains;
- unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);
+ unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
+ &added_domains);
if (added_domains & RADEON_DOMAIN_GTT)
cs->csc->used_gart += bo->base.size;
return index;
}
-static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
+static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
+ return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
if (status) {
cs->csc->validated_crelocs = cs->csc->crelocs;
} else {
- /* Remove lately-added relocations. The validation failed with them
+ /* Remove lately-added buffers. The validation failed with them
* and the CS is about to be flushed because of that. Keep only
- * the already-validated relocations. */
+ * the already-validated buffers. */
unsigned i;
for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
- p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
- radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
+ p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
+ radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
}
cs->csc->crelocs = cs->csc->validated_crelocs;
return gtt < cs->ws->info.gart_size * 0.7;
}
+static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
+ struct radeon_bo_list_item *list)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ int i;
+
+ if (list) {
+ for (i = 0; i < cs->csc->crelocs; i++) {
+ pb_reference(&list[i].buf, &cs->csc->relocs_bo[i].bo->base);
+ list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
+ list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
+ }
+ }
+ return cs->csc->crelocs;
+}
+
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
unsigned i;
}
for (i = 0; i < csc->crelocs; i++)
- p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
+ p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
radeon_cs_context_cleanup(csc);
}
for (i = 0; i < crelocs; i++) {
/* Update the number of active asynchronous CS ioctls for the buffer. */
- p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
+ p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
}
switch (cs->base.ring_type) {
if (!bo->num_cs_references)
return FALSE;
- index = radeon_get_reloc(cs->csc, bo);
+ index = radeon_lookup_buffer(cs->csc, bo);
if (index == -1)
return FALSE;
fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
RADEON_DOMAIN_GTT, 0);
/* Add the fence as a dummy relocation. */
- cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
+ cs->ws->base.cs_add_buffer(rcs, cs->ws->base.buffer_get_cs_handle(fence),
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_FENCE);
return (struct pipe_fence_handle*)fence;
}
ws->base.ctx_destroy = radeon_drm_ctx_destroy;
ws->base.cs_create = radeon_drm_cs_create;
ws->base.cs_destroy = radeon_drm_cs_destroy;
- ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
- ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
+ ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
+ ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
ws->base.cs_validate = radeon_drm_cs_validate;
ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
+ ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
ws->base.cs_flush = radeon_drm_cs_flush;
ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
#include "radeon_drm_bo.h"
+struct radeon_bo_item {
+ struct radeon_bo *bo;
+ uint64_t priority_usage;
+};
+
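The new radeon_bo_item pairs each buffer with the priority usage accumulated for it, which is exactly what radeon_drm_cs_get_buffer_list() reports per entry; this wrapping is why the hunks above switch from relocs_bo[i] to relocs_bo[i].bo.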
struct radeon_cs_context {
uint32_t buf[16 * 1024];
uint32_t cs_trace_id;
- /* Relocs. */
+ /* Buffers. */
unsigned nrelocs;
unsigned crelocs;
unsigned validated_crelocs;
- struct radeon_bo **relocs_bo;
+ struct radeon_bo_item *relocs_bo;
struct drm_radeon_cs_reloc *relocs;
+ uint64_t *priority_usage;
int reloc_indices_hashlist[512];
struct radeon_bo *trace_buf;
};
-int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
+int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo);
static inline struct radeon_drm_cs *
radeon_drm_cs(struct radeon_winsys_cs *base)
{
int num_refs = bo->num_cs_references;
return num_refs == bo->rws->num_cs ||
- (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
+ (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1);
}
static inline boolean
if (!bo->num_cs_references)
return FALSE;
- index = radeon_get_reloc(cs->csc, bo);
+ index = radeon_lookup_buffer(cs->csc, bo);
if (index == -1)
return FALSE;
}
memset(&args, 0, sizeof(args));
- args.handle = csc->relocs_bo[0]->handle;
+ args.handle = csc->relocs_bo[0].bo->handle;
for (i = 0; i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT; i++) {
usleep(1);
lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args));
fprintf(dump, "\n");
for (i = 0; i < csc->crelocs; i++) {
- unsigned j, ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
+ unsigned j, ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
- ptr = radeon_bo_do_map(csc->relocs_bo[i]);
+ ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
if (ptr) {
fprintf(dump, "static uint32_t bo_%04d_data[%d] = {\n ", i, ndw);
for (j = 0; j < ndw; j++) {
if (j && !(j % 8)) {
uint32_t offset = (j - 8) << 2;
- fprintf(dump, " /* [0x%08x] va[0x%016"PRIx64"] */\n ", offset, offset + csc->relocs_bo[i]->va);
+ fprintf(dump, " /* [0x%08x] va[0x%016"PRIx64"] */\n ", offset, offset + csc->relocs_bo[i].bo->va);
}
fprintf(dump, " 0x%08x,", ptr[j]);
}
fprintf(dump, "\n");
for (i = 0; i < csc->crelocs; i++) {
- unsigned ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
+ unsigned ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
uint32_t *ptr;
- ptr = radeon_bo_do_map(csc->relocs_bo[i]);
+ ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
if (ptr) {
fprintf(dump, " bo[%d] = bo_new(&ctx, %d, bo_%04d_data, 0x%016"PRIx64", 0x%08x);\n",
- i, ndw, i, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
+ i, ndw, i, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
} else {
fprintf(dump, " bo[%d] = bo_new(&ctx, %d, NULL, 0x%016"PRIx64", 0x%08x);\n",
- i, ndw, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
+ i, ndw, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
}
}
fprintf(dump, "\n");
opt_constant_variable.cpp \
opt_copy_propagation.cpp \
opt_copy_propagation_elements.cpp \
- opt_cse.cpp \
opt_dead_builtin_variables.cpp \
opt_dead_builtin_varyings.cpp \
opt_dead_code.cpp \
const glsl_type *block_array_type =
process_array_type(&loc, block_type, this->array_specifier, state);
- /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
+ /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
*
* * Arrays of arrays of blocks are not allowed
*/
if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) {
YYERROR;
}
+
+ /* From the GLSL 4.50 spec, section 4.4.5:
+ *
+ * "It is a compile-time error to specify the binding identifier for
+ * the global scope or for block member declarations."
+ */
+ if (state->default_shader_storage_qualifier->flags.q.explicit_binding) {
+ _mesa_glsl_error(& @1, state,
+ "binding qualifier cannot be set for default layout");
+ }
+
$$ = NULL;
}
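With this check, a default layout declaration such as "layout(binding = 0) buffer;" at global scope is now rejected at compile time, as the quoted GLSL 4.50 wording requires.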
progress = do_constant_variable_unlinked(ir) || progress;
progress = do_constant_folding(ir) || progress;
progress = do_minmax_prune(ir) || progress;
- progress = do_cse(ir) || progress;
progress = do_rebalance_tree(ir) || progress;
progress = do_algebraic(ir, native_integers, options) || progress;
progress = do_lower_jumps(ir) || progress;
unsigned int array_len;
if (this->is_array()) {
- element_type = this->fields.array;
- array_len = this->length;
+ element_type = this->without_array();
+ array_len = this->arrays_of_arrays_size();
} else {
element_type = this;
array_len = 1;
}
if (this->is_array()) {
- if (this->fields.array->is_record())
- return this->length * this->fields.array->std430_size(row_major);
+ if (this->without_array()->is_record())
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std430_size(row_major);
else
- return this->length * this->fields.array->std430_base_alignment(row_major);
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std430_base_alignment(row_major);
}
if (this->is_record() || this->is_interface()) {
bool do_copy_propagation(exec_list *instructions);
bool do_copy_propagation_elements(exec_list *instructions);
bool do_constant_propagation(exec_list *instructions);
-bool do_cse(exec_list *instructions);
void do_dead_builtin_varyings(struct gl_context *ctx,
gl_shader *producer, gl_shader *consumer,
unsigned num_tfeedback_decls,
*/
bool initialized;
- struct gl_opaque_uniform_index sampler[MESA_SHADER_STAGES];
-
- struct gl_opaque_uniform_index image[MESA_SHADER_STAGES];
-
- struct gl_opaque_uniform_index subroutine[MESA_SHADER_STAGES];
+ struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES];
/**
* Storage used by the driver for the uniform
if (shader) {
if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
- storage->sampler[sh].active) {
+ storage->opaque[sh].active) {
for (unsigned i = 0; i < elements; i++) {
- const unsigned index = storage->sampler[sh].index + i;
+ const unsigned index = storage->opaque[sh].index + i;
shader->SamplerUnits[index] = storage->storage[i].i;
}
} else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
- storage->image[sh].active) {
+ storage->opaque[sh].active) {
for (unsigned i = 0; i < elements; i++) {
- const unsigned index = storage->image[sh].index + i;
+ const unsigned index = storage->opaque[sh].index + i;
shader->ImageUnits[index] = storage->storage[i].i;
}
}
for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
gl_shader *shader = prog->_LinkedShaders[sh];
- if (shader && storage->sampler[sh].active) {
- unsigned index = storage->sampler[sh].index;
+ if (shader && storage->opaque[sh].active) {
+ unsigned index = storage->opaque[sh].index;
shader->SamplerUnits[index] = storage->storage[0].i;
}
struct gl_uniform_storage *uniform, const char *name)
{
if (base_type->is_sampler()) {
- uniform->sampler[shader_type].active = true;
+ uniform->opaque[shader_type].active = true;
/* Handle multiple samplers inside struct arrays */
if (this->record_array_count > 1) {
/* In this case, we've already seen this uniform so we just use
* the next sampler index recorded the last time we visited.
*/
- uniform->sampler[shader_type].index = index;
- index = inner_array_size + uniform->sampler[shader_type].index;
+ uniform->opaque[shader_type].index = index;
+ index = inner_array_size + uniform->opaque[shader_type].index;
this->record_next_sampler->put(index, name_copy);
ralloc_free(name_copy);
* structs. This allows the offset to be easily calculated for
* indirect indexing.
*/
- uniform->sampler[shader_type].index = this->next_sampler;
+ uniform->opaque[shader_type].index = this->next_sampler;
this->next_sampler +=
inner_array_size * this->record_array_count;
/* Store the next index for future passes over the struct array
*/
- index = uniform->sampler[shader_type].index + inner_array_size;
+ index = uniform->opaque[shader_type].index + inner_array_size;
this->record_next_sampler->put(index, name_copy);
ralloc_free(name_copy);
}
/* Increment the sampler by 1 for non-arrays and by the number of
* array elements for arrays.
*/
- uniform->sampler[shader_type].index = this->next_sampler;
+ uniform->opaque[shader_type].index = this->next_sampler;
this->next_sampler += MAX2(1, uniform->array_elements);
}
const gl_texture_index target = base_type->sampler_index();
const unsigned shadow = base_type->sampler_shadow;
- for (unsigned i = uniform->sampler[shader_type].index;
+ for (unsigned i = uniform->opaque[shader_type].index;
i < MIN2(this->next_sampler, MAX_SAMPLERS);
i++) {
this->targets[i] = target;
this->shader_samplers_used |= 1U << i;
this->shader_shadow_samplers |= shadow << i;
}
- } else {
- uniform->sampler[shader_type].index = ~0;
- uniform->sampler[shader_type].active = false;
}
}
struct gl_uniform_storage *uniform)
{
if (base_type->is_image()) {
- uniform->image[shader_type].index = this->next_image;
- uniform->image[shader_type].active = true;
+ uniform->opaque[shader_type].index = this->next_image;
+ uniform->opaque[shader_type].active = true;
/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
this->next_image += MAX2(1, uniform->array_elements);
- } else {
- uniform->image[shader_type].index = ~0;
- uniform->image[shader_type].active = false;
}
}
struct gl_uniform_storage *uniform)
{
if (base_type->is_subroutine()) {
- uniform->subroutine[shader_type].index = this->next_subroutine;
- uniform->subroutine[shader_type].active = true;
+ uniform->opaque[shader_type].index = this->next_subroutine;
+ uniform->opaque[shader_type].active = true;
/* Increment the subroutine index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
this->next_subroutine += MAX2(1, uniform->array_elements);
- } else {
- uniform->subroutine[shader_type].index = ~0;
- uniform->subroutine[shader_type].active = false;
}
}
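The else branches deleted from handle_samplers(), handle_images() and handle_subroutines() all wrote the same inactive state (index = ~0, active = false); that initialization is now done once per uniform before any of the handlers run, as the next hunk shows.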
base_type = type;
}
+ /* Initialise opaque data */
+ this->uniforms[id].opaque[shader_type].index = ~0;
+ this->uniforms[id].opaque[shader_type].active = false;
+
/* This assigns uniform indices to sampler and image uniforms. */
handle_samplers(base_type, &this->uniforms[id], name);
handle_images(base_type, &this->uniforms[id]);
handle_subroutines(base_type, &this->uniforms[id]);
/* For array of arrays or struct arrays the base location may have
- * already been set so dont set it again.
+ * already been set so don't set it again.
*/
if (ubo_block_index == -1 && current_var->data.location == -1) {
current_var->data.location = id;
this->explicit_location + field_counter;
field_counter += entries;
} else {
- this->uniforms[id].remap_location = this->explicit_location;
+ this->uniforms[id].remap_location = this->explicit_location;
}
} else {
/* Initialize to indicate that no location is set */
if (type->without_array()->is_matrix()) {
const glsl_type *matrix = type->without_array();
const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4;
- const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
+ const unsigned items =
+ row_major ? matrix->matrix_columns : matrix->vector_elements;
assert(items <= 4);
if (packing == GLSL_INTERFACE_PACKING_STD430)
this->uniforms[id].matrix_stride = items < 3 ? items * N :
- glsl_align(items * N, 16);
+ glsl_align(items * N, 16);
else
this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
this->uniforms[id].row_major = row_major;
assert(found);
(void) found;
const gl_uniform_storage *storage = &prog->UniformStorage[id];
- const unsigned index = storage->image[i].index;
+ const unsigned index = storage->opaque[i].index;
const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
var->data.image_write_only ? GL_WRITE_ONLY :
GL_READ_WRITE);
foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
ir_variable *const var = node->as_variable();
- if ((var == NULL) || (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage))
+ if ((var == NULL) || (var->data.mode != ir_var_uniform &&
+ var->data.mode != ir_var_shader_storage))
continue;
parcel.set_and_process(prog, var);
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
prog->_LinkedShaders[i]->shadow_samplers = parcel.shader_shadow_samplers;
- STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == sizeof(parcel.targets));
+ STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) ==
+ sizeof(parcel.targets));
memcpy(prog->_LinkedShaders[i]->SamplerTargets, parcel.targets,
sizeof(prog->_LinkedShaders[i]->SamplerTargets));
}
if (!sh)
continue;
- if (!uniforms[i].subroutine[j].active)
+ if (!uniforms[i].opaque[j].active)
continue;
/* How many new entries for this uniform? */
if (!sh)
continue;
- if (!uniforms[i].subroutine[j].active)
+ if (!uniforms[i].opaque[j].active)
continue;
sh->SubroutineUniformRemapTable =
if (!ok) {
- ctx->Driver.DeleteShader(ctx, linked);
+ _mesa_delete_shader(ctx, linked);
return NULL;
}
if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
return;
- if (!add_packed_varyings(shProg, input_stage))
- return;
- if (!add_packed_varyings(shProg, output_stage))
- return;
+ /* Program interface needs to expose varyings in case of SSO. */
+ if (shProg->SeparateShader) {
+ if (!add_packed_varyings(shProg, input_stage))
+ return;
+ if (!add_packed_varyings(shProg, output_stage))
+ return;
+ }
/* Add inputs and outputs to the resource list. */
if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
continue;
for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) {
- if (!shProg->UniformStorage[i].subroutine[j].active)
+ if (!shProg->UniformStorage[i].opaque[j].active)
continue;
type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j);
for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] != NULL)
- ctx->Driver.DeleteShader(ctx, prog->_LinkedShaders[i]);
+ _mesa_delete_shader(ctx, prog->_LinkedShaders[i]);
prog->_LinkedShaders[i] = NULL;
}
if (!prog->LinkStatus) {
if (sh)
- ctx->Driver.DeleteShader(ctx, sh);
+ _mesa_delete_shader(ctx, sh);
goto done;
}
}
if (!prog->LinkStatus) {
if (sh)
- ctx->Driver.DeleteShader(ctx, sh);
+ _mesa_delete_shader(ctx, sh);
goto done;
}
shader->info.separate_shader = shader_prog->SeparateShader;
shader->info.gs.vertices_out = sh->Geom.VerticesOut;
shader->info.gs.invocations = sh->Geom.Invocations;
+ shader->info.has_transform_feedback_varyings =
+ shader_prog->TransformFeedback.NumVarying > 0;
return shader;
}
/* Whether or not separate shader objects were used */
bool separate_shader;
+ /** Was this shader linked with any transform feedback varyings? */
+ bool has_transform_feedback_varyings;
+
struct {
/** The maximum number of vertices the geometry shader might write. */
unsigned vertices_out;
LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
/*
*size = location;
}
+/**
+ * Returns true if we're processing a stage whose inputs are arrays indexed
+ * by a vertex number (such as geometry shader inputs).
+ */
static bool
-deref_has_indirect(nir_deref_var *deref)
+stage_uses_per_vertex_inputs(struct lower_io_state *state)
{
- for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
- if (tail->deref_type == nir_deref_type_array) {
- nir_deref_array *arr = nir_deref_as_array(tail);
- if (arr->deref_array_type == nir_deref_array_type_indirect)
- return true;
- }
- }
-
- return false;
+ gl_shader_stage stage = state->builder.shader->stage;
+ return stage == MESA_SHADER_GEOMETRY;
}
static unsigned
-get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
+get_io_offset(nir_deref_var *deref, nir_instr *instr,
+ nir_ssa_def **vertex_index,
+ nir_ssa_def **out_indirect,
struct lower_io_state *state)
{
- bool found_indirect = false;
+ nir_ssa_def *indirect = NULL;
unsigned base_offset = 0;
nir_builder *b = &state->builder;
b->cursor = nir_before_instr(instr);
nir_deref *tail = &deref->deref;
+
+ /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
+ * outermost array index separate. Process the rest normally.
+ */
+ if (vertex_index != NULL) {
+ tail = tail->child;
+ assert(tail->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+
+ nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
+ }
+ *vertex_index = vtx;
+ }
+
while (tail->child != NULL) {
const struct glsl_type *parent_type = tail->type;
tail = tail->child;
nir_imul(b, nir_imm_int(b, size),
nir_ssa_for_src(b, deref_array->indirect, 1));
- if (found_indirect) {
- indirect->ssa =
- nir_iadd(b, nir_ssa_for_src(b, *indirect, 1), mul);
- } else {
- indirect->ssa = mul;
- }
- indirect->is_ssa = true;
- found_indirect = true;
+ indirect = indirect ? nir_iadd(b, indirect, mul) : mul;
}
} else if (tail->deref_type == nir_deref_type_struct) {
nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
}
}
+ *out_indirect = indirect;
return base_offset;
}
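To make the split concrete, a worked example as a comment (hypothetical geometry-shader input; the driver_location value is invented for illustration):

   /* For a deref of gl_in[v].gl_Position with driver_location = 4 and a
    * non-constant v, get_io_offset() now produces:
    *
    *   *vertex_index = iadd(imm(0), v)   <- outermost array index only
    *   *out_indirect = NULL              <- nothing further is indirect
    *   return value  = 0                 <- constant offset within the vertex
    *
    * so the caller emits load_per_vertex_input with
    * const_index[0] = location + offset = 4 and src[0] = vertex_index,
    * instead of the old load_input_indirect form.
    */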
static nir_intrinsic_op
-load_op(nir_variable_mode mode, bool has_indirect)
+load_op(struct lower_io_state *state,
+ nir_variable_mode mode, bool per_vertex, bool has_indirect)
{
nir_intrinsic_op op;
switch (mode) {
case nir_var_shader_in:
- op = has_indirect ? nir_intrinsic_load_input_indirect :
- nir_intrinsic_load_input;
+ if (per_vertex) {
+ op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect :
+ nir_intrinsic_load_per_vertex_input;
+ } else {
+ op = has_indirect ? nir_intrinsic_load_input_indirect :
+ nir_intrinsic_load_input;
+ }
break;
case nir_var_uniform:
op = has_indirect ? nir_intrinsic_load_uniform_indirect :
if (mode != nir_var_shader_in && mode != nir_var_uniform)
continue;
- bool has_indirect = deref_has_indirect(intrin->variables[0]);
+ bool per_vertex = stage_uses_per_vertex_inputs(state) &&
+ mode == nir_var_shader_in;
+
+ nir_ssa_def *indirect;
+ nir_ssa_def *vertex_index;
+
+ unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
+ per_vertex ? &vertex_index : NULL,
+ &indirect, state);
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(state->mem_ctx,
- load_op(mode, has_indirect));
+ load_op(state, mode, per_vertex,
+ indirect));
load->num_components = intrin->num_components;
- nir_src indirect;
- unsigned offset = get_io_offset(intrin->variables[0],
- &intrin->instr, &indirect, state);
-
unsigned location = intrin->variables[0]->var->data.driver_location;
if (mode == nir_var_uniform) {
load->const_index[0] = location;
load->const_index[0] = location + offset;
}
- if (has_indirect)
- load->src[0] = indirect;
+ if (per_vertex)
+ load->src[0] = nir_src_for_ssa(vertex_index);
+
+ if (indirect)
+ load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect);
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&load->instr, &load->dest,
if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
continue;
- bool has_indirect = deref_has_indirect(intrin->variables[0]);
+ nir_ssa_def *indirect;
+
+ unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
+ NULL, &indirect, state);
+ offset += intrin->variables[0]->var->data.driver_location;
nir_intrinsic_op store_op;
- if (has_indirect) {
+ if (indirect) {
store_op = nir_intrinsic_store_output_indirect;
} else {
store_op = nir_intrinsic_store_output;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
store_op);
store->num_components = intrin->num_components;
-
- nir_src indirect;
- unsigned offset = get_io_offset(intrin->variables[0],
- &intrin->instr, &indirect, state);
- offset += intrin->variables[0]->var->data.driver_location;
-
store->const_index[0] = offset;
nir_src_copy(&store->src[0], &intrin->src[0], store);
- if (has_indirect)
- store->src[1] = indirect;
+ if (indirect)
+ store->src[1] = nir_src_for_ssa(indirect);
nir_instr_insert_before(&intrin->instr, &store->instr);
nir_instr_remove(&intrin->instr);
}
if (location > shader_program->NumUniformStorage - 1 ||
- !shader_program->UniformStorage[location].sampler[stage].active) {
+ !shader_program->UniformStorage[location].opaque[stage].active) {
assert(!"cannot return a sampler");
return;
}
instr->sampler_index +=
- shader_program->UniformStorage[location].sampler[stage].index;
+ shader_program->UniformStorage[location].opaque[stage].index;
instr->sampler = NULL;
}
break;
case nir_intrinsic_load_input:
case nir_intrinsic_load_input_indirect:
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_input_indirect:
var_list = &state->shader->inputs;
break;
case nir_intrinsic_store_output:
/* First, move ownership of all the memory to a temporary context; assume dead. */
ralloc_adopt(rubbish, nir);
+ ralloc_steal(nir, (char *)nir->info.name);
+
/* Variables and registers are not dead. Steal them back. */
steal_list(nir, nir_variable, &nir->uniforms);
steal_list(nir, nir_variable, &nir->inputs);
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_cse.cpp
- *
- * constant subexpression elimination at the GLSL IR level.
- *
- * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This one
- * is generic and handles texture operations, but it's rather simple currently
- * and doesn't support modification of variables in the available expressions
- * list, so it can't do variables other than uniforms or shader inputs.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_basic_block.h"
-#include "ir_optimization.h"
-#include "ir_builder.h"
-#include "glsl_types.h"
-
-using namespace ir_builder;
-
-static bool debug = false;
-
-namespace {
-
-/**
- * This is the record of an available expression for common subexpression
- * elimination.
- */
-class ae_entry : public exec_node
-{
-public:
- ae_entry(ir_instruction *base_ir, ir_rvalue **val)
- : val(val), base_ir(base_ir)
- {
- assert(val);
- assert(*val);
- assert(base_ir);
-
- var = NULL;
- }
-
- void init(ir_instruction *base_ir, ir_rvalue **val)
- {
- this->val = val;
- this->base_ir = base_ir;
- this->var = NULL;
-
- assert(val);
- assert(*val);
- assert(base_ir);
- }
-
- /**
- * The pointer to the expression that we might be able to reuse
- *
- * Note the double pointer -- this is the place in the base_ir expression
- * tree that we would rewrite to move the expression out to a new variable
- * assignment.
- */
- ir_rvalue **val;
-
- /**
- * Root instruction in the basic block where the expression appeared.
- *
- * This is used so that we can insert the new variable declaration into the
- * instruction stream (since *val is just somewhere in base_ir's expression
- * tree).
- */
- ir_instruction *base_ir;
-
- /**
- * The variable that the expression has been stored in, if it's been CSEd
- * once already.
- */
- ir_variable *var;
-};
-
-class cse_visitor : public ir_rvalue_visitor {
-public:
- cse_visitor(exec_list *validate_instructions)
- : validate_instructions(validate_instructions)
- {
- progress = false;
- mem_ctx = ralloc_context(NULL);
- this->ae = new(mem_ctx) exec_list;
- }
- ~cse_visitor()
- {
- ralloc_free(mem_ctx);
- }
-
- virtual ir_visitor_status visit_enter(ir_function_signature *ir);
- virtual ir_visitor_status visit_enter(ir_loop *ir);
- virtual ir_visitor_status visit_enter(ir_if *ir);
- virtual ir_visitor_status visit_enter(ir_call *ir);
- virtual void handle_rvalue(ir_rvalue **rvalue);
-
- bool progress;
-
-private:
- void *mem_ctx;
-
- ir_rvalue *try_cse(ir_rvalue *rvalue);
- void add_to_ae(ir_rvalue **rvalue);
-
- /**
- * Move all nodes from the ae list to the free list
- */
- void empty_ae_list();
-
- /**
- * Get and initialize a new ae_entry
- *
- * This will either come from the free list or be freshly allocated.
- */
- ae_entry *get_ae_entry(ir_rvalue **rvalue);
-
- /** List of ae_entry: The available expressions to reuse */
- exec_list *ae;
-
- /**
- * The whole shader, so that we can validate_ir_tree in debug mode.
- *
- * This proved quite useful when trying to get the tree manipulation
- * right.
- */
- exec_list *validate_instructions;
-
- /**
- * List of available-for-use ae_entry objects.
- */
- exec_list free_ae_entries;
-};
-
-/**
- * Visitor to walk an expression tree to check that all variables referenced
- * are constants.
- */
-class is_cse_candidate_visitor : public ir_hierarchical_visitor
-{
-public:
-
- is_cse_candidate_visitor()
- : ok(true)
- {
- }
-
- virtual ir_visitor_status visit(ir_dereference_variable *ir);
-
- bool ok;
-};
-
-
-class contains_rvalue_visitor : public ir_rvalue_visitor
-{
-public:
-
- contains_rvalue_visitor(ir_rvalue *val)
- : val(val)
- {
- found = false;
- }
-
- virtual void handle_rvalue(ir_rvalue **rvalue);
-
- bool found;
-
-private:
- ir_rvalue *val;
-};
-
-} /* unnamed namespace */
-
-static void
-dump_ae(exec_list *ae)
-{
- int i = 0;
-
- printf("CSE: AE contents:\n");
- foreach_in_list(ae_entry, entry, ae) {
- printf("CSE: AE %2d (%p): ", i, entry);
- (*entry->val)->print();
- printf("\n");
-
- if (entry->var)
- printf("CSE: in var %p:\n", entry->var);
-
- i++;
- }
-}
-
-ir_visitor_status
-is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
-{
- /* Currently, since we don't handle kills of the ae based on variables
- * getting assigned, we can only handle constant variables.
- */
- if (ir->var->data.read_only) {
- return visit_continue;
- } else {
- if (debug)
- printf("CSE: non-candidate: var %s is not read only\n", ir->var->name);
- ok = false;
- return visit_stop;
- }
-}
-
-void
-contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
- if (*rvalue == val)
- found = true;
-}
-
-static bool
-contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
-{
- contains_rvalue_visitor v(needle);
- haystack->accept(&v);
- return v.found;
-}
-
-static bool
-is_cse_candidate(ir_rvalue *ir)
-{
- /* Our temporary variable assignment generation isn't ready to handle
- * anything bigger than a vector.
- */
- if (!ir->type->is_vector() && !ir->type->is_scalar()) {
- if (debug)
- printf("CSE: non-candidate: not a vector/scalar\n");
- return false;
- }
-
- /* Only handle expressions and textures currently. We may want to extend
- * to variable-index array dereferences at some point.
- */
- switch (ir->ir_type) {
- case ir_type_expression:
- case ir_type_texture:
- break;
- default:
- if (debug)
- printf("CSE: non-candidate: not an expression/texture\n");
- return false;
- }
-
- is_cse_candidate_visitor v;
-
- ir->accept(&v);
-
- return v.ok;
-}
-
-/**
- * Tries to find and return a reference to a previous computation of a given
- * expression.
- *
- * Walk the list of available expressions checking if any of them match the
- * rvalue, and if so, move the previous copy of the expression to a temporary
- * and return a reference of the temporary.
- */
-ir_rvalue *
-cse_visitor::try_cse(ir_rvalue *rvalue)
-{
- foreach_in_list(ae_entry, entry, ae) {
- if (debug) {
- printf("Comparing to AE %p: ", entry);
- (*entry->val)->print();
- printf("\n");
- }
-
- if (!rvalue->equals(*entry->val))
- continue;
-
- if (debug) {
- printf("CSE: Replacing: ");
- (*entry->val)->print();
- printf("\n");
- printf("CSE: with: ");
- rvalue->print();
- printf("\n");
- }
-
- if (!entry->var) {
- ir_instruction *base_ir = entry->base_ir;
-
- ir_variable *var = new(rvalue) ir_variable(rvalue->type,
- "cse",
- ir_var_temporary);
-
- /* Write the previous expression result into a new variable. */
- base_ir->insert_before(var);
- ir_assignment *assignment = assign(var, *entry->val);
- base_ir->insert_before(assignment);
-
- /* Replace the expression in the original tree with a deref of the
- * variable, but keep tracking the expression for further reuse.
- */
- *entry->val = new(rvalue) ir_dereference_variable(var);
- entry->val = &assignment->rhs;
-
- entry->var = var;
-
- /* Update the base_irs in the AE list. We have to be sure that
- * they're correct -- expressions from our base_ir that weren't moved
- * need to stay in this base_ir (so that later consumption of them
- * puts new variables between our new variable and our base_ir), but
- * expressions from our base_ir that we *did* move need base_ir
- * updated so that any further elimination from inside gets its new
- * assignments put before our new assignment.
- */
- foreach_in_list(ae_entry, fixup_entry, ae) {
- if (contains_rvalue(assignment->rhs, *fixup_entry->val))
- fixup_entry->base_ir = assignment;
- }
-
- if (debug)
- dump_ae(ae);
- }
-
- /* Replace the expression in our current tree with the variable. */
- return new(rvalue) ir_dereference_variable(entry->var);
- }
-
- return NULL;
-}
-
-void
-cse_visitor::empty_ae_list()
-{
- free_ae_entries.append_list(ae);
-}
-
-ae_entry *
-cse_visitor::get_ae_entry(ir_rvalue **rvalue)
-{
- ae_entry *entry = (ae_entry *) free_ae_entries.pop_head();
- if (entry) {
- entry->init(base_ir, rvalue);
- } else {
- entry = new(mem_ctx) ae_entry(base_ir, rvalue);
- }
-
- return entry;
-}
-
-/** Add the rvalue to the list of available expressions for CSE. */
-void
-cse_visitor::add_to_ae(ir_rvalue **rvalue)
-{
- if (debug) {
- printf("CSE: Add to AE: ");
- (*rvalue)->print();
- printf("\n");
- }
-
- ae->push_tail(get_ae_entry(rvalue));
-
- if (debug)
- dump_ae(ae);
-}
-
-void
-cse_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
- if (!*rvalue)
- return;
-
- if (debug) {
- printf("CSE: handle_rvalue ");
- (*rvalue)->print();
- printf("\n");
- }
-
- if (!is_cse_candidate(*rvalue))
- return;
-
- ir_rvalue *new_rvalue = try_cse(*rvalue);
- if (new_rvalue) {
- *rvalue = new_rvalue;
- progress = true;
-
- if (debug)
- validate_ir_tree(validate_instructions);
- } else {
- add_to_ae(rvalue);
- }
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_if *ir)
-{
- handle_rvalue(&ir->condition);
-
- empty_ae_list();
- visit_list_elements(this, &ir->then_instructions);
-
- empty_ae_list();
- visit_list_elements(this, &ir->else_instructions);
-
- empty_ae_list();
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_function_signature *ir)
-{
- empty_ae_list();
- visit_list_elements(this, &ir->body);
-
- empty_ae_list();
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_loop *ir)
-{
- empty_ae_list();
- visit_list_elements(this, &ir->body_instructions);
-
- empty_ae_list();
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_call *)
-{
- /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
- * pointer to the (ir_rvalue *) on the stack. Since we save those pointers
- * in the AE list, we can't let handle_rvalue get called.
- */
- return visit_continue_with_parent;
-}
-
-/**
- * Does a (uniform-value) constant subexpression elimination pass on the code
- * present in the instruction stream.
- */
-bool
-do_cse(exec_list *instructions)
-{
- cse_visitor v(instructions);
-
- visit_list_elements(&v, instructions);
-
- return v.progress;
-}
return shader;
}
+void
+_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
+{
+ free((void *)sh->Source);
+ free(sh->Label);
+ ralloc_free(sh);
+}
+
void
_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
{
extern "C" struct gl_shader *
_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
+extern "C" void
+_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh);
+
extern "C" void
_mesa_clear_shader_program_data(struct gl_shader_program *);
prog->UniformStorage[index_to_set].array_elements = array_size;
prog->UniformStorage[index_to_set].initialized = false;
for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
- prog->UniformStorage[index_to_set].sampler[sh].index = ~0;
- prog->UniformStorage[index_to_set].sampler[sh].active = false;
+ prog->UniformStorage[index_to_set].opaque[sh].index = ~0;
+ prog->UniformStorage[index_to_set].opaque[sh].active = false;
}
prog->UniformStorage[index_to_set].num_driver_storage = 0;
prog->UniformStorage[index_to_set].driver_storage = NULL;
prog->UniformStorage[i].array_elements = 0;
prog->UniformStorage[i].initialized = false;
for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
- prog->UniformStorage[i].sampler[sh].index = ~0;
- prog->UniformStorage[i].sampler[sh].active = false;
+ prog->UniformStorage[i].opaque[sh].index = ~0;
+ prog->UniformStorage[i].opaque[sh].active = false;
}
prog->UniformStorage[i].num_driver_storage = 0;
prog->UniformStorage[i].driver_storage = NULL;
#define __GLX_PUT_CHAR(offset,a) \
*((INT8 *) (pc + offset)) = a
-#ifndef _CRAY
#define __GLX_PUT_SHORT(offset,a) \
*((INT16 *) (pc + offset)) = a
#define __GLX_PUT_FLOAT(offset,a) \
*((FLOAT32 *) (pc + offset)) = a
-#else
-#define __GLX_PUT_SHORT(offset,a) \
- { GLubyte *cp = (pc+offset); \
- int shift = (64-16) - ((int)(cp) >> (64-6)); \
- *(int *)cp = (*(int *)cp & ~(0xffff << shift)) | ((a & 0xffff) << shift); }
-
-#define __GLX_PUT_LONG(offset,a) \
- { GLubyte *cp = (pc+offset); \
- int shift = (64-32) - ((int)(cp) >> (64-6)); \
- *(int *)cp = (*(int *)cp & ~(0xffffffff << shift)) | ((a & 0xffffffff) << shift); }
-
-#define __GLX_PUT_FLOAT(offset,a) \
- gl_put_float((pc + offset),a)
-
-#define __GLX_PUT_DOUBLE(offset,a) \
- gl_put_double(pc + offset, a)
-
-extern void gl_put_float( /*GLubyte *, struct cray_single */ );
-extern void gl_put_double( /*GLubyte *, struct cray_double */ );
-#endif
-
-#ifndef _CRAY
-
#ifdef __GLX_ALIGN64
/*
** This can certainly be done better for a particular machine
*((FLOAT64 *) (pc + offset)) = a
#endif
-#endif
-
#define __GLX_PUT_CHAR_ARRAY(offset,a,alen) \
__GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_INT8)
-#ifndef _CRAY
#define __GLX_PUT_SHORT_ARRAY(offset,a,alen) \
__GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_INT16)
#define __GLX_PUT_DOUBLE_ARRAY(offset,a,alen) \
__GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_FLOAT64)
-#else
-#define __GLX_PUT_SHORT_ARRAY(offset,a,alen) \
- gl_put_short_array((GLubyte *)(pc + offset), a, alen * __GLX_SIZE_INT16)
-
-#define __GLX_PUT_LONG_ARRAY(offset,a,alen) \
- gl_put_long_array((GLubyte *)(pc + offset), (long *)a, alen * __GLX_SIZE_INT32)
-
-#define __GLX_PUT_FLOAT_ARRAY(offset,a,alen) \
- gl_put_float_array((GLubyte *)(pc + offset), (float *)a, alen * __GLX_SIZE_FLOAT32)
-
-#define __GLX_PUT_DOUBLE_ARRAY(offset,a,alen) \
- gl_put_double_array((GLubyte *)(pc + offset), (double *)a, alen * __GLX_SIZE_FLOAT64)
-
-extern gl_put_short_array(GLubyte *, short *, int);
-extern gl_put_long_array(GLubyte *, long *, int);
-extern gl_put_float_array(GLubyte *, float *, int);
-extern gl_put_double_array(GLubyte *, double *, int);
-
-#endif /* _CRAY */
#endif /* !__GLX_packrender_h__ */
#define __GLX_SINGLE_PUT_CHAR(offset,a) \
*((INT8 *) (pc + offset)) = a
-#ifndef CRAY
#define __GLX_SINGLE_PUT_SHORT(offset,a) \
*((INT16 *) (pc + offset)) = a
#define __GLX_SINGLE_PUT_FLOAT(offset,a) \
*((FLOAT32 *) (pc + offset)) = a
-#else
-#define __GLX_SINGLE_PUT_SHORT(offset,a) \
- { GLubyte *cp = (pc+offset); \
- int shift = (64-16) - ((int)(cp) >> (64-6)); \
- *(int *)cp = (*(int *)cp & ~(0xffff << shift)) | ((a & 0xffff) << shift); }
-
-#define __GLX_SINGLE_PUT_LONG(offset,a) \
- { GLubyte *cp = (pc+offset); \
- int shift = (64-32) - ((int)(cp) >> (64-6)); \
- *(int *)cp = (*(int *)cp & ~(0xffffffff << shift)) | ((a & 0xffffffff) << shift); }
-
-#define __GLX_SINGLE_PUT_FLOAT(offset,a) \
- gl_put_float(pc + offset, a)
-#endif
-
/* Read support macros */
#define __GLX_SINGLE_READ_XREPLY() \
(void) _XReply(dpy, (xReply*) &reply, 0, False)
#define __GLX_SINGLE_GET_SIZE(a) \
a = (GLint) reply.size
-#ifndef _CRAY
#define __GLX_SINGLE_GET_CHAR(p) \
*p = *(GLbyte *)&reply.pad3;
#define __GLX_SINGLE_GET_FLOAT(p) \
*p = *(GLfloat *)&reply.pad3;
-#else
-#define __GLX_SINGLE_GET_CHAR(p) \
- *p = reply.pad3 >> 24;
-
-#define __GLX_SINGLE_GET_SHORT(p) \
- {int t = reply.pad3 >> 16; \
- *p = (t & 0x8000) ? (t | ~0xffff) : (t & 0xffff);}
-
-#define __GLX_SINGLE_GET_LONG(p) \
- {int t = reply.pad3; \
- *p = (t & 0x80000000) ? (t | ~0xffffffff) : (t & 0xffffffff);}
-
-#define PAD3OFFSET 16
-#define __GLX_SINGLE_GET_FLOAT(p) \
- *p = gl_ntoh_float((GLubyte *)&reply + PAD3OFFSET);
-
-#define __GLX_SINGLE_GET_DOUBLE(p) \
- *p = gl_ntoh_double((GLubyte *)&reply + PAD3OFFSET);
-
-extern float gl_ntoh_float(GLubyte *);
-extern float gl_ntoh_double(GLubyte *);
-#endif
-
-#ifndef _CRAY
-
#ifdef __GLX_ALIGN64
#define __GLX_SINGLE_GET_DOUBLE(p) \
__GLX_MEM_COPY(p, &reply.pad3, 8)
*p = *(GLdouble *)&reply.pad3
#endif
-#endif
-
/* Get an array of typed data */
#define __GLX_SINGLE_GET_VOID_ARRAY(a,alen) \
{ \
#define __GLX_SINGLE_GET_LONG_ARRAY(a,alen) \
_XRead(dpy,(char *)a,alen*__GLX_SIZE_INT32);
-#ifndef _CRAY
#define __GLX_SINGLE_GET_FLOAT_ARRAY(a,alen) \
_XRead(dpy,(char *)a,alen*__GLX_SIZE_FLOAT32);
#define __GLX_SINGLE_GET_DOUBLE_ARRAY(a,alen) \
_XRead(dpy,(char *)a,alen*__GLX_SIZE_FLOAT64);
-#else
-#define __GLX_SINGLE_GET_FLOAT_ARRAY(a,alen) \
- gl_get_float_array(dpy,a,alen);
-
-#define __GLX_SINGLE_GET_DOUBLE_ARRAY(a,alen) \
- gl_get_double_array(dpy, a, alen);
-
-extern void gl_get_float_array(Display * dpy, float *a, int alen);
-extern void gl_get_double_array(Display * dpy, double *a, int alen);
-#endif
-
#endif /* !__GLX_packsingle_h__ */
<param name="label" type="GLchar *"/>
</function>
+ <!-- ES extension has different suffixes -->
+ <function name="DebugMessageControlKHR" alias="DebugMessageControl" es1="1.0" es2="2.0">
+ <param name="source" type="GLenum"/>
+ <param name="type" type="GLenum"/>
+ <param name="severity" type="GLenum"/>
+ <param name="count" type="GLsizei" counter="true"/>
+ <param name="ids" type="const GLuint *" count="count"/>
+ <param name="enabled" type="GLboolean"/>
+ </function>
+
+ <function name="DebugMessageInsertKHR" alias="DebugMessageInsert" es1="1.0" es2="2.0">
+ <param name="source" type="GLenum"/>
+ <param name="type" type="GLenum"/>
+ <param name="id" type="GLuint"/>
+ <param name="severity" type="GLenum"/>
+ <param name="length" type="GLsizei"/>
+ <param name="buf" type="const GLchar *"/>
+ </function>
+
+ <function name="DebugMessageCallbackKHR" alias="DebugMessageCallback" es1="1.0" es2="2.0">
+ <param name="callback" type="GLDEBUGPROC"/>
+ <param name="userParam" type="const GLvoid *"/>
+ </function>
+
+ <function name="GetDebugMessageLogKHR" alias="GetDebugMessageLog" es1="1.0" es2="2.0">
+ <return type="GLuint"/>
+ <param name="count" type="GLuint"/>
+ <param name="bufsize" type="GLsizei"/>
+ <param name="sources" type="GLenum *" output="true"/>
+ <param name="types" type="GLenum *" output="true"/>
+ <param name="ids" type="GLuint *" output="true"/>
+ <param name="severities" type="GLenum *" output="true"/>
+ <param name="lengths" type="GLsizei *" output="true"/>
+ <param name="messageLog" type="GLchar *" output="true"/>
+ </function>
+
+ <function name="PushDebugGroupKHR" alias="PushDebugGroup" es1="1.0" es2="2.0">
+ <param name="source" type="GLenum"/>
+ <param name="id" type="GLuint"/>
+ <param name="length" type="GLsizei"/>
+ <param name="message" type="const GLchar *"/>
+ </function>
+
+ <function name="PopDebugGroupKHR" alias="PopDebugGroup" es1="1.0" es2="2.0"/>
+
+ <function name="ObjectLabelKHR" alias="ObjectLabel" es1="1.0" es2="2.0">
+ <param name="identifier" type="GLenum"/>
+ <param name="name" type="GLuint"/>
+ <param name="length" type="GLsizei"/>
+ <param name="label" type="const GLchar *"/>
+ </function>
+
+ <function name="GetObjectLabelKHR" alias="GetObjectLabel" es1="1.0" es2="2.0">
+ <param name="identifier" type="GLenum"/>
+ <param name="name" type="GLuint"/>
+ <param name="bufSize" type="GLsizei"/>
+ <param name="length" type="GLsizei *"/>
+ <param name="label" type="GLchar *"/>
+ </function>
+
+ <function name="ObjectPtrLabelKHR" alias="ObjectPtrLabel" es1="1.0" es2="2.0">
+ <param name="ptr" type="const GLvoid *"/>
+ <param name="length" type="GLsizei"/>
+ <param name="label" type="const GLchar *"/>
+ </function>
+
+ <function name="GetObjectPtrLabelKHR" alias="GetObjectPtrLabel" es1="1.0" es2="2.0">
+ <param name="ptr" type="const GLvoid *"/>
+ <param name="bufSize" type="GLsizei"/>
+ <param name="length" type="GLsizei *"/>
+ <param name="label" type="GLchar *"/>
+ </function>
+
</category>
</OpenGLAPI>
driver->GetString = NULL; /* REQUIRED! */
driver->UpdateState = NULL; /* REQUIRED! */
- driver->ResizeBuffers = _mesa_resize_framebuffer;
driver->Finish = NULL;
driver->Flush = NULL;
/* framebuffer/image functions */
driver->Clear = _swrast_Clear;
- driver->Accum = _mesa_accum;
driver->RasterPos = _tnl_RasterPos;
driver->DrawPixels = _swrast_DrawPixels;
driver->ReadPixels = _mesa_readpixels;
driver->DepthRange = NULL;
driver->Enable = NULL;
driver->Fogfv = NULL;
- driver->Hint = NULL;
driver->Lightfv = NULL;
driver->LightModelfv = NULL;
driver->LineStipple = NULL;
driver->DiscardFramebuffer = NULL;
_mesa_init_texture_barrier_functions(driver);
-
- /* APPLE_vertex_array_object */
- driver->NewArrayObject = _mesa_new_vao;
- driver->DeleteArrayObject = _mesa_delete_vao;
- driver->BindArrayObject = NULL;
-
_mesa_init_shader_object_functions(driver);
-
_mesa_init_transform_feedback_functions(driver);
-
_mesa_init_sampler_object_functions(driver);
/* T&L stuff */
driver->SaveNeedFlush = 0;
driver->ProgramStringNotify = _tnl_program_string;
- driver->FlushVertices = NULL;
- driver->SaveFlushVertices = NULL;
- driver->NotifySaveBegin = NULL;
driver->LightingSpaceChange = NULL;
- /* display list */
- driver->NewList = NULL;
- driver->EndList = NULL;
- driver->BeginCallList = NULL;
- driver->EndCallList = NULL;
-
/* GL_ARB_texture_storage */
driver->AllocTextureStorage = _mesa_AllocTextureStorage_sw;
ctx->Color.Blend[0].SrcA,
ctx->Color.Blend[0].DstA);
- if (ctx->Driver.ColorMaskIndexed) {
- GLuint i;
- for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
- ctx->Driver.ColorMaskIndexed(ctx, i,
- ctx->Color.ColorMask[i][RCOMP],
- ctx->Color.ColorMask[i][GCOMP],
- ctx->Color.ColorMask[i][BCOMP],
- ctx->Color.ColorMask[i][ACOMP]);
- }
- }
- else {
- ctx->Driver.ColorMask(ctx,
- ctx->Color.ColorMask[0][RCOMP],
- ctx->Color.ColorMask[0][GCOMP],
- ctx->Color.ColorMask[0][BCOMP],
- ctx->Color.ColorMask[0][ACOMP]);
- }
+ ctx->Driver.ColorMask(ctx,
+ ctx->Color.ColorMask[0][RCOMP],
+ ctx->Color.ColorMask[0][GCOMP],
+ ctx->Color.ColorMask[0][BCOMP],
+ ctx->Color.ColorMask[0][ACOMP]);
ctx->Driver.CullFace(ctx, ctx->Polygon.CullFaceMode);
ctx->Driver.DepthFunc(ctx, ctx->Depth.Func);
/**
* Called via ctx->Driver.GenerateMipmap()
- * Note: We don't yet support 3D textures, 1D/2D array textures or texture
- * borders.
+ * Note: We don't yet support 3D textures or texture borders.
*/
void
_mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
#include "utils.h"
#include "xmlpool.h"
#include "main/mtypes.h"
+#include "main/framebuffer.h"
#include "main/version.h"
#include "main/errors.h"
#include "main/macros.h"
{
struct gl_framebuffer *fb = (struct gl_framebuffer *) dPriv->driverPrivate;
if (fb && (dPriv->w != fb->Width || dPriv->h != fb->Height)) {
- ctx->Driver.ResizeBuffers(ctx, fb, dPriv->w, dPriv->h);
+ _mesa_resize_framebuffer(ctx, fb, dPriv->w, dPriv->h);
/* if the driver needs the hw lock for ResizeBuffers, the drawable
might have changed again by now */
assert(fb->Width == dPriv->w);
#include "utils.h"
#include "dri_util.h"
-
-uint64_t
-driParseDebugString(const char *debug,
- const struct dri_debug_control *control)
-{
- uint64_t flag = 0;
-
- if (debug != NULL) {
- for (; control->string != NULL; control++) {
- if (!strcmp(debug, "all")) {
- flag |= control->flag;
-
- } else {
- const char *s = debug;
- unsigned n;
-
- for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
- if (strlen(control->string) == n &&
- !strncmp(control->string, s, n))
- flag |= control->flag;
- }
- }
- }
- }
-
- return flag;
-}
-
-
-
/**
* Create the \c GL_RENDERER string for DRI drivers.
*
#include <GL/internal/dri_interface.h>
#include "main/context.h"
-struct dri_debug_control {
- const char * string;
- uint64_t flag;
-};
-
-extern uint64_t driParseDebugString( const char * debug,
- const struct dri_debug_control * control );
-
extern unsigned driGetRendererString( char * buffer,
const char * hardware_name, GLuint agp_mode );
else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) {
mode = CULLMODE_CW;
+ if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
+ mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
if (ctx->Polygon.CullFaceMode == GL_FRONT)
mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
if (ctx->Polygon.FrontFace != GL_CCW)
#include "main/mtypes.h"
#include "main/imports.h"
#include "main/macros.h"
-#include "main/colormac.h"
#include "main/renderbuffer.h"
#include "main/framebuffer.h"
static void
intelDrawBuffer(struct gl_context * ctx, GLenum mode)
{
- if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+ if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
struct intel_context *const intel = intel_context(ctx);
- const bool was_front_buffer_rendering =
- intel->is_front_buffer_rendering;
- intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT)
- || (mode == GL_FRONT) || (mode == GL_FRONT_AND_BACK);
-
- /* If we weren't front-buffer rendering before but we are now,
- * invalidate our DRI drawable so we'll ask for new buffers
+ /* If we might be front-buffer rendering on this buffer for the first
+ * time, invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start rendering again.
*/
- if (!was_front_buffer_rendering && intel->is_front_buffer_rendering)
- dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
+ dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
}
intel_draw_buffer(ctx);
static void
intelReadBuffer(struct gl_context * ctx, GLenum mode)
{
- if (ctx->ReadBuffer && _mesa_is_winsys_fbo(ctx->ReadBuffer)) {
+ if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) {
struct intel_context *const intel = intel_context(ctx);
- const bool was_front_buffer_reading =
- intel->is_front_buffer_reading;
-
- intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
- || (mode == GL_FRONT);
- /* If we weren't front-buffer reading before but we are now,
- * invalidate our DRI drawable so we'll ask for new buffers
+ /* If we might be front-buffer reading on this buffer for the first
+ * time, invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start reading again.
*/
- if (!was_front_buffer_reading && intel->is_front_buffer_reading)
- dri2InvalidateDrawable(intel->driContext->driReadablePriv);
+ dri2InvalidateDrawable(intel->driContext->driReadablePriv);
}
}
#include "intel_context.h"
struct intel_context;
-struct intel_framebuffer;
extern void intel_check_front_buffer_rendering(struct intel_context *intel);
#include "intel_mipmap_tree.h"
#include "utils.h"
+#include "util/debug.h"
#include "util/ralloc.h"
int INTEL_DEBUG = (0);
* that will happen next will probably dirty the front buffer. So
* mark it as dirty here.
*/
- if (intel->is_front_buffer_rendering)
+ if (_mesa_is_front_buffer_drawing(intel->ctx.DrawBuffer))
intel->front_buffer_dirty = true;
/* Wait for the swapbuffers before the one we just emitted, so we
intelCalcViewport(ctx);
}
-static const struct dri_debug_control debug_control[] = {
+static const struct debug_control debug_control[] = {
{ "tex", DEBUG_TEXTURE},
{ "state", DEBUG_STATE},
{ "blit", DEBUG_BLIT},
intel_flush(ctx);
intel_flush_front(ctx);
- if (intel->is_front_buffer_rendering)
+ if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
intel->need_throttle = true;
}
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
- ctx->Const.MaxLineWidth = 5.0;
- ctx->Const.MaxLineWidthAA = 5.0;
+ ctx->Const.MaxLineWidth = 7.0;
+ ctx->Const.MaxLineWidthAA = 7.0;
ctx->Const.LineWidthGranularity = 0.5;
ctx->Const.MinPointSize = 1.0;
intelInitExtensions(ctx);
- INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
+ INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
if (INTEL_DEBUG & DEBUG_BUFMGR)
dri_bufmgr_set_debug(intel->bufmgr, true);
if (INTEL_DEBUG & DEBUG_PERF)
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
memset(attachments, 0, sizeof(attachments));
- if ((intel->is_front_buffer_rendering ||
- intel->is_front_buffer_reading ||
+ if ((_mesa_is_front_buffer_drawing(fb) ||
+ _mesa_is_front_buffer_reading(fb) ||
!back_rb) && front_rb) {
/* If a fake front buffer is in use, then querying for
* __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
else
return;
- if ((intel->is_front_buffer_rendering || intel->is_front_buffer_reading || !back_rb) && front_rb)
+ if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
+ _mesa_is_front_buffer_reading(fb) || !back_rb)) {
buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
+ }
if (back_rb)
buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
*/
bool front_buffer_dirty;
- /**
- * Track whether front-buffer rendering is currently enabled
- *
- * A separate flag is used to track this in order to support MRT more
- * easily.
- */
- bool is_front_buffer_rendering;
- /**
- * Track whether front-buffer is the current read target.
- *
- * This is closely associated with is_front_buffer_rendering, but may
- * be set separately. The DRI2 fake front buffer must be referenced
- * either way.
- */
- bool is_front_buffer_reading;
-
bool use_early_z;
__DRIcontext *driContext;
return NULL;
}
-/**
- * Create a new framebuffer object.
- */
-static struct gl_framebuffer *
-intel_new_framebuffer(struct gl_context * ctx, GLuint name)
-{
- /* Only drawable state in intel_framebuffer at this time, just use Mesa's
- * class
- */
- return _mesa_new_framebuffer(ctx, name);
-}
-
-
/** Called by gl_renderbuffer::Delete() */
static void
intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
void
intel_fbo_init(struct intel_context *intel)
{
- intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer;
intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer;
intel->ctx.Driver.MapRenderbuffer = intel_map_renderbuffer;
intel->ctx.Driver.UnmapRenderbuffer = intel_unmap_renderbuffer;
void
intelInitPixelFuncs(struct dd_function_table *functions)
{
- functions->Accum = _mesa_accum;
functions->Bitmap = intelBitmap;
functions->CopyPixels = intelCopyPixels;
functions->DrawPixels = intelDrawPixels;
* dma buffers. Use strip/fan hardware primitives where possible.
* Try to simulate missing primitives with indexed vertices.
*/
-#define HAVE_POINTS 0 /* Has it, but can't use because subpixel has to
- * be adjusted for points on the INTEL/I845G
- */
+#define HAVE_POINTS 1
#define HAVE_LINES 1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_ELTS 0
static const uint32_t hw_prim[GL_POLYGON + 1] = {
- 0,
- PRIM3D_LINELIST,
- PRIM3D_LINESTRIP,
- PRIM3D_LINESTRIP,
- PRIM3D_TRILIST,
- PRIM3D_TRISTRIP,
- PRIM3D_TRIFAN,
- 0,
- 0,
- PRIM3D_POLY
+ [GL_POINTS] = PRIM3D_POINTLIST,
+ [GL_LINES ] = PRIM3D_LINELIST,
+ [GL_LINE_LOOP] = PRIM3D_LINESTRIP,
+ [GL_LINE_STRIP] = PRIM3D_LINESTRIP,
+ [GL_TRIANGLES] = PRIM3D_TRILIST,
+ [GL_TRIANGLE_STRIP] = PRIM3D_TRISTRIP,
+ [GL_TRIANGLE_FAN] = PRIM3D_TRIFAN,
+ [GL_QUADS] = 0,
+ [GL_QUAD_STRIP] = 0,
+ [GL_POLYGON] = PRIM3D_POLY,
};
static const GLenum reduced_prim[GL_POLYGON + 1] = {
- GL_POINTS,
- GL_LINES,
- GL_LINES,
- GL_LINES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES
+ [GL_POINTS] = GL_POINTS,
+ [GL_LINES] = GL_LINES,
+ [GL_LINE_LOOP] = GL_LINES,
+ [GL_LINE_STRIP] = GL_LINES,
+ [GL_TRIANGLES] = GL_TRIANGLES,
+ [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+ [GL_TRIANGLE_FAN] = GL_TRIANGLES,
+ [GL_QUADS] = GL_TRIANGLES,
+ [GL_QUAD_STRIP] = GL_TRIANGLES,
+ [GL_POLYGON] = GL_TRIANGLES,
};
static const int scale_prim[GL_POLYGON + 1] = {
- 0, /* fallback case */
- 1,
- 2,
- 2,
- 1,
- 3,
- 3,
- 0, /* fallback case */
- 0, /* fallback case */
- 3
+ [GL_POINTS] = 1,
+ [GL_LINES] = 1,
+ [GL_LINE_LOOP] = 2,
+ [GL_LINE_STRIP] = 2,
+ [GL_TRIANGLES] = 1,
+ [GL_TRIANGLE_STRIP] = 3,
+ [GL_TRIANGLE_FAN] = 3,
+ [GL_QUADS] = 0, /* fallback case */
+ [GL_QUAD_STRIP] = 0, /* fallback case */
+ [GL_POLYGON] = 3,
};
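Since GL_POINTS through GL_POLYGON are the consecutive values 0 through 9, designated initializers let these tables be indexed directly by the GL primitive enum, making each mapping explicit rather than relying on positional order.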
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "main/colormac.h"
#include "main/dd.h"
#include "intel_screen.h"
GLuint *vb = intel_get_prim_space(intel, 1);
int j;
- /* Adjust for sub pixel position -- still required for conform. */
- *(float *) &vb[0] = v0->v.x;
- *(float *) &vb[1] = v0->v.y;
- for (j = 2; j < vertsize; j++)
- vb[j] = v0->ui[j];
+ COPY_DWORDS(j, vb, vertsize, v0);
}
***********************************************************************/
static const GLuint hw_prim[GL_POLYGON + 1] = {
- PRIM3D_POINTLIST,
- PRIM3D_LINELIST,
- PRIM3D_LINELIST,
- PRIM3D_LINELIST,
- PRIM3D_TRILIST,
- PRIM3D_TRILIST,
- PRIM3D_TRILIST,
- PRIM3D_TRILIST,
- PRIM3D_TRILIST,
- PRIM3D_TRILIST
+ [GL_POINTS] = PRIM3D_POINTLIST,
+ [GL_LINES] = PRIM3D_LINELIST,
+ [GL_LINE_LOOP] = PRIM3D_LINELIST,
+ [GL_LINE_STRIP] = PRIM3D_LINELIST,
+ [GL_TRIANGLES] = PRIM3D_TRILIST,
+ [GL_TRIANGLE_STRIP] = PRIM3D_TRILIST,
+ [GL_TRIANGLE_FAN] = PRIM3D_TRILIST,
+ [GL_QUADS] = PRIM3D_TRILIST,
+ [GL_QUAD_STRIP] = PRIM3D_TRILIST,
+ [GL_POLYGON] = PRIM3D_TRILIST,
};
#define RASTERIZE(x) intelRasterPrimitive( ctx, x, hw_prim[x] )
{
struct intel_context *intel = intel_context(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
GLuint prim = intel->render_primitive;
/* Render the new vertices as an unclipped polygon.
*/
- {
- GLuint *tmp = VB->Elts;
- VB->Elts = (GLuint *) elts;
- tnl->Driver.Render.PrimTabElts[GL_POLYGON] (ctx, 0, n,
- PRIM_BEGIN | PRIM_END);
- VB->Elts = tmp;
- }
+ _tnl_RenderClippedPolygon(ctx, elts, n);
/* Restore the render primitive
*/
tnl->Driver.Render.PrimitiveNotify(ctx, prim);
}
-static void
-intelRenderClippedLine(struct gl_context * ctx, GLuint ii, GLuint jj)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
-
- tnl->Driver.Render.Line(ctx, ii, jj);
-}
-
static void
intelFastRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n)
{
const GLuint *start = (const GLuint *) V(elts[0]);
int i, j;
- for (i = 2; i < n; i++) {
- COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
- COPY_DWORDS(j, vb, vertsize, V(elts[i]));
- COPY_DWORDS(j, vb, vertsize, start);
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+ for (i = 2; i < n; i++) {
+ COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
+ COPY_DWORDS(j, vb, vertsize, V(elts[i]));
+ COPY_DWORDS(j, vb, vertsize, start);
+ }
+ } else {
+ for (i = 2; i < n; i++) {
+ COPY_DWORDS(j, vb, vertsize, start);
+ COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
+ COPY_DWORDS(j, vb, vertsize, V(elts[i]));
+ }
}
}
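Both loops emit the same fan of triangles; only the position of elts[0] within each triangle differs. With PRIM3D_TRILIST the hardware takes flat-shaded attributes from a fixed vertex of each triangle, so the vertex carrying the primitive's flat color has to land in that slot, and which slot that is depends on the GL provoking-vertex convention.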
else {
tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
- tnl->Driver.Render.ClippedLine = intelRenderClippedLine;
+ tnl->Driver.Render.ClippedLine = _tnl_RenderClippedLine;
tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly;
}
}
}
static const GLenum reduced_prim[GL_POLYGON + 1] = {
- GL_POINTS,
- GL_LINES,
- GL_LINES,
- GL_LINES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES
+ [GL_POINTS] = GL_POINTS,
+ [GL_LINES] = GL_LINES,
+ [GL_LINE_LOOP] = GL_LINES,
+ [GL_LINE_STRIP] = GL_LINES,
+ [GL_TRIANGLES] = GL_TRIANGLES,
+ [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+ [GL_TRIANGLE_FAN] = GL_TRIANGLES,
+ [GL_QUADS] = GL_TRIANGLES,
+ [GL_QUAD_STRIP] = GL_TRIANGLES,
+ [GL_POLYGON] = GL_TRIANGLES
};
noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la
libi965_dri_la_SOURCES = $(i965_FILES)
-libi965_dri_la_LIBADD = $(INTEL_LIBS)
+libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS)
+
+libi965_compiler_la_SOURCES = $(i965_compiler_FILES)
TEST_LIBS = \
- libi965_dri.la \
- ../common/libdricommon.la \
- ../common/libxmlconfig.la \
- ../common/libmegadriver_stub.la \
+ libi965_compiler.la \
../../../libmesa.la \
- $(DRI_LIB_DEPS) \
- $(CLOCK_LIB) \
+ -lpthread -ldl \
../common/libdri_test_stubs.la
-libi965_compiler_la_SOURCES = $(i965_FILES)
-libi965_compiler_la_LIBADD = $(INTEL_LIBS) \
- ../common/libdricommon.la \
- ../common/libxmlconfig.la \
- ../common/libmegadriver_stub.la \
- ../../../libmesa.la \
- $(DRI_LIB_DEPS) \
- $(CLOCK_LIB) \
- ../common/libdri_test_stubs.la -lm
-
TESTS = \
test_fs_cmod_propagation \
test_fs_saturate_propagation \
-i965_FILES = \
- brw_binding_tables.c \
- brw_blorp_blit.cpp \
- brw_blorp_blit_eu.cpp \
- brw_blorp_blit_eu.h \
- brw_blorp.cpp \
- brw_blorp.h \
- brw_cc.c \
+i965_compiler_FILES = \
brw_cfg.cpp \
brw_cfg.h \
- brw_clear.c \
- brw_clip.c \
- brw_clip.h \
- brw_clip_line.c \
- brw_clip_point.c \
- brw_clip_state.c \
- brw_clip_tri.c \
- brw_clip_unfilled.c \
- brw_clip_util.c \
- brw_compute.c \
- brw_conditional_render.c \
- brw_context.c \
- brw_context.h \
- brw_cs.c \
- brw_cs.h \
brw_cubemap_normalize.cpp \
- brw_curbe.c \
brw_dead_control_flow.cpp \
brw_dead_control_flow.h \
brw_defines.h \
brw_device_info.c \
brw_device_info.h \
brw_disasm.c \
- brw_draw.c \
- brw_draw.h \
- brw_draw_upload.c \
brw_eu.c \
brw_eu_compact.c \
brw_eu_emit.c \
brw_eu.h \
brw_eu_util.c \
- brw_ff_gs.c \
- brw_ff_gs_emit.c \
- brw_ff_gs.h \
brw_fs_builder.h \
brw_fs_channel_expressions.cpp \
brw_fs_cmod_propagation.cpp \
brw_fs_live_variables.cpp \
brw_fs_live_variables.h \
brw_fs_nir.cpp \
- brw_fs_peephole_predicated_break.cpp \
brw_fs_reg_allocate.cpp \
brw_fs_register_coalesce.cpp \
brw_fs_saturate_propagation.cpp \
brw_fs_validate.cpp \
brw_fs_vector_splitting.cpp \
brw_fs_visitor.cpp \
- brw_gs.c \
- brw_gs.h \
- brw_gs_state.c \
- brw_gs_surface_state.c \
brw_inst.h \
brw_interpolation_map.c \
brw_ir_allocator.h \
brw_ir_vec4.h \
brw_lower_texture_gradients.cpp \
brw_lower_unnormalized_offset.cpp \
- brw_meta_fast_clear.c \
- brw_meta_stencil_blit.c \
- brw_meta_updownsample.c \
- brw_meta_util.c \
- brw_meta_util.h \
- brw_misc_state.c \
- brw_multisample_state.h \
brw_nir.h \
brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
brw_nir_uniforms.cpp \
- brw_object_purgeable.c \
brw_packed_float.c \
- brw_performance_monitor.c \
- brw_pipe_control.c \
- brw_primitive_restart.c \
- brw_program.c \
- brw_program.h \
- brw_queryobj.c \
+ brw_predicated_break.cpp \
brw_reg.h \
- brw_reset.c \
- brw_sampler_state.c \
brw_schedule_instructions.cpp \
- brw_sf.c \
- brw_sf_emit.c \
- brw_sf.h \
- brw_sf_state.c \
brw_shader.cpp \
brw_shader.h \
- brw_state_batch.c \
- brw_state_cache.c \
- brw_state_dump.c \
- brw_state.h \
- brw_state_upload.c \
- brw_structs.h \
brw_surface_formats.c \
- brw_tex.c \
- brw_tex_layout.c \
- brw_urb.c \
brw_util.c \
brw_util.h \
brw_vec4_builder.h \
brw_vec4_surface_builder.h \
brw_vec4_visitor.cpp \
brw_vec4_vs_visitor.cpp \
+ brw_vue_map.c \
+ brw_wm_iz.cpp \
+ gen6_gs_visitor.cpp \
+ gen6_gs_visitor.h \
+ intel_asm_annotation.c \
+ intel_asm_annotation.h \
+ intel_debug.c \
+ intel_debug.h \
+ intel_reg.h \
+ intel_resolve_map.c \
+ intel_resolve_map.h
+
+i965_FILES = \
+ brw_binding_tables.c \
+ brw_blorp_blit.cpp \
+ brw_blorp_blit_eu.cpp \
+ brw_blorp_blit_eu.h \
+ brw_blorp.cpp \
+ brw_blorp.h \
+ brw_cc.c \
+ brw_clear.c \
+ brw_clip.c \
+ brw_clip.h \
+ brw_clip_line.c \
+ brw_clip_point.c \
+ brw_clip_state.c \
+ brw_clip_tri.c \
+ brw_clip_unfilled.c \
+ brw_clip_util.c \
+ brw_compute.c \
+ brw_conditional_render.c \
+ brw_context.c \
+ brw_context.h \
+ brw_cs.c \
+ brw_cs.h \
+ brw_curbe.c \
+ brw_draw.c \
+ brw_draw.h \
+ brw_draw_upload.c \
+ brw_ff_gs.c \
+ brw_ff_gs_emit.c \
+ brw_ff_gs.h \
+ brw_gs.c \
+ brw_gs.h \
+ brw_gs_state.c \
+ brw_gs_surface_state.c \
+ brw_link.cpp \
+ brw_meta_fast_clear.c \
+ brw_meta_stencil_blit.c \
+ brw_meta_updownsample.c \
+ brw_meta_util.c \
+ brw_meta_util.h \
+ brw_misc_state.c \
+ brw_multisample_state.h \
+ brw_object_purgeable.c \
+ brw_performance_monitor.c \
+ brw_pipe_control.c \
+ brw_program.c \
+ brw_program.h \
+ brw_primitive_restart.c \
+ brw_queryobj.c \
+ brw_reset.c \
+ brw_sampler_state.c \
+ brw_sf.c \
+ brw_sf_emit.c \
+ brw_sf.h \
+ brw_sf_state.c \
+ brw_state_batch.c \
+ brw_state_cache.c \
+ brw_state_dump.c \
+ brw_state.h \
+ brw_state_upload.c \
+ brw_structs.h \
+ brw_tex.c \
+ brw_tex_layout.c \
+ brw_urb.c \
brw_vs.c \
brw_vs.h \
brw_vs_state.c \
brw_vs_surface_state.c \
- brw_vue_map.c \
brw_wm.c \
brw_wm.h \
- brw_wm_iz.cpp \
brw_wm_state.c \
brw_wm_surface_state.c \
gen6_blorp.cpp \
gen6_depth_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
- gen6_gs_visitor.cpp \
- gen6_gs_visitor.h \
gen6_multisample_state.c \
gen6_queryobj.c \
gen6_sampler_state.c \
gen8_viewport_state.c \
gen8_vs_state.c \
gen8_wm_depth_stencil.c \
- intel_asm_annotation.c \
- intel_asm_annotation.h \
intel_batchbuffer.c \
intel_batchbuffer.h \
intel_blit.c \
intel_buffers.c \
intel_buffers.h \
intel_copy_image.c \
- intel_debug.c \
- intel_debug.h \
intel_extensions.c \
intel_fbo.c \
intel_fbo.h \
intel_pixel_draw.c \
intel_pixel.h \
intel_pixel_read.c \
- intel_reg.h \
- intel_resolve_map.c \
- intel_resolve_map.h \
intel_screen.c \
intel_screen.h \
intel_state.c \
/* _NEW_POLYGON, _NEW_BUFFERS */
key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2;
key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
+ key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD;
}
if (!ctx->Polygon._FrontBit) {
GLfloat offset_factor;
GLfloat offset_units;
+ GLfloat offset_clamp;
};
GLfloat bc = dir.y * iz;
offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ if (ctx->Polygon.OffsetClamp && isfinite(ctx->Polygon.OffsetClamp)) {
+ if (ctx->Polygon.OffsetClamp < 0)
+ offset = MAX2( offset, ctx->Polygon.OffsetClamp );
+ else
+ offset = MIN2( offset, ctx->Polygon.OffsetClamp );
+ }
offset *= MRD;
*/
static void compute_offset( struct brw_clip_compile *c )
brw_MUL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_factor));
brw_ADD(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_units));
+ if (c->key.offset_clamp && isfinite(c->key.offset_clamp)) {
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ c->key.offset_clamp < 0 ? BRW_CONDITIONAL_GE : BRW_CONDITIONAL_L,
+ vec1(off),
+ brw_imm_f(c->key.offset_clamp));
+ brw_SEL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_clamp));
+ }
}
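
The CMP/SEL pair above implements a clamp whose direction depends on the sign of the clamp value, mirroring the software-rasterizer hunk earlier in this series. A sketch of the intended semantics in plain C:

   /* Negative clamp values bound the offset from below, positive from above. */
   if (c->key.offset_clamp < 0)
      off = MAX2(off, c->key.offset_clamp);
   else
      off = MIN2(off, c->key.offset_clamp);
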
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
+#include "main/framebuffer.h"
#include "vbo/vbo_context.h"
ctx->Const.MaxUniformBlockSize = 65536;
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_program_constants *prog = &ctx->Const.Program[i];
- prog->MaxUniformBlocks = 12;
+ prog->MaxUniformBlocks = BRW_MAX_UBO;
prog->MaxCombinedUniformComponents =
prog->MaxUniformComponents +
ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
if (_mesa_extension_override_enables.ARB_compute_shader) {
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
- ctx->Const.MaxUniformBufferBindings += 12;
+ ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO;
} else {
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
}
/* FIXME: Tessellation stages are not yet supported in i965, so
* MaxCombinedShaderStorageBlocks doesn't take them into account.
*/
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO;
+ ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO;
ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12;
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12;
- ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
- ctx->Const.MaxShaderStorageBufferBindings = 36;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO;
+ ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3;
+ ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3;
if (_mesa_extension_override_enables.ARB_compute_shader)
- ctx->Const.MaxShaderStorageBufferBindings += 12;
+ ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO;
if (brw->gen >= 6) {
ctx->Const.MaxVarying = 32;
* that will happen next will probably dirty the front buffer. So
* mark it as dirty here.
*/
- if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
+ if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
brw->front_buffer_dirty = true;
}
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
memset(attachments, 0, sizeof(attachments));
- if ((brw_is_front_buffer_drawing(fb) ||
- brw_is_front_buffer_reading(fb) ||
+ if ((_mesa_is_front_buffer_drawing(fb) ||
+ _mesa_is_front_buffer_reading(fb) ||
!back_rb) && front_rb) {
/* If a fake front buffer is in use, then querying for
* __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
drawable->w, drawable->h,
buffer->pitch);
- if (brw_is_front_buffer_drawing(fb) &&
+ if (_mesa_is_front_buffer_drawing(fb) &&
(buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
rb->Base.Base.NumSamples > 1) {
buffer->width, buffer->height,
buffer->pitch);
- if (brw_is_front_buffer_drawing(fb) &&
+ if (_mesa_is_front_buffer_drawing(fb) &&
buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
rb->Base.Base.NumSamples > 1) {
intel_renderbuffer_upsample(intel, rb);
else
return;
- if (front_rb && (brw_is_front_buffer_drawing(fb) ||
- brw_is_front_buffer_reading(fb) || !back_rb)) {
+ if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
+ _mesa_is_front_buffer_reading(fb) || !back_rb)) {
buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
}
unsigned simd_size;
bool uses_barrier;
bool uses_num_work_groups;
+ unsigned local_invocation_id_regs;
struct {
/** @{
/** Max number of render targets in a shader */
#define BRW_MAX_DRAW_BUFFERS 8
+/** Max number of UBOs in a shader */
+#define BRW_MAX_UBO 12
+
+/** Max number of SSBOs in a shader */
+#define BRW_MAX_SSBO 12
+
+/** Max number of combined UBOs and SSBOs in a shader */
+#define BRW_MAX_COMBINED_UBO_SSBO (BRW_MAX_UBO + BRW_MAX_SSBO)
+
/** Max number of atomic counter buffer objects in a shader */
#define BRW_MAX_ABO 16
#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \
BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \
- 12 + /* ubo */ \
+ BRW_MAX_UBO + \
+ BRW_MAX_SSBO + \
BRW_MAX_ABO + \
BRW_MAX_IMAGES + \
2 + /* shader time, pull constants */ \
*/
drm_intel_bo *multisampled_null_render_target_bo;
uint32_t fast_clear_op;
+
+ float offset_clamp;
} wm;
struct {
*/
void brwInitFragProgFuncs( struct dd_function_table *functions );
-int brw_get_scratch_size(int size);
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+static inline int
+brw_get_scratch_size(int size)
+{
+ return MAX2(1024, util_next_power_of_two(size));
+}
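
As a quick sanity check of the rounding behavior, matching the loop this inline replaces (hypothetical sizes):

   brw_get_scratch_size(100);   /* -> 1024: never less than 1KB   */
   brw_get_scratch_size(1500);  /* -> 2048: next power of two     */
   brw_get_scratch_size(2048);  /* -> 2048: exact powers stay put */
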
void brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size);
void brw_init_shader_time(struct brw_context *brw);
start_time = get_time();
}
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ brw_dump_ir("compute", prog, &cs->base, &cp->program.Base);
+
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, &cp->program.Base, ST_CS);
+
program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
- &cp->program, prog, &program_size);
+ &cp->program, prog, st_index, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
struct brw_cs_prog_data *prog_data,
struct gl_compute_program *cp,
struct gl_shader_program *prog,
+ int shader_time_index,
unsigned *final_assembly_size);
-unsigned
-brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width);
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+ void *buffer, uint32_t threads, uint32_t stride);
#ifdef __cplusplus
}
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
+#include "main/framebuffer.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#define FILE_DEBUG_FLAG DEBUG_PRIMS
static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
- _3DPRIM_POINTLIST,
- _3DPRIM_LINELIST,
- _3DPRIM_LINELOOP,
- _3DPRIM_LINESTRIP,
- _3DPRIM_TRILIST,
- _3DPRIM_TRISTRIP,
- _3DPRIM_TRIFAN,
- _3DPRIM_QUADLIST,
- _3DPRIM_QUADSTRIP,
- _3DPRIM_POLYGON,
- _3DPRIM_LINELIST_ADJ,
- _3DPRIM_LINESTRIP_ADJ,
- _3DPRIM_TRILIST_ADJ,
- _3DPRIM_TRISTRIP_ADJ,
+ [GL_POINTS] = _3DPRIM_POINTLIST,
+ [GL_LINES] = _3DPRIM_LINELIST,
+ [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
+ [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
+ [GL_TRIANGLES] = _3DPRIM_TRILIST,
+ [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+ [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+ [GL_QUADS] = _3DPRIM_QUADLIST,
+ [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
+ [GL_POLYGON] = _3DPRIM_POLYGON,
+ [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+ [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+ [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+ [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
static const GLenum reduced_prim[GL_POLYGON+1] = {
- GL_POINTS,
- GL_LINES,
- GL_LINES,
- GL_LINES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES
+ [GL_POINTS] = GL_POINTS,
+ [GL_LINES] = GL_LINES,
+ [GL_LINE_LOOP] = GL_LINES,
+ [GL_LINE_STRIP] = GL_LINES,
+ [GL_TRIANGLES] = GL_TRIANGLES,
+ [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+ [GL_TRIANGLE_FAN] = GL_TRIANGLES,
+ [GL_QUADS] = GL_TRIANGLES,
+ [GL_QUAD_STRIP] = GL_TRIANGLES,
+ [GL_POLYGON] = GL_TRIANGLES
};
uint32_t
struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
- if (brw_is_front_buffer_drawing(fb))
+ if (_mesa_is_front_buffer_drawing(fb))
front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
if (front_irb)
inst->regs_written = regs_written;
if (devinfo->gen < 7) {
- inst->base_mrf = 13;
+ inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
inst->header_size = 1;
if (devinfo->gen == 4)
inst->mlen = 3;
bool
fs_visitor::remove_duplicate_mrf_writes()
{
- fs_inst *last_mrf_move[16];
+ fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->gen)];
bool progress = false;
/* Need to update the MRF tracking for compressed instructions. */
* else does except for register spill/unspill, which generates and
* uses its MRF within a single IR instruction.
*/
- inst->base_mrf = 14;
+ inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
inst->mlen = 1;
}
}
payload.num_regs = 2;
}
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_fill_local_id_payload or
+ * fs_visitor::emit_cs_local_invocation_id_setup need to be coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ * no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ * easily use 16-bit words rather than 32-bit dwords in the push constant
+ * data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ * conveying the data, and thereby reduce push constant usage.
+ *
+ */
void
fs_visitor::setup_cs_payload()
{
assert(devinfo->gen >= 7);
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
payload.num_regs = 1;
if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- const unsigned local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(dispatch_width);
- assert((local_id_dwords & 0x7) == 0);
- const unsigned local_id_regs = local_id_dwords / 8;
+ prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
payload.local_invocation_id_reg = payload.num_regs;
- payload.num_regs += local_id_regs;
+ payload.num_regs += prog_data->local_invocation_id_regs;
}
}
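
The register count follows directly from pushing three dwords (x, y, z) per channel:

   /* dispatch_width * 3 / 8 GRFs of local IDs:
    *   SIMD8:   8 * 3 / 8 = 3 GRFs (one per component of gl_LocalInvocationID)
    *   SIMD16: 16 * 3 / 8 = 6 GRFs
    */
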
OPT(opt_algebraic);
OPT(opt_cse);
OPT(opt_copy_propagate);
- OPT(opt_peephole_predicated_break);
+ OPT(opt_predicated_break, this);
OPT(opt_cmod_propagation);
OPT(dead_code_eliminate);
OPT(opt_peephole_sel);
struct brw_wm_prog_data *prog_data,
struct gl_fragment_program *fp,
struct gl_shader_program *prog,
+ int shader_time_index8, int shader_time_index16,
unsigned *final_assembly_size)
{
- struct brw_shader *shader = NULL;
- if (prog)
- shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM) && shader->base.ir)
- brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
-
- int st_index8 = -1, st_index16 = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
- st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
- st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
- }
-
/* Now the main event: Visit the shader IR and generate our FS IR for it.
*/
fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &fp->Base, fp->Base.nir, 8, st_index8);
+ &prog_data->base, &fp->Base, fp->Base.nir, 8, shader_time_index8);
if (!v.run_fs(false /* do_rep_send */)) {
if (prog) {
prog->LinkStatus = false;
cfg_t *simd16_cfg = NULL;
fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &fp->Base, fp->Base.nir, 16, st_index16);
+ &prog_data->base, &fp->Base, fp->Base.nir, 16, shader_time_index16);
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
return g.get_assembly(final_assembly_size);
}
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+ void *buffer, uint32_t threads, uint32_t stride)
+{
+ if (prog_data->local_invocation_id_regs == 0)
+ return;
+
+ /* 'stride' should be an integer number of registers, that is, a multiple
+ * of 32 bytes.
+ */
+ assert(stride % 32 == 0);
+
+ unsigned x = 0, y = 0, z = 0;
+ for (unsigned t = 0; t < threads; t++) {
+ uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+ for (unsigned i = 0; i < prog_data->simd_size; i++) {
+ param[0 * prog_data->simd_size + i] = x;
+ param[1 * prog_data->simd_size + i] = y;
+ param[2 * prog_data->simd_size + i] = z;
+
+ x++;
+ if (x == prog_data->local_size[0]) {
+ x = 0;
+ y++;
+ if (y == prog_data->local_size[1]) {
+ y = 0;
+ z++;
+ if (z == prog_data->local_size[2])
+ z = 0;
+ }
+ }
+ }
+ }
+}
+
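
A hypothetical call site for the new helper, carving out one stride-sized block of local IDs per hardware thread (the names below are illustrative, not part of this patch):

   const unsigned group_size = prog_data->local_size[0] *
                               prog_data->local_size[1] *
                               prog_data->local_size[2];
   const unsigned threads = DIV_ROUND_UP(group_size, prog_data->simd_size);
   const unsigned stride = prog_data->local_invocation_id_regs * 32; /* bytes */
   void *buf = malloc(threads * stride);
   brw_cs_fill_local_id_payload(prog_data, buf, threads, stride);
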
fs_reg *
fs_visitor::emit_cs_local_invocation_id_setup()
{
struct brw_cs_prog_data *prog_data,
struct gl_compute_program *cp,
struct gl_shader_program *prog,
+ int shader_time_index,
unsigned *final_assembly_size)
{
- struct brw_shader *shader =
- (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
- if (unlikely(INTEL_DEBUG & DEBUG_CS))
- brw_dump_ir("compute", prog, &shader->base, &cp->Base);
-
prog_data->local_size[0] = cp->LocalSize[0];
prog_data->local_size[1] = cp->LocalSize[1];
prog_data->local_size[2] = cp->LocalSize[2];
unsigned local_workgroup_size =
cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
+ unsigned max_cs_threads = brw->intelScreen->compiler->devinfo->max_cs_threads;
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
- int st_index = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
- st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
-
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &cp->Base, cp->Base.nir, 8, st_index);
+ &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
- } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+ } else if (local_workgroup_size <= 8 * max_cs_threads) {
cfg = v8.cfg;
prog_data->simd_size = 8;
}
fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
- &prog_data->base, &cp->Base, cp->Base.nir, 16, st_index);
+ &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
- local_workgroup_size <= 16 * brw->max_cs_threads) {
+ local_workgroup_size <= 16 * max_cs_threads) {
/* Try a SIMD16 compile */
v16.import_uniforms(&v8);
if (!v16.run_cs()) {
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_fs.h"
-#include "brw_cfg.h"
-
-using namespace brw;
-
-/** @file brw_fs_peephole_predicated_break.cpp
- *
- * Loops are often structured as
- *
- * loop:
- * CMP.f0
- * (+f0) IF
- * BREAK
- * ENDIF
- * ...
- * WHILE loop
- *
- * This peephole pass removes the IF and ENDIF instructions and predicates the
- * BREAK, dropping two instructions from the loop body.
- *
- * If the loop was a DO { ... } WHILE loop, it looks like
- *
- * loop:
- * ...
- * CMP.f0
- * (+f0) IF
- * BREAK
- * ENDIF
- * WHILE loop
- *
- * and we can remove the BREAK instruction and predicate the WHILE.
- */
-
-bool
-fs_visitor::opt_peephole_predicated_break()
-{
- bool progress = false;
-
- foreach_block (block, cfg) {
- if (block->start_ip != block->end_ip)
- continue;
-
- /* BREAK and CONTINUE instructions, by definition, can only be found at
- * the ends of basic blocks.
- */
- fs_inst *jump_inst = (fs_inst *)block->end();
- if (jump_inst->opcode != BRW_OPCODE_BREAK &&
- jump_inst->opcode != BRW_OPCODE_CONTINUE)
- continue;
-
- fs_inst *if_inst = (fs_inst *)block->prev()->end();
- if (if_inst->opcode != BRW_OPCODE_IF)
- continue;
-
- fs_inst *endif_inst = (fs_inst *)block->next()->start();
- if (endif_inst->opcode != BRW_OPCODE_ENDIF)
- continue;
-
- bblock_t *jump_block = block;
- bblock_t *if_block = jump_block->prev();
- bblock_t *endif_block = jump_block->next();
-
- /* For Sandybridge with IF with embedded comparison we need to emit an
- * instruction to set the flag register.
- */
- if (devinfo->gen == 6 && if_inst->conditional_mod) {
- const fs_builder ibld(this, if_block, if_inst);
- ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod);
- jump_inst->predicate = BRW_PREDICATE_NORMAL;
- } else {
- jump_inst->predicate = if_inst->predicate;
- jump_inst->predicate_inverse = if_inst->predicate_inverse;
- }
-
- bblock_t *earlier_block = if_block;
- if (if_block->start_ip == if_block->end_ip) {
- earlier_block = if_block->prev();
- }
-
- if_inst->remove(if_block);
-
- bblock_t *later_block = endif_block;
- if (endif_block->start_ip == endif_block->end_ip) {
- later_block = endif_block->next();
- }
- endif_inst->remove(endif_block);
-
- if (!earlier_block->ends_with_control_flow()) {
- earlier_block->children.make_empty();
- earlier_block->add_successor(cfg->mem_ctx, jump_block);
- }
-
- if (!later_block->starts_with_control_flow()) {
- later_block->parents.make_empty();
- }
- jump_block->add_successor(cfg->mem_ctx, later_block);
-
- if (earlier_block->can_combine_with(jump_block)) {
- earlier_block->combine_with(jump_block);
-
- block = earlier_block;
- }
-
- /* Now look at the first instruction of the block following the BREAK. If
- * it's a WHILE, we can delete the break, predicate the WHILE, and join
- * the two basic blocks.
- */
- bblock_t *while_block = earlier_block->next();
- fs_inst *while_inst = (fs_inst *)while_block->start();
-
- if (jump_inst->opcode == BRW_OPCODE_BREAK &&
- while_inst->opcode == BRW_OPCODE_WHILE &&
- while_inst->predicate == BRW_PREDICATE_NONE) {
- jump_inst->remove(earlier_block);
- while_inst->predicate = jump_inst->predicate;
- while_inst->predicate_inverse = !jump_inst->predicate_inverse;
-
- earlier_block->children.make_empty();
- earlier_block->add_successor(cfg->mem_ctx, while_block);
-
- assert(earlier_block->can_combine_with(while_block));
- earlier_block->combine_with(while_block);
-
- earlier_block->next()->parents.make_empty();
- earlier_block->add_successor(cfg->mem_ctx, earlier_block->next());
- }
-
- progress = true;
- }
-
- if (progress)
- invalidate_live_intervals();
-
- return progress;
-}
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"
-#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
-
using namespace brw;
static void
if (movs == 0)
continue;
- enum brw_predicate predicate;
- bool predicate_inverse;
- if (devinfo->gen == 6 && if_inst->conditional_mod) {
- /* For Sandybridge with IF with embedded comparison */
- predicate = BRW_PREDICATE_NORMAL;
- predicate_inverse = false;
- } else {
- /* Separate CMP and IF instructions */
- predicate = if_inst->predicate;
- predicate_inverse = if_inst->predicate_inverse;
- }
-
/* Generate SEL instructions for pairs of MOVs to a common destination. */
for (int i = 0; i < movs; i++) {
if (!then_mov[i] || !else_mov[i])
if (movs == 0)
continue;
- /* Emit a CMP if our IF used the embedded comparison */
- if (devinfo->gen == 6 && if_inst->conditional_mod) {
- const fs_builder ibld(this, block, if_inst);
- ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod);
- }
-
for (int i = 0; i < movs; i++) {
const fs_builder ibld = fs_builder(this, then_block, then_mov[i])
.at(block, if_inst);
ibld.MOV(src0, then_mov[i]->src[0]);
}
- set_predicate_inv(predicate, predicate_inverse,
+ set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse,
ibld.SEL(then_mov[i]->dst, src0,
else_mov[i]->src[0]));
}
}
bool
-brw_compile_gs_prog(struct brw_context *brw,
+brw_codegen_gs_prog(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_geometry_program *gp,
- struct brw_gs_prog_key *key,
- struct brw_gs_compile_output *output)
+ struct brw_gs_prog_key *key)
{
+ struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_compile c;
memset(&c, 0, sizeof(c));
c.key = *key;
c.gp = gp;
- /* We get the bind map as input in the output struct...*/
- c.prog_data.base.base.map_entries = output->prog_data.base.base.map_entries;
- memcpy(c.prog_data.base.base.bind_map, output->prog_data.base.base.bind_map,
- sizeof(c.prog_data.base.base.bind_map));
-
c.prog_data.include_primitive_id =
(gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
*/
c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
+ if (unlikely(INTEL_DEBUG & DEBUG_GS))
+ brw_dump_ir("geometry", prog, gs, NULL);
+
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS);
+
void *mem_ctx = ralloc_context(NULL);
unsigned program_size;
const unsigned *program =
- brw_gs_emit(brw, prog, &c, mem_ctx, &program_size);
+ brw_gs_emit(brw, prog, &c, mem_ctx, st_index, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
}
- output->mem_ctx = mem_ctx;
- output->program = program;
- output->program_size = program_size;
- memcpy(&output->prog_data, &c.prog_data,
- sizeof(output->prog_data));
-
- return true;
-}
-
-bool
-brw_codegen_gs_prog(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_geometry_program *gp,
- struct brw_gs_prog_key *key)
-{
- struct brw_gs_compile_output output;
- struct brw_stage_state *stage_state = &brw->gs.base;
-
- if (brw_compile_gs_prog(brw, prog, gp, key, &output))
- return false;
-
- if (output.prog_data.base.base.total_scratch) {
+ /* Scratch space is used for register spilling */
+ if (c.prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- output.prog_data.base.base.total_scratch *
+ c.prog_data.base.base.total_scratch *
brw->max_gs_threads);
}
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
- key, sizeof(*key),
- output.program, output.program_size,
- &output.prog_data, sizeof(output.prog_data),
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
&stage_state->prog_offset, &brw->gs.prog_data);
- ralloc_free(output.mem_ctx);
+ ralloc_free(mem_ctx);
return true;
}
struct gl_shader_program;
struct gl_program;
-struct brw_gs_compile_output {
- void *mem_ctx;
- const void *program;
- uint32_t program_size;
- struct brw_gs_prog_data prog_data;
-};
-
-struct brw_gs_prog_key;
-
-bool
-brw_compile_gs_prog(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_geometry_program *gp,
- struct brw_gs_prog_key *key,
- struct brw_gs_compile_output *output);
-
-bool brw_gs_prog_data_compare(const void *a, const void *b);
-
void
brw_upload_gs_prog(struct brw_context *brw);
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15
+/** First MRF register used by spills */
+#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
+
+/** First MRF register used by pull loads */
+#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
+
/* brw_context.h has a forward declaration of brw_inst, so name the struct. */
typedef struct brw_inst {
uint64_t data[2];
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "main/macros.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_gs.h"
+#include "brw_fs.h"
+#include "brw_cfg.h"
+#include "brw_nir.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/glsl_parser_extras.h"
+#include "main/shaderapi.h"
+
+/**
+ * Performs a compile of the shader stages even when we don't know
+ * what non-orthogonal state will be set, in the hope that it reflects
+ * the eventual NOS used, and thus allows us to produce link failures.
+ */
+static bool
+brw_shader_precompile(struct gl_context *ctx,
+ struct gl_shader_program *sh_prog)
+{
+ struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+ struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+ if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
+ return false;
+
+ if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
+ return false;
+
+ if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
+ return false;
+
+ if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program))
+ return false;
+
+ return true;
+}
+
+static void
+brw_lower_packing_builtins(struct brw_context *brw,
+ gl_shader_stage shader_type,
+ exec_list *ir)
+{
+ int ops = LOWER_PACK_SNORM_2x16
+ | LOWER_UNPACK_SNORM_2x16
+ | LOWER_PACK_UNORM_2x16
+ | LOWER_UNPACK_UNORM_2x16;
+
+ if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+ ops |= LOWER_UNPACK_UNORM_4x8
+ | LOWER_UNPACK_SNORM_4x8
+ | LOWER_PACK_UNORM_4x8
+ | LOWER_PACK_SNORM_4x8;
+ }
+
+ if (brw->gen >= 7) {
+ /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
+ * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
+ * lowering is needed. For SOA code, the Half2x16 ops must be
+ * scalarized.
+ */
+ if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+ ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
+ | LOWER_UNPACK_HALF_2x16_TO_SPLIT;
+ }
+ } else {
+ ops |= LOWER_PACK_HALF_2x16
+ | LOWER_UNPACK_HALF_2x16;
+ }
+
+ lower_packing_builtins(ir, ops);
+}
+
+static void
+process_glsl_ir(gl_shader_stage stage,
+ struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct gl_shader *shader)
+{
+ struct gl_context *ctx = &brw->ctx;
+ const struct gl_shader_compiler_options *options =
+ &ctx->Const.ShaderCompilerOptions[shader->Stage];
+
+ /* Temporary memory context for any new IR. */
+ void *mem_ctx = ralloc_context(NULL);
+
+ ralloc_adopt(mem_ctx, shader->ir);
+
+ /* lower_packing_builtins() inserts arithmetic instructions, so it
+ * must precede lower_instructions().
+ */
+ brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
+ do_mat_op_to_vec(shader->ir);
+ const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
+ lower_instructions(shader->ir,
+ MOD_TO_FLOOR |
+ DIV_TO_MUL_RCP |
+ SUB_TO_ADD_NEG |
+ EXP_TO_EXP2 |
+ LOG_TO_LOG2 |
+ bitfield_insert |
+ LDEXP_TO_ARITH |
+ CARRY_TO_ARITH |
+ BORROW_TO_ARITH);
+
+ /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
+ * if-statements need to be flattened.
+ */
+ if (brw->gen < 6)
+ lower_if_to_cond_assign(shader->ir, 16);
+
+ do_lower_texture_projection(shader->ir);
+ brw_lower_texture_gradients(brw, shader->ir);
+ do_vec_index_to_cond_assign(shader->ir);
+ lower_vector_insert(shader->ir, true);
+ lower_offset_arrays(shader->ir);
+ brw_do_lower_unnormalized_offset(shader->ir);
+ lower_noise(shader->ir);
+ lower_quadop_vector(shader->ir, false);
+
+ bool lowered_variable_indexing =
+ lower_variable_index_to_cond_assign((gl_shader_stage)stage,
+ shader->ir,
+ options->EmitNoIndirectInput,
+ options->EmitNoIndirectOutput,
+ options->EmitNoIndirectTemp,
+ options->EmitNoIndirectUniform);
+
+ if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
+ perf_debug("Unsupported form of variable indexing in %s; falling "
+ "back to very inefficient code generation\n",
+ _mesa_shader_stage_to_abbrev(shader->Stage));
+ }
+
+ lower_ubo_reference(shader, shader->ir);
+
+ bool progress;
+ do {
+ progress = false;
+
+ if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
+ brw_do_channel_expressions(shader->ir);
+ brw_do_vector_splitting(shader->ir);
+ }
+
+ progress = do_lower_jumps(shader->ir, true, true,
+ true, /* main return */
+ false, /* continue */
+ false /* loops */
+ ) || progress;
+
+ progress = do_common_optimization(shader->ir, true, true,
+ options, ctx->Const.NativeIntegers) || progress;
+ } while (progress);
+
+ validate_ir_tree(shader->ir);
+
+ /* Now that we've finished altering the linked IR, reparent any live IR back
+ * to the permanent memory context, and free the temporary one (discarding any
+ * junk we optimized away).
+ */
+ reparent_ir(shader->ir, shader->ir);
+ ralloc_free(mem_ctx);
+
+ if (ctx->_Shader->Flags & GLSL_DUMP) {
+ fprintf(stderr, "\n");
+ fprintf(stderr, "GLSL IR for linked %s program %d:\n",
+ _mesa_shader_stage_to_string(shader->Stage),
+ shader_prog->Name);
+ _mesa_print_ir(stderr, shader->ir, NULL);
+ fprintf(stderr, "\n");
+ }
+}
+
+GLboolean
+brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
+{
+ struct brw_context *brw = brw_context(ctx);
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
+ unsigned int stage;
+
+ for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
+ struct gl_shader *shader = shProg->_LinkedShaders[stage];
+ if (!shader)
+ continue;
+
+ struct gl_program *prog =
+ ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
+ shader->Name);
+ if (!prog)
+ return false;
+ prog->Parameters = _mesa_new_parameter_list();
+
+ _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
+
+ process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
+
+ /* Make a pass over the IR to add state references for any built-in
+ * uniforms that are used. This has to be done now (during linking).
+ * Code generation doesn't happen until the first time this shader is
+ * used for rendering. Waiting until then to generate the parameters is
+ * too late. At that point, the values for the built-in uniforms won't
+ * get sent to the shader.
+ */
+ foreach_in_list(ir_instruction, node, shader->ir) {
+ ir_variable *var = node->as_variable();
+
+ if ((var == NULL) || (var->data.mode != ir_var_uniform)
+ || (strncmp(var->name, "gl_", 3) != 0))
+ continue;
+
+ const ir_state_slot *const slots = var->get_state_slots();
+ assert(slots != NULL);
+
+ for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
+ _mesa_add_state_reference(prog->Parameters,
+ (gl_state_index *) slots[i].tokens);
+ }
+ }
+
+ do_set_program_inouts(shader->ir, prog, shader->Stage);
+
+ prog->SamplersUsed = shader->active_samplers;
+ prog->ShadowSamplers = shader->shadow_samplers;
+ _mesa_update_shader_textures_used(shProg, prog);
+
+ _mesa_reference_program(ctx, &shader->Program, prog);
+
+ brw_add_texrect_params(prog);
+
+ prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
+ is_scalar_shader_stage(compiler, stage));
+
+ _mesa_reference_program(ctx, &prog, NULL);
+ }
+
+ if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
+ for (unsigned i = 0; i < shProg->NumShaders; i++) {
+ const struct gl_shader *sh = shProg->Shaders[i];
+ if (!sh)
+ continue;
+
+ fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
+ _mesa_shader_stage_to_string(sh->Stage),
+ i, shProg->Name);
+ fprintf(stderr, "%s", sh->Source);
+ fprintf(stderr, "\n");
+ }
+ }
+
+ if (brw->precompile && !brw_shader_precompile(ctx, shProg))
+ return false;
+
+ return true;
+}
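
brw_link_shader replaces the shared GLSL linking path for this driver. The hookup itself is not shown in this hunk; assuming the usual Mesa driver-function-table pattern, it amounts to:

   functions->LinkShader = brw_link_shader;  /* assumed registration site */
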
brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
brw->last_pipeline = BRW_RENDER_PIPELINE;
- if (brw->gen < 6) {
- /* Disable depth offset clamping. */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
- OUT_BATCH_F(0.0);
- ADVANCE_BATCH();
- }
-
if (brw->gen >= 8) {
BEGIN_BATCH(3);
OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
static void
brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
{
- nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
- is_scalar ? type_size_scalar : type_size_vec4);
+ switch (nir->stage) {
+ case MESA_SHADER_GEOMETRY:
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ var->data.driver_location = var->data.location;
+ }
+ break;
+ default:
+ nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+ is_scalar ? type_size_scalar : type_size_vec4);
+ break;
+ }
}
static void
--- /dev/null
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_cfg.h"
+
+using namespace brw;
+
+/** @file brw_predicated_break.cpp
+ *
+ * Loops are often structured as
+ *
+ * loop:
+ * CMP.f0
+ * (+f0) IF
+ * BREAK
+ * ENDIF
+ * ...
+ * WHILE loop
+ *
+ * This peephole pass removes the IF and ENDIF instructions and predicates the
+ * BREAK, dropping two instructions from the loop body.
+ *
+ * If the loop was a DO { ... } WHILE loop, it looks like
+ *
+ * loop:
+ * ...
+ * CMP.f0
+ * (+f0) IF
+ * BREAK
+ * ENDIF
+ * WHILE loop
+ *
+ * and we can remove the BREAK instruction and predicate the WHILE.
+ */
+
+bool
+opt_predicated_break(backend_shader *s)
+{
+ bool progress = false;
+
+ foreach_block (block, s->cfg) {
+ if (block->start_ip != block->end_ip)
+ continue;
+
+ /* BREAK and CONTINUE instructions, by definition, can only be found at
+ * the ends of basic blocks.
+ */
+ backend_instruction *jump_inst = block->end();
+ if (jump_inst->opcode != BRW_OPCODE_BREAK &&
+ jump_inst->opcode != BRW_OPCODE_CONTINUE)
+ continue;
+
+ backend_instruction *if_inst = block->prev()->end();
+ if (if_inst->opcode != BRW_OPCODE_IF)
+ continue;
+
+ backend_instruction *endif_inst = block->next()->start();
+ if (endif_inst->opcode != BRW_OPCODE_ENDIF)
+ continue;
+
+ bblock_t *jump_block = block;
+ bblock_t *if_block = jump_block->prev();
+ bblock_t *endif_block = jump_block->next();
+
+ jump_inst->predicate = if_inst->predicate;
+ jump_inst->predicate_inverse = if_inst->predicate_inverse;
+
+ bblock_t *earlier_block = if_block;
+ if (if_block->start_ip == if_block->end_ip) {
+ earlier_block = if_block->prev();
+ }
+
+ if_inst->remove(if_block);
+
+ bblock_t *later_block = endif_block;
+ if (endif_block->start_ip == endif_block->end_ip) {
+ later_block = endif_block->next();
+ }
+ endif_inst->remove(endif_block);
+
+ if (!earlier_block->ends_with_control_flow()) {
+ earlier_block->children.make_empty();
+ earlier_block->add_successor(s->cfg->mem_ctx, jump_block);
+ }
+
+ if (!later_block->starts_with_control_flow()) {
+ later_block->parents.make_empty();
+ }
+ jump_block->add_successor(s->cfg->mem_ctx, later_block);
+
+ if (earlier_block->can_combine_with(jump_block)) {
+ earlier_block->combine_with(jump_block);
+
+ block = earlier_block;
+ }
+
+ /* Now look at the first instruction of the block following the BREAK. If
+ * it's a WHILE, we can delete the break, predicate the WHILE, and join
+ * the two basic blocks.
+ */
+ bblock_t *while_block = earlier_block->next();
+ backend_instruction *while_inst = while_block->start();
+
+ if (jump_inst->opcode == BRW_OPCODE_BREAK &&
+ while_inst->opcode == BRW_OPCODE_WHILE &&
+ while_inst->predicate == BRW_PREDICATE_NONE) {
+ jump_inst->remove(earlier_block);
+ while_inst->predicate = jump_inst->predicate;
+ while_inst->predicate_inverse = !jump_inst->predicate_inverse;
+
+ earlier_block->children.make_empty();
+ earlier_block->add_successor(s->cfg->mem_ctx, while_block);
+
+ assert(earlier_block->can_combine_with(while_block));
+ earlier_block->combine_with(while_block);
+
+ earlier_block->next()->parents.make_empty();
+ earlier_block->add_successor(s->cfg->mem_ctx, earlier_block->next());
+ }
+
+ progress = true;
+ }
+
+ if (progress)
+ s->invalidate_live_intervals();
+
+ return progress;
+}
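
The net effect on the first loop shape described in the file comment (a before/after sketch in the same pseudo-assembly):

   /* Before:              After:
    * loop:                loop:
    *    CMP.f0               CMP.f0
    *    (+f0) IF             (+f0) BREAK
    *    BREAK                ...
    *    ENDIF                WHILE loop
    *    ...
    *    WHILE loop
    */
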
}
}
-/* Per-thread scratch space is a power-of-two multiple of 1KB. */
-int
-brw_get_scratch_size(int size)
-{
- int i;
-
- for (i = 1024; i < size; i *= 2)
- ;
-
- return i;
-}
-
void
brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size)
brw->shader_time.bo = NULL;
}
-void
-brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
- unsigned surf_index)
-{
- assert(surf_index < BRW_MAX_SURFACES);
-
- prog_data->binding_table.size_bytes =
- MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
-}
-
void
brw_stage_prog_data_free(const void *p)
{
va_end(args);
}
-static bool
+bool
is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
{
switch (stage) {
return &shader->base;
}
-/**
- * Performs a compile of the shader stages even when we don't know
- * what non-orthogonal state will be set, in the hope that it reflects
- * the eventual NOS used, and thus allows us to produce link failures.
- */
-static bool
-brw_shader_precompile(struct gl_context *ctx,
- struct gl_shader_program *sh_prog)
-{
- struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
- struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
- struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
- if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
- return false;
-
- if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
- return false;
-
- if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
- return false;
-
- if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program))
- return false;
-
- return true;
-}
-
-static void
-brw_lower_packing_builtins(struct brw_context *brw,
- gl_shader_stage shader_type,
- exec_list *ir)
-{
- int ops = LOWER_PACK_SNORM_2x16
- | LOWER_UNPACK_SNORM_2x16
- | LOWER_PACK_UNORM_2x16
- | LOWER_UNPACK_UNORM_2x16;
-
- if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
- ops |= LOWER_UNPACK_UNORM_4x8
- | LOWER_UNPACK_SNORM_4x8
- | LOWER_PACK_UNORM_4x8
- | LOWER_PACK_SNORM_4x8;
- }
-
- if (brw->gen >= 7) {
- /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
- * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
- * lowering is needed. For SOA code, the Half2x16 ops must be
- * scalarized.
- */
- if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
- ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
- | LOWER_UNPACK_HALF_2x16_TO_SPLIT;
- }
- } else {
- ops |= LOWER_PACK_HALF_2x16
- | LOWER_UNPACK_HALF_2x16;
- }
-
- lower_packing_builtins(ir, ops);
-}
-
-static void
-process_glsl_ir(gl_shader_stage stage,
- struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_shader *shader)
-{
- struct gl_context *ctx = &brw->ctx;
- const struct gl_shader_compiler_options *options =
- &ctx->Const.ShaderCompilerOptions[shader->Stage];
-
- /* Temporary memory context for any new IR. */
- void *mem_ctx = ralloc_context(NULL);
-
- ralloc_adopt(mem_ctx, shader->ir);
-
- /* lower_packing_builtins() inserts arithmetic instructions, so it
- * must precede lower_instructions().
- */
- brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
- do_mat_op_to_vec(shader->ir);
- const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
- lower_instructions(shader->ir,
- MOD_TO_FLOOR |
- DIV_TO_MUL_RCP |
- SUB_TO_ADD_NEG |
- EXP_TO_EXP2 |
- LOG_TO_LOG2 |
- bitfield_insert |
- LDEXP_TO_ARITH |
- CARRY_TO_ARITH |
- BORROW_TO_ARITH);
-
- /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
- * if-statements need to be flattened.
- */
- if (brw->gen < 6)
- lower_if_to_cond_assign(shader->ir, 16);
-
- do_lower_texture_projection(shader->ir);
- brw_lower_texture_gradients(brw, shader->ir);
- do_vec_index_to_cond_assign(shader->ir);
- lower_vector_insert(shader->ir, true);
- lower_offset_arrays(shader->ir);
- brw_do_lower_unnormalized_offset(shader->ir);
- lower_noise(shader->ir);
- lower_quadop_vector(shader->ir, false);
-
- bool lowered_variable_indexing =
- lower_variable_index_to_cond_assign((gl_shader_stage)stage,
- shader->ir,
- options->EmitNoIndirectInput,
- options->EmitNoIndirectOutput,
- options->EmitNoIndirectTemp,
- options->EmitNoIndirectUniform);
-
- if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
- perf_debug("Unsupported form of variable indexing in %s; falling "
- "back to very inefficient code generation\n",
- _mesa_shader_stage_to_abbrev(shader->Stage));
- }
-
- lower_ubo_reference(shader, shader->ir);
-
- bool progress;
- do {
- progress = false;
-
- if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
- brw_do_channel_expressions(shader->ir);
- brw_do_vector_splitting(shader->ir);
- }
-
- progress = do_lower_jumps(shader->ir, true, true,
- true, /* main return */
- false, /* continue */
- false /* loops */
- ) || progress;
-
- progress = do_common_optimization(shader->ir, true, true,
- options, ctx->Const.NativeIntegers) || progress;
- } while (progress);
-
- validate_ir_tree(shader->ir);
-
- /* Now that we've finished altering the linked IR, reparent any live IR back
- * to the permanent memory context, and free the temporary one (discarding any
- * junk we optimized away).
- */
- reparent_ir(shader->ir, shader->ir);
- ralloc_free(mem_ctx);
-
- if (ctx->_Shader->Flags & GLSL_DUMP) {
- fprintf(stderr, "\n");
- fprintf(stderr, "GLSL IR for linked %s program %d:\n",
- _mesa_shader_stage_to_string(shader->Stage),
- shader_prog->Name);
- _mesa_print_ir(stderr, shader->ir, NULL);
- fprintf(stderr, "\n");
- }
-}
-
-GLboolean
-brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
+void
+brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
+ unsigned surf_index)
{
- struct brw_context *brw = brw_context(ctx);
- const struct brw_compiler *compiler = brw->intelScreen->compiler;
- unsigned int stage;
-
- for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
- struct gl_shader *shader = shProg->_LinkedShaders[stage];
- if (!shader)
- continue;
-
- struct gl_program *prog =
- ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
- shader->Name);
- if (!prog)
- return false;
- prog->Parameters = _mesa_new_parameter_list();
-
- _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
-
- process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
-
- /* Make a pass over the IR to add state references for any built-in
- * uniforms that are used. This has to be done now (during linking).
- * Code generation doesn't happen until the first time this shader is
- * used for rendering. Waiting until then to generate the parameters is
- * too late. At that point, the values for the built-in uniforms won't
- * get sent to the shader.
- */
- foreach_in_list(ir_instruction, node, shader->ir) {
- ir_variable *var = node->as_variable();
-
- if ((var == NULL) || (var->data.mode != ir_var_uniform)
- || (strncmp(var->name, "gl_", 3) != 0))
- continue;
-
- const ir_state_slot *const slots = var->get_state_slots();
- assert(slots != NULL);
-
- for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
- _mesa_add_state_reference(prog->Parameters,
- (gl_state_index *) slots[i].tokens);
- }
- }
-
- do_set_program_inouts(shader->ir, prog, shader->Stage);
-
- prog->SamplersUsed = shader->active_samplers;
- prog->ShadowSamplers = shader->shadow_samplers;
- _mesa_update_shader_textures_used(shProg, prog);
-
- _mesa_reference_program(ctx, &shader->Program, prog);
-
- brw_add_texrect_params(prog);
+ assert(surf_index < BRW_MAX_SURFACES);
- prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
- is_scalar_shader_stage(compiler, stage));
-
- _mesa_reference_program(ctx, &prog, NULL);
- }
-
- if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
- for (unsigned i = 0; i < shProg->NumShaders; i++) {
- const struct gl_shader *sh = shProg->Shaders[i];
- if (!sh)
- continue;
-
- fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
- _mesa_shader_stage_to_string(sh->Stage),
- i, shProg->Name);
- fprintf(stderr, "%s", sh->Source);
- fprintf(stderr, "\n");
- }
- }
-
- if (brw->precompile && !brw_shader_precompile(ctx, shProg))
- return false;
-
- return true;
+ prog_data->binding_table.size_bytes =
+ MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
}
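
Each binding-table entry is 4 bytes, so the helper only grows the recorded size to cover the highest surface index seen. A worked example with a hypothetical index:

   brw_mark_surface_used(prog_data, 5);
   /* binding_table.size_bytes is now at least (5 + 1) * 4 = 24 bytes. */
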
-
enum brw_reg_type
brw_type_for_base_type(const struct glsl_type *type)
{
if (cfg) {
int ip = 0;
foreach_block_and_inst(block, backend_instruction, inst, cfg) {
- fprintf(file, "%4d: ", ip++);
+ if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
+ fprintf(file, "%4d: ", ip++);
dump_instruction(inst, file);
}
} else {
int ip = 0;
foreach_in_list(backend_instruction, inst, &instructions) {
- fprintf(file, "%4d: ", ip++);
+ if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
+ fprintf(file, "%4d: ", ip++);
dump_instruction(inst, file);
}
}
next_binding_table_offset += num_textures;
if (shader) {
+ assert(shader->NumUniformBlocks <= BRW_MAX_COMBINED_UBO_SSBO);
stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
next_binding_table_offset += shader->NumUniformBlocks;
} else {
&stage_prog_data->param[param_start_index];
for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
- const unsigned image_idx = storage->image[stage].index + i;
+ const unsigned image_idx = storage->opaque[stage].index + i;
const brw_image_param *image_param =
&stage_prog_data->image_param[image_idx];
stage_prog_data->binding_table.image_start + image_idx);
}
}
+
+/**
+ * Decide which set of clip planes should be used when clipping via
+ * gl_Position or gl_ClipVertex.
+ */
+gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
+{
+ if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
+ /* There is currently a GLSL vertex shader, so clip according to GLSL
+ * rules, which means compare gl_ClipVertex (or gl_Position, if
+ * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
+ * that were stored in EyeUserPlane at the time the clip planes were
+ * specified.
+ */
+ return ctx->Transform.EyeUserPlane;
+ } else {
+ /* Either we are using fixed function or an ARB vertex program. In
+ * either case the clip planes are going to be compared against
+ * gl_Position (which is in clip coordinates) so we have to clip using
+ * _ClipUserPlane, which was transformed into clip coordinates by Mesa
+ * core.
+ */
+ return ctx->Transform._ClipUserPlane;
+ }
+}
+
SCHEDULE_POST,
};
-class backend_shader {
+struct backend_shader {
protected:
backend_shader(const struct brw_compiler *compiler,
unsigned param_start_index,
const gl_uniform_storage *storage);
+#else
+struct backend_shader;
#endif /* __cplusplus */
enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type);
bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg);
bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg);
+bool opt_predicated_break(struct backend_shader *s);
+
#ifdef __cplusplus
extern "C" {
#endif
int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
+bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);
+
#ifdef __cplusplus
}
#endif
pass_num = 0;
iteration++;
+ OPT(opt_predicated_break, this);
OPT(opt_reduce_swizzle);
OPT(dead_code_eliminate);
OPT(dead_control_flow_eliminate, this);
struct brw_vs_prog_data *prog_data,
struct gl_vertex_program *vp,
struct gl_shader_program *prog,
+ int shader_time_index,
unsigned *final_assembly_size)
{
const unsigned *assembly = NULL;
- struct brw_shader *shader = NULL;
- if (prog)
- shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
-
- int st_index = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
- st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
-
- if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
- brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
-
if (brw->intelScreen->compiler->scalar_vs) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
fs_visitor v(brw->intelScreen->compiler, brw,
mem_ctx, key, &prog_data->base.base,
NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
- vp->Base.nir, 8, st_index);
+ vp->Base.nir, 8, shader_time_index);
if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
prog->LinkStatus = false;
vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
vp->Base.nir, brw_select_clip_planes(&brw->ctx),
- mem_ctx, st_index,
+ mem_ctx, shader_time_index,
!_mesa_is_gles3(&brw->ctx));
if (!v.run()) {
if (prog) {
void *mem_ctx,
bool no_spills,
int shader_time_index);
- ~vec4_visitor();
+ virtual ~vec4_visitor();
dst_reg dst_null_f()
{
int implied_mrf_writes(vec4_instruction *inst);
- void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
- src_reg src0, src_reg src1, src_reg one);
-
vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1);
*/
src_reg emit_uniformize(const src_reg &src);
- /**
- * Emit the correct dot-product instruction for the type of arguments
- */
- void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
-
src_reg fix_3src_operand(const src_reg &src);
src_reg resolve_source_modifiers(const src_reg &src);
void
vec4_gs_visitor::nir_setup_inputs()
{
- nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
-
- foreach_list_typed(nir_variable, var, node, &nir->inputs) {
- int offset = var->data.driver_location;
- if (var->type->base_type == GLSL_TYPE_ARRAY) {
- /* Geometry shader inputs are arrays, but they use an unusual array
- * layout: instead of all array elements for a given geometry shader
- * input being stored consecutively, all geometry shader inputs are
- * interleaved into one giant array. At this stage of compilation, we
- * assume that the stride of the array is BRW_VARYING_SLOT_COUNT.
- * Later, setup_attributes() will remap our accesses to the actual
- * input array.
- */
- assert(var->type->length > 0);
- int length = var->type->length;
- int size = type_size_vec4(var->type) / length;
- for (int i = 0; i < length; i++) {
- int location = var->data.location + i * BRW_VARYING_SLOT_COUNT;
- for (int j = 0; j < size; j++) {
- src_reg src = src_reg(ATTR, location + j, var->type);
- src = retype(src, brw_type_for_base_type(var->type));
- nir_inputs[offset] = src;
- offset++;
- }
- }
- } else {
- int size = type_size_vec4(var->type);
- for (int i = 0; i < size; i++) {
- src_reg src = src_reg(ATTR, var->data.location + i, var->type);
- src = retype(src, brw_type_for_base_type(var->type));
- nir_inputs[offset] = src;
- offset++;
- }
- }
- }
}
void
src_reg src;
switch (instr->intrinsic) {
+ case nir_intrinsic_load_per_vertex_input_indirect:
+ assert(!"EmitNoIndirectInput should prevent this.");
+ case nir_intrinsic_load_per_vertex_input: {
+ /* The EmitNoIndirectInput flag guarantees our vertex index will
+ * be constant. We should handle indirects someday.
+ */
+ nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
+
+ /* Make up a type...we have no way of knowing... */
+ const glsl_type *const type = glsl_type::ivec(instr->num_components);
+
+ src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] +
+ instr->const_index[0], type);
+ dest = get_nir_dest(instr->dest, src.type);
+ dest.writemask = brw_writemask_for_size(instr->num_components);
+ emit(MOV(dest, src));
+ break;
+ }
+
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_indirect:
+ unreachable("nir_lower_io should have produced per_vertex intrinsics");
+
case nir_intrinsic_emit_vertex_with_counter: {
this->vertex_count =
retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
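Worked example (assumed stride constant) of the interleaved attribute addressing used by the load_per_vertex_input case above: all inputs of vertex N start at N * BRW_VARYING_SLOT_COUNT, and const_index[0] selects the slot within that vertex.

#include <assert.h>

#define EXAMPLE_SLOT_COUNT 35   /* stand-in for BRW_VARYING_SLOT_COUNT */

static int
example_gs_input_slot(int vertex, int varying_slot)
{
   return EXAMPLE_SLOT_COUNT * vertex + varying_slot;
}

int
main(void)
{
   /* Varying slot 4 of input vertex 1 lands at attribute slot 39. */
   assert(example_gs_input_slot(1, 4) == 39);
   return 0;
}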
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
- struct gl_shader_program *prog,
nir_shader *shader,
void *mem_ctx,
bool no_spills,
: vec4_visitor(compiler, log_data, &c->key.tex,
&c->prog_data.base, shader, mem_ctx,
no_spills, shader_time_index),
- shader_prog(prog),
c(c)
{
}
* be recorded by transform feedback, we can simply discard all geometry
* bound to these streams when transform feedback is disabled.
*/
- if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
+ if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
return;
/* If we're outputting 32 control data bits or less, then we can wait
struct gl_shader_program *prog,
struct brw_gs_compile *c,
void *mem_ctx,
+ int shader_time_index,
unsigned *final_assembly_size)
{
struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
- if (unlikely(INTEL_DEBUG & DEBUG_GS))
- brw_dump_ir("geometry", prog, shader, NULL);
-
- int st_index = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
- st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS);
-
if (brw->gen >= 7) {
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
* so without spilling. If the GS invocations count > 1, then we can't use
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
vec4_gs_visitor v(brw->intelScreen->compiler, brw,
- c, prog, shader->Program->nir,
- mem_ctx, true /* no_spills */, st_index);
+ c, shader->Program->nir,
+ mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
return generate_assembly(brw, prog, &c->gp->program.Base,
&c->prog_data.base, mem_ctx, v.cfg,
if (brw->gen >= 7)
gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
- c, prog, shader->Program->nir,
+ c, shader->Program->nir,
mem_ctx, false /* no_spills */,
- st_index);
+ shader_time_index);
else
gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
c, prog, shader->Program->nir,
mem_ctx, false /* no_spills */,
- st_index);
+ shader_time_index);
if (!gs->run()) {
prog->LinkStatus = false;
struct gl_shader_program *prog,
struct brw_gs_compile *c,
void *mem_ctx,
+ int shader_time_index,
unsigned *final_assembly_size);
#ifdef __cplusplus
vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
- struct gl_shader_program *prog,
nir_shader *shader,
void *mem_ctx,
bool no_spills,
void emit_control_data_bits();
void set_stream_control_data_bits(unsigned stream_id);
- struct gl_shader_program *shader_prog;
-
src_reg vertex_count;
src_reg control_data_bits;
const struct brw_gs_compile * const c;
#include "glsl/ir_uniform.h"
#include "program/sampler.h"
-#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
-
namespace brw {
vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
return inst;
}
-void
-vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
-{
- static enum opcode dot_opcodes[] = {
- BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
- };
-
- emit(dot_opcodes[elements - 2], dst, src0, src1);
-}
-
src_reg
vec4_visitor::fix_3src_operand(const src_reg &src)
{
dst,
surf_index,
offset_reg);
- pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
+ pull->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
pull->mlen = 1;
}
#include "util/ralloc.h"
-/**
- * Decide which set of clip planes should be used when clipping via
- * gl_Position or gl_ClipVertex.
- */
-gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
-{
- if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
- /* There is currently a GLSL vertex shader, so clip according to GLSL
- * rules, which means compare gl_ClipVertex (or gl_Position, if
- * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
- * that were stored in EyeUserPlane at the time the clip planes were
- * specified.
- */
- return ctx->Transform.EyeUserPlane;
- } else {
- /* Either we are using fixed function or an ARB vertex program. In
- * either case the clip planes are going to be compared against
- * gl_Position (which is in clip coordinates) so we have to clip using
- * _ClipUserPlane, which was transformed into clip coordinates by Mesa
- * core.
- */
- return ctx->Transform._ClipUserPlane;
- }
-}
-
bool
brw_codegen_vs_prog(struct brw_context *brw,
struct gl_shader_program *prog,
start_time = get_time();
}
+ if (unlikely(INTEL_DEBUG & DEBUG_VS))
+ brw_dump_ir("vertex", prog, &vs->base, &vp->program.Base);
+
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);
+
/* Emit GEN4 code.
*/
program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
- &vp->program, prog, &program_size);
+ &vp->program, prog, st_index, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
struct brw_vs_prog_data *prog_data,
struct gl_vertex_program *vp,
struct gl_shader_program *shader_prog,
+ int shader_time_index,
unsigned *program_size);
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
private:
int setup_attributes(int payload_reg);
- void setup_vp_regs();
void setup_uniform_clipplane_values();
void emit_clip_distances(dst_reg reg, int offset);
- dst_reg get_vp_dst_reg(const prog_dst_register &dst);
- src_reg get_vp_src_reg(const prog_src_register &src);
const struct brw_vs_prog_key *const key;
struct brw_vs_prog_data * const vs_prog_data;
- src_reg *vp_temp_regs;
- src_reg vp_addr_reg;
gl_clip_plane *clip_planes;
* Return a bitfield where bit n is set if barycentric interpolation mode n
* (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
*/
-unsigned
+static unsigned
brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
bool shade_model_flat,
bool persample_shading,
start_time = get_time();
}
+ if (unlikely(INTEL_DEBUG & DEBUG_WM))
+ brw_dump_ir("fragment", prog, &fs->base, &fp->program.Base);
+
+ int st_index8 = -1, st_index16 = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+ st_index8 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS8);
+ st_index16 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS16);
+ }
+
program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
- &fp->program, prog, &program_size);
+ &fp->program, prog, st_index8, st_index16, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
struct brw_wm_prog_data *prog_data,
struct gl_fragment_program *fp,
struct gl_shader_program *prog,
+ int shader_time_index8,
+ int shader_time_index16,
unsigned *final_assembly_size);
GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
void
brw_upload_wm_prog(struct brw_context *brw);
-struct nir_shader;
-
-unsigned
-brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
- bool shade_model_flat,
- bool persample_shading,
- struct nir_shader *shader);
-
#ifdef __cplusplus
} // extern "C"
#endif
+#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
}
brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+
+   /* _NEW_POLYGON */
+ if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
+ OUT_BATCH_F(ctx->Polygon.OffsetClamp);
+ ADVANCE_BATCH();
+
+ brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
+ }
}
const struct brw_tracked_state brw_wm_unit = {
emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
}
+static int
+align_interleaved_urb_mlen(int mlen)
+{
+ /* URB data written (does not include the message header reg) must
+ * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
+ * section 5.4.3.2.2: URB_INTERLEAVED.
+ */
+ if ((mlen % 2) != 1)
+ mlen++;
+ return mlen;
+}
+
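A standalone check (not part of the patch) of what the new helper computes: mlen counts the message header register as well, so rounding an even mlen up to the next odd value keeps the URB data payload (mlen minus the header) a multiple of two registers, matching the 256-bit rule quoted in the comment.

#include <assert.h>

static int
align_interleaved_urb_mlen(int mlen)
{
   if ((mlen % 2) != 1)
      mlen++;
   return mlen;
}

int
main(void)
{
   assert(align_interleaved_urb_mlen(4) == 5);  /* 3 data regs pad to 4 */
   assert(align_interleaved_urb_mlen(5) == 5);  /* already odd */
   assert(align_interleaved_urb_mlen(6) == 7);
   return 0;
}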
void
gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
int last_mrf, int urb_offset)
}
inst->base_mrf = base_mrf;
- /* URB data written (does not include the message header reg) must
- * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
- * section 5.4.3.2.2: URB_INTERLEAVED.
- */
- int mlen = last_mrf - base_mrf;
- if ((mlen % 2) != 1)
- mlen++;
- inst->mlen = mlen;
+ inst->mlen = align_interleaved_urb_mlen(last_mrf - base_mrf);
inst->offset = urb_offset;
}
/* In the process of generating our URB write message contents, we
* may need to unspill a register or load from an array. Those
- * reads would use MRFs 14-15.
+    * reads would use MRFs 21..23.
*/
- int max_usable_mrf = 13;
+ int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
this->vertex_output_offset, 1u));
/* If this was max_usable_mrf, we can't fit anything more into
- * this URB WRITE.
+    * this URB WRITE. Same if we reached the maximum message length.
*/
- if (mrf > max_usable_mrf) {
+ if (mrf > max_usable_mrf ||
+ align_interleaved_urb_mlen(mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
slot++;
break;
}
void *mem_ctx,
bool no_spills,
int shader_time_index) :
- vec4_gs_visitor(comp, log_data, c, prog, shader, mem_ctx, no_spills,
- shader_time_index) {}
+ vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills,
+ shader_time_index),
+ shader_prog(prog)
+ {
+ }
protected:
virtual void emit_prolog();
void xfb_setup();
int get_vertex_output_offset_for_varying(int vertex, int varying);
+ const struct gl_shader_program *shader_prog;
+
src_reg vertex_output;
src_reg vertex_output_offset;
src_reg temp;
unsigned local_id_dwords = 0;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
- }
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+ local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
unsigned push_constant_data_size =
(prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
};
-/**
- * We are building the local ID push constant data using the simplest possible
- * method. We simply push the local IDs directly as they should appear in the
- * registers for the uvec3 gl_LocalInvocationID variable.
- *
- * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
- * registers worth of push constant space.
- *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
- * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
- * to coordinated.
- *
- * FINISHME: There are a few easy optimizations to consider.
- *
- * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
- * no need for using push constant space for that dimension.
- *
- * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
- * easily use 16-bit words rather than 32-bit dwords in the push constant
- * data.
- *
- * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
- * conveying the data, and thereby reduce push constant usage.
- *
- */
-unsigned
-brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
-{
- return 3 * dispatch_width;
-}
-
-
-static void
-fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
- void *buffer, unsigned *x, unsigned *y, unsigned *z)
-{
- uint32_t *param = (uint32_t *)buffer;
- for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
- param[0 * cs_prog_data->simd_size + i] = *x;
- param[1 * cs_prog_data->simd_size + i] = *y;
- param[2 * cs_prog_data->simd_size + i] = *z;
-
- (*x)++;
- if (*x == cs_prog_data->local_size[0]) {
- *x = 0;
- (*y)++;
- if (*y == cs_prog_data->local_size[1]) {
- *y = 0;
- (*z)++;
- if (*z == cs_prog_data->local_size[2])
- *z = 0;
- }
- }
- }
-}
-
-
/**
* Creates a region containing the push constants for the CS on gen7+.
*
(struct brw_stage_prog_data*) cs_prog_data;
unsigned local_id_dwords = 0;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
- }
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+ local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
/* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+ brw_cs_fill_local_id_payload(cs_prog_data, param, threads,
+ reg_aligned_constant_size);
+
/* _NEW_PROGRAM_CONSTANTS */
- unsigned x = 0, y = 0, z = 0;
for (t = 0; t < threads; t++) {
-      gl_constant_value *next_param = &param[t * param_aligned_count];
- if (local_id_dwords > 0) {
- fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
- next_param += local_id_dwords;
- }
+      gl_constant_value *next_param =
+         &param[t * param_aligned_count + local_id_dwords];
for (i = 0; i < prog_data->nr_params; i++) {
next_param[i] = *prog_data->param[i];
}
#include "main/blit.h"
#include "main/context.h"
#include "main/enums.h"
-#include "main/colormac.h"
#include "main/fbobject.h"
#include "brw_context.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
-
-bool
-brw_is_front_buffer_reading(struct gl_framebuffer *fb)
-{
- if (!fb || _mesa_is_user_fbo(fb))
- return false;
-
- return fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT;
-}
-
-bool
-brw_is_front_buffer_drawing(struct gl_framebuffer *fb)
-{
- if (!fb || _mesa_is_user_fbo(fb))
- return false;
-
- return (fb->_NumColorDrawBuffers >= 1 &&
- fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT);
-}
-
static void
intelDrawBuffer(struct gl_context * ctx, GLenum mode)
{
- if (brw_is_front_buffer_drawing(ctx->DrawBuffer)) {
+ if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
struct brw_context *const brw = brw_context(ctx);
/* If we might be front-buffer rendering on this buffer for the first
static void
intelReadBuffer(struct gl_context * ctx, GLenum mode)
{
- if (brw_is_front_buffer_reading(ctx->ReadBuffer)) {
+ if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) {
struct brw_context *const brw = brw_context(ctx);
/* If we might be front-buffer reading on this buffer for the first
#include "drm.h"
#include "brw_context.h"
-struct intel_framebuffer;
-
extern void intelInitBufferFuncs(struct dd_function_table *functions);
-bool brw_is_front_buffer_reading(struct gl_framebuffer *fb);
-bool brw_is_front_buffer_drawing(struct gl_framebuffer *fb);
-
#endif /* INTEL_BUFFERS_H */
#include "intel_debug.h"
#include "utils.h"
#include "util/u_atomic.h" /* for p_atomic_cmpxchg */
+#include "util/debug.h"
uint64_t INTEL_DEBUG = 0;
-static const struct dri_debug_control debug_control[] = {
+static const struct debug_control debug_control[] = {
{ "tex", DEBUG_TEXTURE},
{ "state", DEBUG_STATE},
{ "blit", DEBUG_BLIT},
}
void
-brw_process_intel_debug_variable(struct intel_screen *screen)
+brw_process_intel_debug_variable(void)
{
- uint64_t intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
+ uint64_t intel_debug = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
(void) p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug);
-
- if (INTEL_DEBUG & DEBUG_BUFMGR)
- dri_bufmgr_set_debug(screen->bufmgr, true);
-
- if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && screen->devinfo->gen < 7) {
- fprintf(stderr,
- "shader_time debugging requires gen7 (Ivybridge) or better.\n");
- INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
- }
-
- if (INTEL_DEBUG & DEBUG_AUB)
- drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true);
}
/**
extern uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage);
-struct intel_screen;
-
-extern void brw_process_intel_debug_variable(struct intel_screen *);
+extern void brw_process_intel_debug_variable(void);
extern bool brw_env_var_as_boolean(const char *var_name, bool default_value);
ctx->Extensions.EXT_packed_float = true;
ctx->Extensions.EXT_pixel_buffer_object = true;
ctx->Extensions.EXT_point_parameters = true;
+ ctx->Extensions.EXT_polygon_offset_clamp = true;
ctx->Extensions.EXT_provoking_vertex = true;
ctx->Extensions.EXT_stencil_two_side = true;
ctx->Extensions.EXT_texture_array = true;
ctx->Extensions.AMD_vertex_shader_layer = true;
ctx->Extensions.EXT_framebuffer_multisample = true;
ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
- ctx->Extensions.EXT_polygon_offset_clamp = true;
ctx->Extensions.EXT_transform_feedback = true;
ctx->Extensions.OES_depth_texture_cube_map = true;
#define FILE_DEBUG_FLAG DEBUG_FBO
-/**
- * Create a new framebuffer object.
- */
-static struct gl_framebuffer *
-intel_new_framebuffer(struct gl_context * ctx, GLuint name)
-{
- /* Only drawable state in intel_framebuffer at this time, just use Mesa's
- * class
- */
- return _mesa_new_framebuffer(ctx, name);
-}
-
-
/** Called by gl_renderbuffer::Delete() */
static void
intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
intel_fbo_init(struct brw_context *brw)
{
struct dd_function_table *dd = &brw->ctx.Driver;
- dd->NewFramebuffer = intel_new_framebuffer;
dd->NewRenderbuffer = intel_new_renderbuffer;
dd->MapRenderbuffer = intel_map_renderbuffer;
dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
}
}
-bool
+static bool
intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
{
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
* 64bpp, and 128bpp.
*/
-bool
-intel_miptree_is_fast_clear_capable(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+static bool
+intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
+ struct intel_mipmap_tree *mt)
{
/* MCS support does not exist prior to Gen7 */
if (brw->gen < 7)
if (mt->disable_aux_buffers)
return false;
+ /* This function applies only to non-multisampled render targets. */
+ if (mt->num_samples > 1)
+ return false;
+
/* MCS is only supported for color buffers */
switch (_mesa_get_format_base_format(mt->format)) {
case GL_DEPTH_COMPONENT:
return false;
}
+
+ /* Check for layered surfaces. */
if (mt->physical_depth0 != 1) {
+ /* Multisample surfaces with the CMS layout are not layered surfaces,
+ * yet still have physical_depth0 > 1. Assert that we don't
+ * accidentally reject a multisampled surface here. We should have
+ * rejected it earlier by explicitly checking the sample count.
+ */
+ assert(mt->num_samples <= 1);
+
if (brw->gen >= 8) {
perf_debug("Layered fast clear - giving up. (%dx%d%d)\n",
mt->logical_width0, mt->logical_height0,
* 7 | ? | ?
* 6 | ? | ?
*/
- if (intel_miptree_is_fast_clear_capable(brw, mt)) {
+ if (intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
} else if (brw->gen >= 9 && num_samples > 1) {
* clear actually occurs.
*/
if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
- intel_miptree_is_fast_clear_capable(brw, mt)) {
+ intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
}
* clear actually occurs.
*/
if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) &&
- intel_miptree_is_fast_clear_capable(intel, singlesample_mt))
+ intel_miptree_supports_non_msrt_fast_clear(intel, singlesample_mt)) {
singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
+ }
if (num_samples == 0) {
intel_miptree_release(&irb->mt);
void
intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height);
-bool
-intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling);
-bool
-intel_miptree_is_fast_clear_capable(struct brw_context *brw,
- struct intel_mipmap_tree *mt);
+
bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt);
void
intelInitPixelFuncs(struct dd_function_table *functions)
{
- functions->Accum = _mesa_accum;
functions->Bitmap = intelBitmap;
functions->CopyPixels = intelCopyPixels;
functions->DrawPixels = intelDrawPixels;
if (!intelScreen->devinfo)
return false;
- brw_process_intel_debug_variable(intelScreen);
+ brw_process_intel_debug_variable();
+
+ if (INTEL_DEBUG & DEBUG_BUFMGR)
+ dri_bufmgr_set_debug(intelScreen->bufmgr, true);
+
+ if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && intelScreen->devinfo->gen < 7) {
+ fprintf(stderr,
+ "shader_time debugging requires gen7 (Ivybridge) or better.\n");
+ INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
+ }
+
+ if (INTEL_DEBUG & DEBUG_AUB)
+ drm_intel_bufmgr_gem_set_aub_dump(intelScreen->bufmgr, true);
intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7;
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "main/colormac.h"
#include "main/dd.h"
#include "intel_screen.h"
#include "main/macros.h"
#include "main/mtypes.h"
-#include "main/colormac.h"
#include "r200_reg.h"
#include "r200_vertprog.h"
#include "main/glheader.h"
#include "main/mtypes.h"
-#include "main/colormac.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/api_arrayelt.h"
#include "main/enums.h"
-#include "main/colormac.h"
#include "main/light.h"
#include "main/framebuffer.h"
#include "main/fbobject.h"
functions->Enable = r200Enable;
functions->Fogfv = r200Fogfv;
functions->FrontFace = r200FrontFace;
- functions->Hint = NULL;
functions->LightModelfv = r200LightModelfv;
functions->Lightfv = r200Lightfv;
functions->LineStipple = r200LineStipple;
#include "main/glheader.h"
#include "main/imports.h"
#include "main/enums.h"
-#include "main/colormac.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "main/glheader.h"
#include "main/mtypes.h"
-#include "main/colormac.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/imports.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/enums.h"
-#include "main/colormac.h"
#include "main/light.h"
#include "main/state.h"
#include "main/glheader.h"
#include "main/imports.h"
-#include "main/colormac.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
#if DO_DEBUG
- RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
- debug_control );
+ RADEON_DEBUG = parse_debug_string( getenv( "RADEON_DEBUG" ),
+ debug_control );
#endif
tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
#include "radeon_drm.h"
#include "main/macros.h"
#include "main/mtypes.h"
-#include "main/colormac.h"
#include "radeon_screen.h"
#include "radeon_common.h"
* Pauli Nieminen <suokkos@gmail.com>
*/
-#include "utils.h"
+#include "util/debug.h"
#include "radeon_common_context.h"
#include "radeon_debug.h"
#include <stdarg.h>
#include <stdio.h>
-static const struct dri_debug_control debug_control[] = {
+static const struct debug_control debug_control[] = {
{"fall", RADEON_FALLBACKS},
{"tex", RADEON_TEXTURE},
{"ioctl", RADEON_IOCTL},
void radeon_init_debug(void)
{
- radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
+ radeon_enabled_debug_types = parse_debug_string(getenv("RADEON_DEBUG"), debug_control);
radeon_enabled_debug_types |= RADEON_GENERAL;
}
printf(__VA_ARGS__); \
} while(0)
-static struct gl_framebuffer *
-radeon_new_framebuffer(struct gl_context *ctx, GLuint name)
-{
- return _mesa_new_framebuffer(ctx, name);
-}
-
static void
radeon_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
void radeon_fbo_init(struct radeon_context *radeon)
{
- radeon->glCtx.Driver.NewFramebuffer = radeon_new_framebuffer;
radeon->glCtx.Driver.NewRenderbuffer = radeon_new_renderbuffer;
radeon->glCtx.Driver.MapRenderbuffer = radeon_map_renderbuffer;
radeon->glCtx.Driver.UnmapRenderbuffer = radeon_unmap_renderbuffer;
ctx->Driver.Enable = radeonEnable;
ctx->Driver.Fogfv = radeonFogfv;
ctx->Driver.FrontFace = radeonFrontFace;
- ctx->Driver.Hint = NULL;
ctx->Driver.LightModelfv = radeonLightModelfv;
ctx->Driver.Lightfv = radeonLightfv;
ctx->Driver.LineStipple = radeonLineStipple;
#include "main/glheader.h"
#include "main/mtypes.h"
-#include "main/colormac.h"
#include "main/enums.h"
#include "main/imports.h"
#include "main/macros.h"
#define HAVE_ELTS 0
static const GLuint hw_prim[GL_POLYGON+1] = {
- RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
- RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
- 0,
- RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,
- 0,
- 0,
- 0
+ [GL_POINTS] = RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
+ [GL_LINES] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+ [GL_LINE_LOOP] = 0,
+ [GL_LINE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,
+ [GL_TRIANGLES] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+ [GL_TRIANGLE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,
+ [GL_TRIANGLE_FAN] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,
+ [GL_QUADS] = 0,
+ [GL_QUAD_STRIP] = 0,
+ [GL_POLYGON] = 0
};
static inline void
static const GLuint reduced_hw_prim[GL_POLYGON+1] = {
- RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
- RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
- RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
- RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
- RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
+ [GL_POINTS] = RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
+ [GL_LINES] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+ [GL_LINE_LOOP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+ [GL_LINE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+ [GL_TRIANGLES] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+ [GL_TRIANGLE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+ [GL_TRIANGLE_FAN] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+ [GL_QUADS] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+ [GL_QUAD_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+ [GL_POLYGON] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
};
static void radeonRasterPrimitive( struct gl_context *ctx, GLuint hwprim );
#include "main/glheader.h"
#include "main/imports.h"
-#include "main/colormac.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/glheader.h"
#include "main/imports.h"
-#include "main/colormac.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/teximage.h"
* Allocate and initialize a new vertex array object.
*
* This function is intended to be called via
- * \c dd_function_table::NewArrayObject.
*/
struct gl_vertex_array_object *
_mesa_new_vao(struct gl_context *ctx, GLuint name)
deleteFlag = (oldObj->RefCount == 0);
mtx_unlock(&oldObj->Mutex);
- if (deleteFlag) {
- assert(ctx->Driver.DeleteArrayObject);
- ctx->Driver.DeleteArrayObject(ctx, oldObj);
- }
+ if (deleteFlag)
+ _mesa_delete_vao(ctx, oldObj);
*ptr = NULL;
}
}
/* For APPLE version, generate a new array object now */
- newObj = (*ctx->Driver.NewArrayObject)(ctx, id);
+ newObj = _mesa_new_vao(ctx, id);
if (!newObj) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindVertexArrayAPPLE");
return;
ctx->NewState |= _NEW_ARRAY;
_mesa_reference_vao(ctx, &ctx->Array.VAO, newObj);
-
- /* Pass BindVertexArray call to device driver */
- if (ctx->Driver.BindArrayObject && newObj)
- ctx->Driver.BindArrayObject(ctx, newObj);
}
struct gl_vertex_array_object *obj;
GLuint name = first + i;
- obj = (*ctx->Driver.NewArrayObject)( ctx, name );
+ obj = _mesa_new_vao(ctx, name);
if (!obj) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func);
return;
ctx->Color.Blend[buf].DstA = dfactorA;
update_uses_dual_src(ctx, buf);
ctx->Color._BlendFuncPerBuffer = GL_TRUE;
-
- if (ctx->Driver.BlendFuncSeparatei) {
- ctx->Driver.BlendFuncSeparatei(ctx, buf, sfactorRGB, dfactorRGB,
- sfactorA, dfactorA);
- }
}
ctx->Color.Blend[buf].EquationRGB = mode;
ctx->Color.Blend[buf].EquationA = mode;
ctx->Color._BlendEquationPerBuffer = GL_TRUE;
-
- if (ctx->Driver.BlendEquationSeparatei)
- ctx->Driver.BlendEquationSeparatei(ctx, buf, mode, mode);
}
ctx->Color.Blend[buf].EquationRGB = modeRGB;
ctx->Color.Blend[buf].EquationA = modeA;
ctx->Color._BlendEquationPerBuffer = GL_TRUE;
-
- if (ctx->Driver.BlendEquationSeparatei)
- ctx->Driver.BlendEquationSeparatei(ctx, buf, modeRGB, modeA);
}
FLUSH_VERTICES(ctx, _NEW_COLOR);
COPY_4UBV(ctx->Color.ColorMask[buf], tmp);
-
- if (ctx->Driver.ColorMaskIndexed)
- ctx->Driver.ColorMaskIndexed(ctx, buf, red, green, blue, alpha);
}
#include "imports.h"
#include "mtypes.h"
+#include "vbo/vbo.h"
#ifdef __cplusplus
if (MESA_VERBOSE & VERBOSE_STATE) \
_mesa_debug(ctx, "FLUSH_VERTICES in %s\n", MESA_FUNCTION);\
if (ctx->Driver.NeedFlush & FLUSH_STORED_VERTICES) \
- ctx->Driver.FlushVertices(ctx, FLUSH_STORED_VERTICES); \
+ vbo_exec_FlushVertices(ctx, FLUSH_STORED_VERTICES); \
ctx->NewState |= newstate; \
} while (0)
if (MESA_VERBOSE & VERBOSE_STATE) \
_mesa_debug(ctx, "FLUSH_CURRENT in %s\n", MESA_FUNCTION); \
if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) \
- ctx->Driver.FlushVertices(ctx, FLUSH_UPDATE_CURRENT); \
+ vbo_exec_FlushVertices(ctx, FLUSH_UPDATE_CURRENT); \
ctx->NewState |= newstate; \
} while (0)
*/
void (*UpdateState)( struct gl_context *ctx, GLbitfield new_state );
- /**
- * Resize the given framebuffer to the given size.
- * XXX OBSOLETE: this function will be removed in the future.
- */
- void (*ResizeBuffers)( struct gl_context *ctx, struct gl_framebuffer *fb,
- GLuint width, GLuint height);
-
/**
* This is called whenever glFinish() is called.
*/
*/
void (*Clear)( struct gl_context *ctx, GLbitfield buffers );
- /**
- * Execute glAccum command.
- */
- void (*Accum)( struct gl_context *ctx, GLenum op, GLfloat value );
-
-
/**
* Execute glRasterPos, updating the ctx->Current.Raster fields
*/
/** Set the blend equation */
void (*BlendEquationSeparate)(struct gl_context *ctx,
GLenum modeRGB, GLenum modeA);
- void (*BlendEquationSeparatei)(struct gl_context *ctx, GLuint buffer,
- GLenum modeRGB, GLenum modeA);
/** Specify pixel arithmetic */
void (*BlendFuncSeparate)(struct gl_context *ctx,
GLenum sfactorRGB, GLenum dfactorRGB,
GLenum sfactorA, GLenum dfactorA);
- void (*BlendFuncSeparatei)(struct gl_context *ctx, GLuint buffer,
- GLenum sfactorRGB, GLenum dfactorRGB,
- GLenum sfactorA, GLenum dfactorA);
/** Specify a plane against which all geometry is clipped */
void (*ClipPlane)(struct gl_context *ctx, GLenum plane, const GLfloat *eq);
/** Enable and disable writing of frame buffer color components */
void (*ColorMask)(struct gl_context *ctx, GLboolean rmask, GLboolean gmask,
GLboolean bmask, GLboolean amask );
- void (*ColorMaskIndexed)(struct gl_context *ctx, GLuint buf, GLboolean rmask,
- GLboolean gmask, GLboolean bmask, GLboolean amask);
/** Cause a material color to track the current color */
void (*ColorMaterial)(struct gl_context *ctx, GLenum face, GLenum mode);
/** Specify whether front- or back-facing facets can be culled */
void (*Enable)(struct gl_context *ctx, GLenum cap, GLboolean state);
/** Specify fog parameters */
void (*Fogfv)(struct gl_context *ctx, GLenum pname, const GLfloat *params);
- /** Specify implementation-specific hints */
- void (*Hint)(struct gl_context *ctx, GLenum target, GLenum mode);
/** Set light source parameters.
* Note: for GL_POSITION and GL_SPOT_DIRECTION, params will have already
* been transformed to eye-space.
GLint *bytesWritten);
/*@}*/
-
- /**
- * \name Vertex Array objects
- */
- /*@{*/
- struct gl_vertex_array_object * (*NewArrayObject)(struct gl_context *ctx, GLuint id);
- void (*DeleteArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *);
- void (*BindArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *);
- /*@}*/
-
/**
* \name GLSL-related functions (ARB extensions and OpenGL 2.x)
*/
/*@{*/
struct gl_shader *(*NewShader)(struct gl_context *ctx,
GLuint name, GLenum type);
- void (*DeleteShader)(struct gl_context *ctx, struct gl_shader *shader);
- struct gl_shader_program *(*NewShaderProgram)(GLuint name);
- void (*DeleteShaderProgram)(struct gl_context *ctx,
- struct gl_shader_program *shProg);
void (*UseProgram)(struct gl_context *ctx, struct gl_shader_program *shProg);
/*@}*/
*/
GLbitfield NeedFlush;
- /** Need to call SaveFlushVertices() upon state change? */
+ /** Need to call vbo_save_SaveFlushVertices() upon state change? */
GLboolean SaveNeedFlush;
- /* Called prior to any of the GLvertexformat functions being
- * called. Paired with Driver.FlushVertices().
- */
- void (*BeginVertices)( struct gl_context *ctx );
-
- /**
- * If inside glBegin()/glEnd(), it should assert(0). Otherwise, if
- * FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered
- * vertices, if FLUSH_UPDATE_CURRENT bit is set updates
- * __struct gl_contextRec::Current and gl_light_attrib::Material
- *
- * Note that the default T&L engine never clears the
- * FLUSH_UPDATE_CURRENT bit, even after performing the update.
- */
- void (*FlushVertices)( struct gl_context *ctx, GLuint flags );
- void (*SaveFlushVertices)( struct gl_context *ctx );
-
- /**
- * Give the driver the opportunity to hook in its own vtxfmt for
- * compiling optimized display lists. This is called on each valid
- * glBegin() during list compilation.
- */
- GLboolean (*NotifySaveBegin)( struct gl_context *ctx, GLenum mode );
-
/**
* Notify driver that the special derived value _NeedEyeCoords has
* changed.
*/
void (*LightingSpaceChange)( struct gl_context *ctx );
- /**
- * Called by glNewList().
- *
- * Let the T&L component know what is going on with display lists
- * in time to make changes to dispatch tables, etc.
- */
- void (*NewList)( struct gl_context *ctx, GLuint list, GLenum mode );
- /**
- * Called by glEndList().
- *
- * \sa dd_function_table::NewList.
- */
- void (*EndList)( struct gl_context *ctx );
-
- /**
- * Called by glCallList(s).
- *
- * Notify the T&L component before and after calling a display list.
- */
- void (*BeginCallList)( struct gl_context *ctx,
- struct gl_display_list *dlist );
- /**
- * Called by glEndCallList().
- *
- * \sa dd_function_table::BeginCallList.
- */
- void (*EndCallList)( struct gl_context *ctx );
-
/**@}*/
/**
*/
struct gl_sampler_object * (*NewSamplerObject)(struct gl_context *ctx,
GLuint name);
- void (*DeleteSamplerObject)(struct gl_context *ctx,
- struct gl_sampler_object *samp);
/**
* \name Return a timestamp in nanoseconds as defined by GL_ARB_timer_query.
* \name GL_ARB_shader_image_load_store interface.
*/
/** @{ */
- void (*BindImageTexture)(struct gl_context *ctx,
- struct gl_image_unit *unit,
- struct gl_texture_object *texObj,
- GLint level, GLboolean layered, GLint layer,
- GLenum access, GLenum format);
-
void (*MemoryBarrier)(struct gl_context *ctx, GLbitfield barriers);
/** @} */
* \param ctx GL context.
*
* Checks if dd_function_table::SaveNeedFlush is marked to flush
- * stored (save) vertices, and calls
- * dd_function_table::SaveFlushVertices if so.
+ * stored (save) vertices, and calls vbo_save_SaveFlushVertices if so.
*/
#define SAVE_FLUSH_VERTICES(ctx) \
do { \
if (ctx->Driver.SaveNeedFlush) \
- ctx->Driver.SaveFlushVertices(ctx); \
+ vbo_save_SaveFlushVertices(ctx); \
} while (0)
/* Give the driver an opportunity to hook in an optimized
* display list compiler.
*/
- if (ctx->Driver.NotifySaveBegin(ctx, mode))
+ if (vbo_save_NotifyBegin(ctx, mode))
return;
SAVE_FLUSH_VERTICES(ctx);
ctx->ListState.CallDepth++;
- if (ctx->Driver.BeginCallList)
- ctx->Driver.BeginCallList(ctx, dlist);
+ vbo_save_BeginCallList(ctx, dlist);
n = dlist->Head;
}
}
- if (ctx->Driver.EndCallList)
- ctx->Driver.EndCallList(ctx);
+ vbo_save_EndCallList(ctx);
ctx->ListState.CallDepth--;
}
ctx->ListState.CurrentBlock = ctx->ListState.CurrentList->Head;
ctx->ListState.CurrentPos = 0;
- ctx->Driver.NewList(ctx, name, mode);
+ vbo_save_NewList(ctx, name, mode);
ctx->CurrentDispatch = ctx->Save;
_glapi_set_dispatch(ctx->CurrentDispatch);
/* Call before emitting END_OF_LIST, in case the driver wants to
* emit opcodes itself.
*/
- ctx->Driver.EndList(ctx);
+ vbo_save_EndList(ctx);
(void) alloc_instruction(ctx, OPCODE_END_OF_LIST, 0);
#include "main/imports.h"
#include "main/macros.h"
#include "main/samplerobj.h"
+#include "main/shaderobj.h"
#include "main/texenvprogram.h"
#include "main/texobj.h"
#include "main/uniforms.h"
p.top_instructions = p.shader->ir;
p.instructions = p.shader->ir;
p.state = key;
- p.shader_program = ctx->Driver.NewShaderProgram(0);
+ p.shader_program = _mesa_new_shader_program(0);
/* Tell the linker to ignore the fact that we're building a
* separate shader, in case we're in a GLES2 context that would
* Resize the given framebuffer's renderbuffers to the new width and height.
* This should only be used for window-system framebuffers, not
* user-created renderbuffers (i.e. made with GL_EXT_framebuffer_object).
- * This will typically be called via ctx->Driver.ResizeBuffers() or directly
- * from a device driver.
+ * This will typically be called directly from a device driver.
*
* \note it's possible for ctx to be null since a window can be resized
* without a currently bound rendering context.
}
}
}
+
+bool
+_mesa_is_front_buffer_reading(const struct gl_framebuffer *fb)
+{
+ if (!fb || _mesa_is_user_fbo(fb))
+ return false;
+
+ return fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT;
+}
+
+bool
+_mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb)
+{
+ if (!fb || _mesa_is_user_fbo(fb))
+ return false;
+
+ return (fb->_NumColorDrawBuffers >= 1 &&
+ fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT);
+}
extern void
_mesa_print_framebuffer(const struct gl_framebuffer *fb);
+extern bool
+_mesa_is_front_buffer_reading(const struct gl_framebuffer *fb);
+
+extern bool
+_mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb);
+
#endif /* FRAMEBUFFER_H */
# GL_EXT_texture_filter_anisotropic
[ "MAX_TEXTURE_MAX_ANISOTROPY_EXT", "CONTEXT_FLOAT(Const.MaxTextureMaxAnisotropy), extra_EXT_texture_filter_anisotropic" ],
+
+# GL_KHR_debug (GL 4.3)/ GL_ARB_debug_output
+ [ "DEBUG_LOGGED_MESSAGES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
+ [ "DEBUG_NEXT_LOGGED_MESSAGE_LENGTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
+ [ "MAX_DEBUG_LOGGED_MESSAGES", "CONST(MAX_DEBUG_LOGGED_MESSAGES), NO_EXTRA" ],
+ [ "MAX_DEBUG_MESSAGE_LENGTH", "CONST(MAX_DEBUG_MESSAGE_LENGTH), NO_EXTRA" ],
+ [ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ],
+ [ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ],
+ [ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
]},
# Enums in OpenGL and GLES1
# GL_ARB_robustness
[ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
-# GL_KHR_debug (GL 4.3)/ GL_ARB_debug_output
- [ "DEBUG_LOGGED_MESSAGES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
- [ "DEBUG_NEXT_LOGGED_MESSAGE_LENGTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
- [ "MAX_DEBUG_LOGGED_MESSAGES", "CONST(MAX_DEBUG_LOGGED_MESSAGES), NO_EXTRA" ],
- [ "MAX_DEBUG_MESSAGE_LENGTH", "CONST(MAX_DEBUG_MESSAGE_LENGTH), NO_EXTRA" ],
- [ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ],
- [ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ],
- [ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
-
[ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ],
# GL_ARB_uniform_buffer_object
* internal formats, they do not correspond to GL constants, so the base
* format is returned instead.
*/
+ case GL_BGRA_EXT:
case GL_LUMINANCE_ALPHA:
case GL_LUMINANCE:
case GL_ALPHA:
if (effectiveInternalFormat == GL_NONE)
return GL_INVALID_OPERATION;
- GLenum baseInternalFormat =
- _mesa_base_tex_format(ctx, effectiveInternalFormat);
+ GLenum baseInternalFormat;
+ if (internalFormat == GL_BGRA_EXT) {
+ /* Unfortunately, _mesa_base_tex_format returns a base format of
+ * GL_RGBA for GL_BGRA_EXT. This makes perfect sense if you're
+ * asking the question, "what channels does this format have?"
+ * However, if we're trying to determine if two internal formats
+ * match in the ES3 sense, we actually want GL_BGRA.
+ */
+ baseInternalFormat = GL_BGRA_EXT;
+ } else {
+ baseInternalFormat =
+ _mesa_base_tex_format(ctx, effectiveInternalFormat);
+ }
if (internalFormat != baseInternalFormat)
return GL_INVALID_OPERATION;
}
switch (format) {
+ case GL_BGRA_EXT:
+ if (type != GL_UNSIGNED_BYTE || internalFormat != GL_BGRA)
+ return GL_INVALID_OPERATION;
+ break;
+
case GL_RGBA:
switch (type) {
case GL_UNSIGNED_BYTE:
default:
goto invalid_target;
}
-
- if (ctx->Driver.Hint) {
- (*ctx->Driver.Hint)( ctx, target, mode );
- }
-
return;
invalid_target:
#include "glheader.h"
-#include "colormac.h"
#include "enums.h"
#include "image.h"
#include "imports.h"
}
break;
default:
- _mesa_problem(ctx, "bad type in _mesa_pack_depth_span");
+ _mesa_problem(ctx, "bad type in _mesa_pack_depth_span (%s)",
+ _mesa_enum_to_string(dstType));
}
free(depthCopy);
_mesa_HashLookupLocked(ctx->Shared->SamplerObjects, name);
}
+static void
+delete_sampler_object(struct gl_context *ctx,
+ struct gl_sampler_object *sampObj)
+{
+ mtx_destroy(&sampObj->Mutex);
+ free(sampObj->Label);
+ free(sampObj);
+}
/**
* Handle reference counting.
deleteFlag = (oldSamp->RefCount == 0);
mtx_unlock(&oldSamp->Mutex);
- if (deleteFlag) {
- assert(ctx->Driver.DeleteSamplerObject);
- ctx->Driver.DeleteSamplerObject(ctx, oldSamp);
- }
+ if (deleteFlag)
+ delete_sampler_object(ctx, oldSamp);
*ptr = NULL;
}
return sampObj;
}
-
-/**
- * Fallback for ctx->Driver.DeleteSamplerObject();
- */
-static void
-_mesa_delete_sampler_object(struct gl_context *ctx,
- struct gl_sampler_object *sampObj)
-{
- mtx_destroy(&sampObj->Mutex);
- free(sampObj->Label);
- free(sampObj);
-}
-
static void
create_samplers(struct gl_context *ctx, GLsizei count, GLuint *samplers,
const char *caller)
set_sampler_compare_mode(struct gl_context *ctx,
struct gl_sampler_object *samp, GLint param)
{
+ /* If GL_ARB_shadow is not supported, don't report an error. The
+ * sampler object extension spec isn't clear on this extension interaction.
+    * This silences errors with Wine on older GPUs such as R200.
+ */
if (!ctx->Extensions.ARB_shadow)
- return INVALID_PNAME;
+ return GL_FALSE;
if (samp->CompareMode == param)
return GL_FALSE;
set_sampler_compare_func(struct gl_context *ctx,
struct gl_sampler_object *samp, GLint param)
{
+ /* If GL_ARB_shadow is not supported, don't report an error. The
+ * sampler object extension spec isn't clear on this extension interaction.
+    * This silences errors with Wine on older GPUs such as R200.
+ */
if (!ctx->Extensions.ARB_shadow)
- return INVALID_PNAME;
+ return GL_FALSE;
if (samp->CompareFunc == param)
return GL_FALSE;
*params = IROUND(sampObj->LodBias);
break;
case GL_TEXTURE_COMPARE_MODE:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = sampObj->CompareMode;
break;
case GL_TEXTURE_COMPARE_FUNC:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = sampObj->CompareFunc;
break;
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
*params = sampObj->LodBias;
break;
case GL_TEXTURE_COMPARE_MODE:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = (GLfloat) sampObj->CompareMode;
break;
case GL_TEXTURE_COMPARE_FUNC:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = (GLfloat) sampObj->CompareFunc;
break;
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
*params = (GLint) sampObj->LodBias;
break;
case GL_TEXTURE_COMPARE_MODE:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = sampObj->CompareMode;
break;
case GL_TEXTURE_COMPARE_FUNC:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = sampObj->CompareFunc;
break;
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
*params = (GLuint) sampObj->LodBias;
break;
case GL_TEXTURE_COMPARE_MODE:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = sampObj->CompareMode;
break;
case GL_TEXTURE_COMPARE_FUNC:
- if (!ctx->Extensions.ARB_shadow)
- goto invalid_pname;
*params = sampObj->CompareFunc;
break;
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
_mesa_init_sampler_object_functions(struct dd_function_table *driver)
{
driver->NewSamplerObject = _mesa_new_sampler_object;
- driver->DeleteSamplerObject = _mesa_delete_sampler_object;
}
const glsl_type *array_type = field->type->fields.array;
if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
- if (array_type->is_record()) {
+ if (array_type->is_record() || array_type->is_array()) {
array_stride = array_type->std140_size(row_major);
array_stride = glsl_align(array_stride, 16);
} else {
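Worked example (hypothetical helper mirroring glsl_align) of the std140 rounding that the hunk above now also applies to arrays of arrays: the element stride is padded up to a 16-byte (vec4) boundary.

#include <assert.h>

static unsigned
example_align16(unsigned v)
{
   return (v + 15) & ~15u;   /* behaves like glsl_align(v, 16) */
}

int
main(void)
{
   /* A 12-byte element stride pads to 16 under std140; std430 would
    * keep the tight 12-byte stride. */
   assert(example_align16(12) == 16);
   assert(example_align16(32) == 32);   /* already aligned */
   return 0;
}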
name = _mesa_HashFindFreeKeyBlock(ctx->Shared->ShaderObjects, 1);
- shProg = ctx->Driver.NewShaderProgram(name);
+ shProg = _mesa_new_shader_program(name);
_mesa_HashInsert(ctx->Shared->ShaderObjects, name, shProg);
{
struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[location];
- int offset = location - uni->subroutine[stage].index;
+ int offset = location - uni->opaque[stage].index;
memcpy(params, &uni->storage[offset],
sizeof(GLuint));
}
u->Layered = GL_FALSE;
u->Layer = 0;
}
-
- if (ctx->Driver.BindImageTexture)
- ctx->Driver.BindImageTexture(ctx, u, u->TexObj, level, layered,
- layer, access, format);
}
void GLAPIENTRY
u->_ActualFormat = MESA_FORMAT_R_UNORM8;
u->_Valid = GL_FALSE;
}
-
- /* Pass the BindImageTexture call down to the device driver */
- if (ctx->Driver.BindImageTexture)
- ctx->Driver.BindImageTexture(ctx, u, u->TexObj, u->Level, u->Layered,
- u->Layer, u->Access, u->Format);
}
_mesa_end_texture_lookups(ctx);
if (deleteFlag) {
if (old->Name != 0)
_mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name);
- ctx->Driver.DeleteShader(ctx, old);
+ _mesa_delete_shader(ctx, old);
}
*ptr = NULL;
/**
* Delete a shader object.
- * Called via ctx->Driver.DeleteShader().
*/
-static void
+void
_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
{
free((void *)sh->Source);
if (deleteFlag) {
if (old->Name != 0)
_mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name);
- ctx->Driver.DeleteShaderProgram(ctx, old);
+ _mesa_delete_shader_program(ctx, old);
}
*ptr = NULL;
/**
* Allocate a new gl_shader_program object, initialize it.
- * Called via ctx->Driver.NewShaderProgram()
*/
-static struct gl_shader_program *
+struct gl_shader_program *
_mesa_new_shader_program(GLuint name)
{
struct gl_shader_program *shProg;
for (sh = 0; sh < MESA_SHADER_STAGES; sh++) {
if (shProg->_LinkedShaders[sh] != NULL) {
- ctx->Driver.DeleteShader(ctx, shProg->_LinkedShaders[sh]);
+ _mesa_delete_shader(ctx, shProg->_LinkedShaders[sh]);
shProg->_LinkedShaders[sh] = NULL;
}
}
/**
* Free/delete a shader program object.
- * Called via ctx->Driver.DeleteShaderProgram().
*/
-static void
-_mesa_delete_shader_program(struct gl_context *ctx, struct gl_shader_program *shProg)
+void
+_mesa_delete_shader_program(struct gl_context *ctx,
+ struct gl_shader_program *shProg)
{
_mesa_free_shader_program_data(ctx, shProg);
_mesa_init_shader_object_functions(struct dd_function_table *driver)
{
driver->NewShader = _mesa_new_shader;
- driver->DeleteShader = _mesa_delete_shader;
- driver->NewShaderProgram = _mesa_new_shader_program;
- driver->DeleteShaderProgram = _mesa_delete_shader_program;
driver->LinkShader = _mesa_ir_link_shader;
}
extern struct gl_shader *
_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
+extern void
+_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh);
+
extern struct gl_shader_program *
_mesa_lookup_shader_program(struct gl_context *ctx, GLuint name);
_mesa_lookup_shader_program_err(struct gl_context *ctx, GLuint name,
const char *caller);
+extern struct gl_shader_program *
+_mesa_new_shader_program(GLuint name);
+
extern void
_mesa_clear_shader_program_data(struct gl_shader_program *shProg);
_mesa_free_shader_program_data(struct gl_context *ctx,
struct gl_shader_program *shProg);
+extern void
+_mesa_delete_shader_program(struct gl_context *ctx,
+ struct gl_shader_program *shProg);
extern void
struct gl_context *ctx = (struct gl_context *) userData;
struct gl_shader *sh = (struct gl_shader *) data;
if (_mesa_validate_shader_target(ctx, sh->Type)) {
- ctx->Driver.DeleteShader(ctx, sh);
+ _mesa_delete_shader(ctx, sh);
}
else {
struct gl_shader_program *shProg = (struct gl_shader_program *) data;
assert(shProg->Type == GL_SHADER_PROGRAM_MESA);
- ctx->Driver.DeleteShaderProgram(ctx, shProg);
+ _mesa_delete_shader_program(ctx, shProg);
}
}
len -= MIN2(bytes_before_alignment_boundary, len);
}
+ if (len >= 64)
+ _mm_mfence();
+
while (len >= 64) {
__m128i *dst_cacheline = (__m128i *)d;
__m128i *src_cacheline = (__m128i *)s;
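A compilable sketch (assumed SSE4.1 target, stand-in function name) of the loop the hunk above guards: the newly added _mm_mfence() runs once before the cacheline loop, ordering earlier memory traffic ahead of the non-temporal streaming loads.

#include <emmintrin.h>    /* _mm_mfence, _mm_store_si128 */
#include <smmintrin.h>    /* _mm_stream_load_si128 (SSE4.1) */
#include <stddef.h>

/* Copy whole 64-byte cachelines; dst/src are assumed 16-byte aligned,
 * as the surrounding function has already ensured at this point. */
static void
example_stream_copy(char *d, char *s, size_t len)
{
   if (len >= 64)
      _mm_mfence();

   while (len >= 64) {
      __m128i *dst_cacheline = (__m128i *)d;
      __m128i *src_cacheline = (__m128i *)s;

      for (int i = 0; i < 4; i++) {
         __m128i tmp = _mm_stream_load_si128(src_cacheline + i);
         _mm_store_si128(dst_cacheline + i, tmp);
      }

      d += 64;
      s += 64;
      len -= 64;
   }
}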
{ "glUnmapBufferOES", 11, -1 },
{ "glVertexPointer", 11, _gloffset_VertexPointer },
{ "glViewport", 11, _gloffset_Viewport },
+
+ /* GL_KHR_debug */
+ { "glPushDebugGroupKHR", 11, -1 },
+ { "glPopDebugGroupKHR", 11, -1 },
+ { "glDebugMessageCallbackKHR", 11, -1 },
+ { "glDebugMessageControlKHR", 11, -1 },
+ { "glDebugMessageInsertKHR", 11, -1 },
+ { "glGetDebugMessageLogKHR", 11, -1 },
+ { "glGetObjectLabelKHR", 11, -1 },
+ { "glGetObjectPtrLabelKHR", 11, -1 },
+ { "glObjectLabelKHR", 11, -1 },
+ { "glObjectPtrLabelKHR", 11, -1 },
+
{ NULL, 0, -1 }
};
{ "glEndPerfQueryINTEL", 20, -1 },
{ "glGetPerfQueryDataINTEL", 20, -1 },
+ /* GL_KHR_debug */
+ { "glPushDebugGroupKHR", 20, -1 },
+ { "glPopDebugGroupKHR", 20, -1 },
+ { "glDebugMessageCallbackKHR", 20, -1 },
+ { "glDebugMessageControlKHR", 20, -1 },
+ { "glDebugMessageInsertKHR", 20, -1 },
+ { "glGetDebugMessageLogKHR", 20, -1 },
+ { "glGetObjectLabelKHR", 20, -1 },
+ { "glGetObjectPtrLabelKHR", 20, -1 },
+ { "glObjectLabelKHR", 20, -1 },
+ { "glObjectPtrLabelKHR", 20, -1 },
+
{ NULL, 0, -1 }
};
/* If the shader stage doesn't use the sampler uniform, skip this.
*/
- if (sh == NULL || !uni->sampler[i].active)
+ if (sh == NULL || !uni->opaque[i].active)
continue;
for (int j = 0; j < count; j++) {
- sh->SamplerUnits[uni->sampler[i].index + offset + j] =
+ sh->SamplerUnits[uni->opaque[i].index + offset + j] =
((unsigned *) values)[j];
}
*/
if (uni->type->is_image()) {
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (uni->image[i].active) {
+ if (uni->opaque[i].active) {
struct gl_shader *sh = shProg->_LinkedShaders[i];
for (int j = 0; j < count; j++)
- sh->ImageUnits[uni->image[i].index + offset + j] =
+ sh->ImageUnits[uni->opaque[i].index + offset + j] =
((GLint *) values)[j];
}
}
void
_mesa_init_varray(struct gl_context *ctx)
{
- ctx->Array.DefaultVAO = ctx->Driver.NewArrayObject(ctx, 0);
+ ctx->Array.DefaultVAO = _mesa_new_vao(ctx, 0);
_mesa_reference_vao(ctx, &ctx->Array.VAO, ctx->Array.DefaultVAO);
ctx->Array.ActiveTexture = 0; /* GL_ARB_multitexture */
struct gl_uniform_storage *storage =
&this->shader_program->UniformStorage[location];
- assert(storage->sampler[shader_type].active);
+ assert(storage->type->is_sampler() &&
+ storage->opaque[shader_type].active);
for (unsigned int j = 0; j < size / 4; j++)
params->ParameterValues[index + j][0].f =
- storage->sampler[shader_type].index + j;
+ storage->opaque[shader_type].index + j;
}
}
return 0;
}
- if (!shader_program->UniformStorage[location].sampler[shader].active) {
+ if (!shader_program->UniformStorage[location].opaque[shader].active) {
assert(0 && "cannot return a sampler");
linker_error(shader_program,
"cannot return a sampler named %s, because it is not "
return 0;
}
- return shader_program->UniformStorage[location].sampler[shader].index +
+ return shader_program->UniformStorage[location].opaque[shader].index +
getname.offset;
}
/* _NEW_MULTISAMPLE */
raster->multisample = ctx->Multisample._Enabled;
+ /* _NEW_MULTISAMPLE | _NEW_BUFFERS */
+ raster->force_persample_interp =
+ st->can_force_persample_interp &&
+ ctx->Multisample._Enabled &&
+ ctx->Multisample.SampleShading &&
+ ctx->Multisample.MinSampleShadingValue *
+ ctx->DrawBuffer->Visual.samples > 1;
+
/* _NEW_SCISSOR */
raster->scissor = ctx->Scissor.EnableFlags;
key.clamp_color = st->clamp_frag_color_in_shader &&
st->ctx->Color._ClampFragmentColor;
- /* Ignore sample qualifier while computing this flag. */
+ /* Don't set it if the driver can force the interpolation by itself.
+ * If SAMPLE_ID or SAMPLE_POS are used, the interpolation is set
+ * automatically.
+ * Ignore sample qualifier while computing this flag.
+ */
key.persample_shading =
+ !st->can_force_persample_interp &&
+ !(stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
+ SYSTEM_BIT_SAMPLE_POS)) &&
_mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1;
st->fp_variant = st_get_fp_variant(st, stfp, &key);
functions->UnmapBuffer = st_bufferobj_unmap;
functions->CopyBufferSubData = st_copy_buffer_subdata;
functions->ClearBufferSubData = st_clear_buffer_subdata;
-
- /* For GL_APPLE_vertex_array_object */
- functions->NewArrayObject = _mesa_new_vao;
- functions->DeleteArrayObject = _mesa_delete_vao;
}
}
-/**
- * Called via ctx->Driver.NewFramebuffer()
- */
-static struct gl_framebuffer *
-st_new_framebuffer(struct gl_context *ctx, GLuint name)
-{
- /* XXX not sure we need to subclass gl_framebuffer for pipe */
- return _mesa_new_framebuffer(ctx, name);
-}
-
-
/**
* Called via ctx->Driver.NewRenderbuffer()
*/
void st_init_fbo_functions(struct dd_function_table *functions)
{
- functions->NewFramebuffer = st_new_framebuffer;
+ functions->NewFramebuffer = _mesa_new_framebuffer;
functions->NewRenderbuffer = st_new_renderbuffer;
functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw;
functions->RenderTexture = st_render_texture;
PIPE_BIND_SAMPLER_VIEW);
st->prefer_blit_based_texture_transfer = screen->get_param(screen,
PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
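+   /* Whether the driver can force per-sample interpolation of all fragment
+    * shader inputs via pipe_rasterizer_state::force_persample_interp,
+    * avoiding a persample_shading recompile of the FS variant. */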
+ st->can_force_persample_interp = screen->get_param(screen,
+ PIPE_CAP_FORCE_PERSAMPLE_INTERP);
st->needs_texcoord_semantic =
screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
_mesa_init_shader_object_functions(functions);
_mesa_init_sampler_object_functions(functions);
- functions->Accum = _mesa_accum;
-
st_init_blit_functions(functions);
st_init_bufferobject_functions(functions);
st_init_clear_functions(functions);
boolean has_etc1;
boolean has_etc2;
boolean prefer_blit_based_texture_transfer;
+ boolean can_force_persample_interp;
boolean needs_texcoord_semantic;
boolean apply_texture_swizzle_to_border_color;
else
interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER;
- if (key->persample_shading)
+ if (stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
+ SYSTEM_BIT_SAMPLE_POS) ||
+ key->persample_shading)
interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE;
switch (attr) {
break;
}
+ case GL_TESS_CONTROL_PROGRAM_NV: {
+ struct st_tessctrl_program *p = (struct st_tessctrl_program *)prog;
+ struct st_tcp_variant_key key;
+
+ memset(&key, 0, sizeof(key));
+ key.st = st;
+ st_get_tcp_variant(st, p, &key);
+ break;
+ }
+
+ case GL_TESS_EVALUATION_PROGRAM_NV: {
+ struct st_tesseval_program *p = (struct st_tesseval_program *)prog;
+ struct st_tep_variant_key key;
+
+ memset(&key, 0, sizeof(key));
+ key.st = st;
+ st_get_tep_variant(st, p, &key);
+ break;
+ }
+
case GL_GEOMETRY_PROGRAM_NV: {
struct st_geometry_program *p = (struct st_geometry_program *)prog;
struct st_gp_variant_key key;
currentsz = dmasz;
}
} else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ unreachable("Cannot draw primitive; validate_render should have "
+ "prevented this");
}
}
}
FLUSH();
- } else if (ctx->Light.ShadeModel == GL_SMOOTH) {
+ } else if (ctx->Light.ShadeModel == GL_SMOOTH ||
+ ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
TAG(render_tri_fan_verts)( ctx, start, count, flags );
} else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ unreachable("Cannot draw primitive; validate_render should have "
+ "prevented this");
}
}
{
GLuint j, nr;
- if (ctx->Light.ShadeModel == GL_FLAT &&
- TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
- /* Vertices won't fit in a single buffer or elts not available - should
- * never happen.
- */
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- } else {
+ if (ctx->Light.ShadeModel == GL_SMOOTH) {
LOCAL_VARS;
const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
unsigned currentsz;
}
FLUSH();
+ } else {
+ unreachable("Cannot draw primitive; validate_render should have "
+ "prevented this");
}
}
GLuint count,
GLuint flags)
{
- LOCAL_VARS;
- GLuint j;
+ if (ctx->Light.ShadeModel == GL_SMOOTH ||
+ ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+ LOCAL_VARS;
+ GLuint j;
- /* Emit whole number of quads in total. */
- count -= count & 3;
+      /* Emit a whole number of quads in total. */
+ count -= count & 3;
- /* Hardware doesn't have a quad primitive type -- try to simulate it using
- * triangle primitive. This is a win for gears, but is it useful in the
- * broader world?
- */
- INIT(GL_TRIANGLES);
-
- for (j = 0; j + 3 < count; j += 4) {
- void *tmp = ALLOC_VERTS(6);
- /* Send v0, v1, v3
- */
- tmp = EMIT_VERTS(ctx, start + j, 2, tmp);
- tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
- /* Send v1, v2, v3
+      /* Hardware doesn't have a quad primitive type -- try to simulate it
+       * using the triangle primitive. This is a win for gears, but is it
+       * useful in the broader world?
+       */
- tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
- (void) tmp;
+ INIT(GL_TRIANGLES);
+
+ for (j = 0; j + 3 < count; j += 4) {
+ void *tmp = ALLOC_VERTS(6);
+ /* Send v0, v1, v3
+ */
+ tmp = EMIT_VERTS(ctx, start + j, 2, tmp);
+ tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
+ /* Send v1, v2, v3
+ */
+ tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
+ (void) tmp;
+ }
+ } else {
+ unreachable("Cannot draw primitive");
}
}
ok = true;
break;
case GL_POLYGON:
- ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH;
+ ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH ||
+ ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION;
break;
case GL_QUAD_STRIP:
- ok = VB->Elts ||
- (ctx->Light.ShadeModel != GL_FLAT ||
- VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0);
+ ok = VB->Elts || ctx->Light.ShadeModel == GL_SMOOTH;
break;
case GL_QUADS:
- ok = true; /* flatshading is ok. */
+ ok = ctx->Light.ShadeModel == GL_SMOOTH ||
+ ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION;
break;
default:
break;
vbo_initialize_save_dispatch(const struct gl_context *ctx,
struct _glapi_table *exec);
+void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags);
+void vbo_save_SaveFlushVertices(struct gl_context *ctx);
+GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode);
+void vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode);
+void vbo_save_EndList(struct gl_context *ctx);
+void vbo_save_BeginCallList(struct gl_context *ctx, struct gl_display_list *list);
+void vbo_save_EndCallList(struct gl_context *ctx);
+
typedef void (*vbo_draw_func)( struct gl_context *ctx,
const struct _mesa_prim *prims,
ctx->Driver.NeedFlush = 0;
ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
- ctx->Driver.BeginVertices = vbo_exec_BeginVertices;
- ctx->Driver.FlushVertices = vbo_exec_FlushVertices;
vbo_exec_invalidate_state( ctx, ~0 );
}
void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state );
void vbo_exec_BeginVertices( struct gl_context *ctx );
-void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags );
/* Internal functions:
struct vbo_exec_context *exec = &vbo_context(ctx)->exec; \
int sz = (sizeof(C) / sizeof(GLfloat)); \
if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) \
- ctx->Driver.BeginVertices( ctx ); \
+ vbo_exec_BeginVertices(ctx); \
\
if (unlikely(exec->vtx.active_sz[A] != N * sz) || \
unlikely(exec->vtx.attrtype[A] != T)) \
/**
- * Called via ctx->Driver.FlushVertices()
+ * If called inside glBegin()/glEnd(), this should assert. Otherwise, if the
+ * FLUSH_STORED_VERTICES bit in \p flags is set, any buffered vertices are
+ * drawn, and if the FLUSH_UPDATE_CURRENT bit is set,
+ * __struct gl_contextRec::Current and gl_light_attrib::Material are updated.
+ *
+ * Note that the default T&L engine never clears the
+ * FLUSH_UPDATE_CURRENT bit, even after performing the update.
+ *
* \param flags bitmask of FLUSH_STORED_VERTICES, FLUSH_UPDATE_CURRENT
*/
void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags )
/* Flush (draw), and make sure VBO is left unmapped when done */
vbo_exec_FlushVertices_internal(exec, GL_TRUE);
- /* Need to do this to ensure BeginVertices gets called again:
+ /* Need to do this to ensure vbo_exec_BeginVertices gets called again:
*/
ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags);
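For reference, a minimal caller-side sketch (an assumption about how the
FLUSH_VERTICES-style paths use this entry point now that the ctx->Driver hook
is gone; it is not code from this series, and it assumes the prototype lands
in vbo/vbo.h as in the hunk above):

/* Sketch only: flush buffered immediate-mode vertices before a GL state
 * change; FLUSH_UPDATE_CURRENT would similarly be passed to sync
 * ctx->Current first. Requires main/mtypes.h and vbo/vbo.h. */
static inline void
flush_stored_vertices(struct gl_context *ctx)
{
   if (ctx->Driver.NeedFlush & FLUSH_STORED_VERTICES)
      vbo_exec_FlushVertices(ctx, FLUSH_STORED_VERTICES);
}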
#include "vbo_context.h"
-static void vbo_save_callback_init( struct gl_context *ctx )
-{
- ctx->Driver.NewList = vbo_save_NewList;
- ctx->Driver.EndList = vbo_save_EndList;
- ctx->Driver.SaveFlushVertices = vbo_save_SaveFlushVertices;
- ctx->Driver.BeginCallList = vbo_save_BeginCallList;
- ctx->Driver.EndCallList = vbo_save_EndCallList;
- ctx->Driver.NotifySaveBegin = vbo_save_NotifyBegin;
-}
-
-
-
/**
* Called at context creation time.
*/
save->ctx = ctx;
vbo_save_api_init( save );
- vbo_save_callback_init(ctx);
{
struct gl_client_array *arrays = save->arrays;
/* Callbacks:
*/
-void vbo_save_EndList( struct gl_context *ctx );
-void vbo_save_NewList( struct gl_context *ctx, GLuint list, GLenum mode );
-void vbo_save_EndCallList( struct gl_context *ctx );
-void vbo_save_BeginCallList( struct gl_context *ctx, struct gl_display_list *list );
-void vbo_save_SaveFlushVertices( struct gl_context *ctx );
-GLboolean vbo_save_NotifyBegin( struct gl_context *ctx, GLenum mode );
-
void vbo_save_playback_vertex_list( struct gl_context *ctx, void *data );
void vbo_save_api_init( struct vbo_save_context *save );
/**
- * Called via ctx->Driver.NotifySaveBegin() when a glBegin is getting
- * compiled into a display list.
+ * Called when a glBegin is getting compiled into a display list.
* Updating of ctx->Driver.CurrentSavePrimitive is already taken care of.
*/
GLboolean
_mesa_install_save_vtxfmt(ctx, &save->vtxfmt);
}
- /* We need to call SaveFlushVertices() if there's state change */
+   /* We need to call vbo_save_SaveFlushVertices() if there's a state change */
ctx->Driver.SaveNeedFlush = GL_TRUE;
/* GL_TRUE means we've handled this glBegin here; don't compile a BEGIN
vbo_destroy_vertex_list,
vbo_print_vertex_list);
- ctx->Driver.NotifySaveBegin = vbo_save_NotifyBegin;
-
_save_vtxfmt_init(ctx);
_save_current_init(ctx);
_mesa_noop_vtxfmt_init(&save->vtxfmt_noop);
MESA_UTIL_FILES := \
bitset.h \
+ debug.c \
+ debug.h \
format_srgb.h \
hash_table.c \
hash_table.h \
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <string.h>
+#include "main/macros.h"
+#include "debug.h"
+
+uint64_t
+parse_debug_string(const char *debug,
+ const struct debug_control *control)
+{
+ uint64_t flag = 0;
+
+ if (debug != NULL) {
+ for (; control->string != NULL; control++) {
+ if (!strcmp(debug, "all")) {
+ flag |= control->flag;
+
+ } else {
+ const char *s = debug;
+ unsigned n;
+
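+            /* Walk the comma/space-separated token list: strcspn() returns
+             * the length of the next token, and MAX2(1, n) guarantees
+             * forward progress across consecutive separators. */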
+ for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
+ if (strlen(control->string) == n &&
+ !strncmp(control->string, s, n))
+ flag |= control->flag;
+ }
+ }
+ }
+ }
+
+ return flag;
+}
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _DEBUG_H
+#define _DEBUG_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct debug_control {
+ const char * string;
+ uint64_t flag;
+};
+
+uint64_t
+parse_debug_string(const char *debug,
+ const struct debug_control *control);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* _DEBUG_H */
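As a usage illustration (the table, flag names, and environment variable
below are hypothetical, not part of this change), a driver pairs the parser
with a NULL-terminated debug_control table:

#include <stdlib.h>
#include "debug.h"

#define DEBUG_PERF    (1ull << 0)
#define DEBUG_SHADERS (1ull << 1)

/* The table must be terminated by an entry with a NULL string. */
static const struct debug_control debug_control[] = {
   { "perf",    DEBUG_PERF },
   { "shaders", DEBUG_SHADERS },
   { NULL,      0 }
};

/* MYDRIVER_DEBUG=perf,shaders (or =all) sets the corresponding bits. */
static uint64_t
get_debug_flags(void)
{
   return parse_debug_string(getenv("MYDRIVER_DEBUG"), debug_control);
}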
CLEANFILES = $(BUILT_SOURCES)
libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \
- $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la
+ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
+ ../mesa/libmesa.la \
+ ../mesa/drivers/dri/common/libdri_test_stubs.la \
+ -lpthread -ldl
# Libvulkan with dummy gem. Used for unit tests.
#include <brw_vs.h>
#include <brw_gs.h>
#include <brw_cs.h>
+#include "brw_vec4_gs_visitor.h"
#include <mesa/main/shaderobj.h>
#include <mesa/main/fbobject.h>
(const gl_constant_value *)&null_data->client_data[i * sizeof(float)];
}
+/**
+ * Return a bitfield where bit n is set if barycentric interpolation mode n
+ * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
+ */
+unsigned
+brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
+ bool shade_model_flat,
+ bool persample_shading,
+ nir_shader *shader)
+{
+ unsigned barycentric_interp_modes = 0;
+
+ nir_foreach_variable(var, &shader->inputs) {
+ enum glsl_interp_qualifier interp_qualifier =
+ (enum glsl_interp_qualifier) var->data.interpolation;
+ bool is_centroid = var->data.centroid && !persample_shading;
+ bool is_sample = var->data.sample || persample_shading;
+ bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
+ (var->data.location == VARYING_SLOT_COL1);
+
+ /* Ignore WPOS and FACE, because they don't require interpolation. */
+ if (var->data.location == VARYING_SLOT_POS ||
+ var->data.location == VARYING_SLOT_FACE)
+ continue;
+
+ /* Determine the set (or sets) of barycentric coordinates needed to
+ * interpolate this variable. Note that when
+ * brw->needs_unlit_centroid_workaround is set, centroid interpolation
+ * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
+ * for lit pixels, so we need both sets of barycentric coordinates.
+ */
+ if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) {
+ if (is_centroid) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+ } else if (is_sample) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
+ }
+ if ((!is_centroid && !is_sample) ||
+ devinfo->needs_unlit_centroid_workaround) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+ }
+ } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH ||
+ (!(shade_model_flat && is_gl_Color) &&
+ interp_qualifier == INTERP_QUALIFIER_NONE)) {
+ if (is_centroid) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
+ } else if (is_sample) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
+ }
+ if ((!is_centroid && !is_sample) ||
+ devinfo->needs_unlit_centroid_workaround) {
+ barycentric_interp_modes |=
+ 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+ }
+ }
+ }
+
+ return barycentric_interp_modes;
+}
+
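A hedged consumer sketch (not part of this patch): each bit set in the
returned bitfield corresponds to one set of barycentric coordinates delivered
in the fragment thread payload, so a payload-size estimate could be derived
as below, assuming SIMD8 dispatch where each set occupies two GRFs:

/* Sketch only: count payload registers consumed by the requested
 * barycentric coordinate sets (SIMD8 assumed: 2 GRFs per set). */
static unsigned
count_barycentric_payload_regs(unsigned barycentric_interp_modes)
{
   unsigned regs = 0;
   for (unsigned i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; i++) {
      if (barycentric_interp_modes & (1u << i))
         regs += 2;
   }
   return regs;
}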
static void
brw_vs_populate_key(struct brw_context *brw,
struct brw_vertex_program *vp,
key->point_coord_replace |= (1 << i);
}
}
-
- /* _NEW_TEXTURE */
- brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
- &key->tex);
}
static bool
/* Emit GEN4 code.
*/
program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
- prog, &program_size);
+ prog, -1, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
struct brw_wm_prog_key *key)
{
struct gl_context *ctx = &brw->ctx;
- struct gl_program *prog = (struct gl_program *) brw->fragment_program;
GLuint lookup = 0;
GLuint line_aa;
bool program_uses_dfdy = fp->program.UsesDFdy;
/* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
- /* _NEW_TEXTURE */
- brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
- &key->tex);
-
/* _NEW_BUFFERS */
/*
* Include the draw buffer origin and height so that we can calculate
prog_data->binding_table.render_target_start = 0;
program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
- &fp->program, prog, &program_size);
+ &fp->program, prog, -1, -1, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
return true;
}
-static void
-brw_gs_populate_key(struct brw_context *brw,
- struct anv_pipeline *pipeline,
+bool
+anv_codegen_gs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
struct brw_geometry_program *gp,
- struct brw_gs_prog_key *key)
+ struct brw_gs_prog_key *key,
+ struct anv_pipeline *pipeline)
{
- struct gl_context *ctx = &brw->ctx;
- struct brw_stage_state *stage_state = &brw->gs.base;
- struct gl_program *prog = &gp->program.Base;
+ struct brw_gs_compile c;
- memset(key, 0, sizeof(*key));
+ memset(&c, 0, sizeof(c));
+ c.key = *key;
+ c.gp = gp;
- key->program_string_id = gp->id;
+ c.prog_data.include_primitive_id =
+ (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
- /* _NEW_TEXTURE */
- brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
- &key->tex);
-}
+ c.prog_data.invocations = gp->program.Invocations;
-static bool
-really_do_gs_prog(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_geometry_program *gp,
- struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
-{
- struct brw_gs_compile_output output;
-
- /* FIXME: We pass the bind map to the compile in the output struct. Need
- * something better. */
- set_binding_table_layout(&output.prog_data.base.base,
+ set_binding_table_layout(&c.prog_data.base.base,
pipeline, VK_SHADER_STAGE_GEOMETRY);
- brw_compile_gs_prog(brw, prog, gp, key, &output);
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ *
+ * Note: param_count needs to be num_uniform_components * 4, since we add
+    * padding around uniform values below vec4 size; the worst case is that
+ * every uniform is a float which gets padded to the size of a vec4.
+ */
+ struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+ int param_count = gp->program.Base.nir->num_uniforms * 4;
+
+ c.prog_data.base.base.param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ c.prog_data.base.base.pull_param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ c.prog_data.base.base.image_param =
+ rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
+ c.prog_data.base.base.nr_params = param_count;
+ c.prog_data.base.base.nr_image_params = gs->NumImages;
+
+ brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
+ &c.prog_data.base.base, false);
+
+ if (brw->gen >= 8) {
+ c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+ nir_gs_count_vertices(gp->program.Base.nir);
+ }
+
+ if (brw->gen >= 7) {
+ if (gp->program.OutputType == GL_POINTS) {
+ /* When the output type is points, the geometry shader may output data
+ * to multiple streams, and EndPrimitive() has no effect. So we
+ * configure the hardware to interpret the control data as stream ID.
+ */
+ c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+ /* We only have to emit control bits if we are using streams */
+ if (prog->Geom.UsesStreams)
+ c.control_data_bits_per_vertex = 2;
+ else
+ c.control_data_bits_per_vertex = 0;
+ } else {
+ /* When the output type is triangle_strip or line_strip, EndPrimitive()
+ * may be used to terminate the current strip and start a new one
+ * (similar to primitive restart), and outputting data to multiple
+ * streams is not supported. So we configure the hardware to interpret
+ * the control data as EndPrimitive information (a.k.a. "cut bits").
+ */
+ c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+ /* We only need to output control data if the shader actually calls
+ * EndPrimitive().
+ */
+ c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+ }
+ } else {
+ /* There are no control data bits in gen6. */
+ c.control_data_bits_per_vertex = 0;
+
+      /* Enable gen6 transform feedback if the program uses it. */
+ if (prog->TransformFeedback.NumVarying)
+ c.prog_data.gen6_xfb_enabled = true;
+ else
+ c.prog_data.gen6_xfb_enabled = false;
+ }
+ c.control_data_header_size_bits =
+ gp->program.VerticesOut * c.control_data_bits_per_vertex;
+
+ /* 1 HWORD = 32 bytes = 256 bits */
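+   /* E.g. 256 output vertices with 1 control bit each need 256 bits of
+    * control data, i.e. exactly 1 HWORD. */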
+ c.prog_data.control_data_header_size_hwords =
+ ALIGN(c.control_data_header_size_bits, 256) / 256;
+
+ GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
+
+ brw_compute_vue_map(brw->intelScreen->devinfo,
+ &c.prog_data.base.vue_map, outputs_written,
+ prog ? prog->SeparateShader : false);
+
+ /* Compute the output vertex size.
+ *
+ * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
+ * Size (p168):
+ *
+ * [0,62] indicating [1,63] 16B units
+ *
+ * Specifies the size of each vertex stored in the GS output entry
+ * (following any Control Header data) as a number of 128-bit units
+ * (minus one).
+ *
+ * Programming Restrictions: The vertex size must be programmed as a
+ * multiple of 32B units with the following exception: Rendering is
+ * disabled (as per SOL stage state) and the vertex size output by the
+ * GS thread is 16B.
+ *
+ * If rendering is enabled (as per SOL state) the vertex size must be
+ * programmed as a multiple of 32B units. In other words, the only time
+ * software can program a vertex size with an odd number of 16B units
+ * is when rendering is disabled.
+ *
+ * Note: B=bytes in the above text.
+ *
+ * It doesn't seem worth the extra trouble to optimize the case where the
+ * vertex size is 16B (especially since this would require special-casing
+ * the GEN assembly that writes to the URB). So we just set the vertex
+ * size to a multiple of 32B (2 vec4's) in all cases.
+ *
+ * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We
+ * budget that as follows:
+ *
+ * 512 bytes for varyings (a varying component is 4 bytes and
+ * gl_MaxGeometryOutputComponents = 128)
+ * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
+ * bytes)
+ * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE
+ * even if it's not used)
+ * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
+ * whenever clip planes are enabled, even if the shader doesn't
+ * write to gl_ClipDistance)
+ * 16 bytes overhead since the VUE size must be a multiple of 32 bytes
+ * (see below)--this causes up to 1 VUE slot to be wasted
+ * 400 bytes available for varying packing overhead
+ *
+ * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
+ * per interpolation type, so this is plenty.
+ *
+ */
+ unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
+ assert(brw->gen == 6 ||
+ output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
+ c.prog_data.output_vertex_size_hwords =
+ ALIGN(output_vertex_size_bytes, 32) / 32;
+
+ /* Compute URB entry size. The maximum allowed URB entry size is 32k.
+ * That divides up as follows:
+ *
+ * 64 bytes for the control data header (cut indices or StreamID bits)
+ * 4096 bytes for varyings (a varying component is 4 bytes and
+ * gl_MaxGeometryTotalOutputComponents = 1024)
+ * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
+ * bytes/vertex and gl_MaxGeometryOutputVertices is 256)
+ * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
+ * even if it's not used)
+ * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
+ * whenever clip planes are enabled, even if the shader doesn't
+ * write to gl_ClipDistance)
+ * 4096 bytes overhead since the VUE size must be a multiple of 32
+ * bytes (see above)--this causes up to 1 VUE slot to be wasted
+ * 8128 bytes available for varying packing overhead
+ *
+ * Worst-case varying packing overhead is 3/4 of a varying slot per
+ * interpolation type, which works out to 3072 bytes, so this would allow
+ * us to accommodate 2 interpolation types without any danger of running
+ * out of URB space.
+ *
+ * In practice, the risk of running out of URB space is very small, since
+ * the above figures are all worst-case, and most of them scale with the
+ * number of output vertices. So we'll just calculate the amount of space
+ * we need, and if it's too large, fail to compile.
+ *
+ * The above is for gen7+ where we have a single URB entry that will hold
+ * all the output. In gen6, we will have to allocate URB entries for every
+ * vertex we emit, so our URB entries only need to be large enough to hold
+ * a single vertex. Also, gen6 does not have a control data header.
+ */
+ unsigned output_size_bytes;
+ if (brw->gen >= 7) {
+ output_size_bytes =
+ c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
+ output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
+ } else {
+ output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
+ }
+
+ /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
+ * which comes before the control header.
+ */
+ if (brw->gen >= 8)
+ output_size_bytes += 32;
+
+ assert(output_size_bytes >= 1);
+ int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ if (brw->gen == 6)
+ max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ if (output_size_bytes > max_output_size_bytes)
+ return false;
- pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size);
+
+ /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
+ * a multiple of 128 bytes in gen6.
+ */
+ if (brw->gen >= 7)
+ c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+ else
+ c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
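+   /* E.g. on gen7, one control-data HWORD plus two 2-HWORD vertices is
+    * 32 + 128 = 160 bytes -> ALIGN(160, 64) / 64 = 3 64-byte rows. */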
+
+ /* FIXME: Need to pull this from nir shader. */
+ c.prog_data.output_topology = _3DPRIM_TRISTRIP;
+
+ /* The GLSL linker will have already matched up GS inputs and the outputs
+ * of prior stages. The driver does extend VS outputs in some cases, but
+ * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+ * geometry shader support. So we can safely ignore that.
+ *
+ * For SSO pipelines, we use a fixed VUE map layout based on variable
+ * locations, so we can rely on rendezvous-by-location making this work.
+ *
+ * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+ * written by previous stages and shows up via payload magic.
+ */
+ GLbitfield64 inputs_read =
+ gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
+ brw_compute_vue_map(brw->intelScreen->devinfo,
+ &c.input_vue_map, inputs_read,
+ prog->SeparateShader);
+
+ /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
+ * need to program a URB read length of ceiling(num_slots / 2).
+ */
+ c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
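+   /* E.g. 5 input VUE slots -> (5 + 1) / 2 = 3 256-bit URB reads. */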
+
+ void *mem_ctx = ralloc_context(NULL);
+ unsigned program_size;
+ const unsigned *program =
+ brw_gs_emit(brw, prog, &c, mem_ctx, -1, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ pipeline->gs_vec4 = upload_kernel(pipeline, program, program_size);
pipeline->gs_vertex_count = gp->program.VerticesIn;
- ralloc_free(output.mem_ctx);
+ ralloc_free(mem_ctx);
return true;
}
anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
program = brw_cs_emit(brw, mem_ctx, key, prog_data,
- &cp->program, prog, &program_size);
+ &cp->program, prog, -1, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
compiler->brw->intelScreen = compiler->screen;
compiler->screen->devinfo = &device->info;
- brw_process_intel_debug_variable(compiler->screen);
+ brw_process_intel_debug_variable();
compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
brw->use_rep_send = pipeline->use_repclear;
brw->no_simd8 = pipeline->use_repclear;
- program = brw->ctx.Driver.NewShaderProgram(name);
+ program = _mesa_new_shader_program(name);
program->Shaders = (struct gl_shader **)
calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
fail_if(program == NULL || program->Shaders == NULL,
program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
struct brw_geometry_program *bgp = brw_geometry_program(gp);
- brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
-
- success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
+ success = anv_codegen_gs_prog(brw, program, bgp, &gs_key, pipeline);
fail_if(!success, "do_gs_prog failed\n");
add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
&pipeline->gs_prog_data.base.base);
&pipeline->cs_prog_data.base);
}
- brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
+ _mesa_delete_shader_program(&brw->ctx, program);
struct anv_device *device = compiler->device;
while (device->scratch_block_pool.bo.size < pipeline->total_scratch)