Merge ../mesa into vulkan
author Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Thu, 8 Oct 2015 19:25:34 +0000 (12:25 -0700)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Fri, 9 Oct 2015 00:20:24 +0000 (17:20 -0700)
231 files changed:
docs/GL3.txt
docs/relnotes/11.1.0.html
include/c11/threads_posix.h
src/egl/drivers/dri2/egl_dri2.c
src/egl/drivers/dri2/egl_dri2.h
src/egl/drivers/dri2/platform_drm.c
src/egl/drivers/dri2/platform_wayland.c
src/egl/drivers/dri2/platform_x11.c
src/gallium/auxiliary/tgsi/tgsi_scan.c
src/gallium/auxiliary/tgsi/tgsi_scan.h
src/gallium/docs/source/screen.rst
src/gallium/drivers/ddebug/Makefile.sources
src/gallium/drivers/ddebug/dd_draw.c
src/gallium/drivers/ddebug/dd_pipe.h
src/gallium/drivers/ddebug/dd_util.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/i915/i915_screen.c
src/gallium/drivers/ilo/ilo_screen.c
src/gallium/drivers/llvmpipe/lp_screen.c
src/gallium/drivers/nouveau/nouveau_winsys.h
src/gallium/drivers/nouveau/nv30/nv30_format.c
src/gallium/drivers/nouveau/nv30/nv30_screen.c
src/gallium/drivers/nouveau/nv30/nv30_vbo.c
src/gallium/drivers/nouveau/nv50/nv50_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/r300/r300_cs.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_screen.c
src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/evergreen_hw_context.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c
src/gallium/drivers/radeon/r600_cs.h
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_query.c
src/gallium/drivers/radeon/r600_streamout.c
src/gallium/drivers/radeon/radeon_uvd.c
src/gallium/drivers/radeon/radeon_vce.c
src/gallium/drivers/radeon/radeon_winsys.h
src/gallium/drivers/radeonsi/cik_sdma.c
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_cp_dma.c
src/gallium/drivers/radeonsi/si_debug.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_dma.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_pm4.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c
src/gallium/drivers/softpipe/sp_screen.c
src/gallium/drivers/svga/svga_resource_buffer.c
src/gallium/drivers/svga/svga_sampler_view.c
src/gallium/drivers/svga/svga_screen.c
src/gallium/drivers/svga/svga_state_constants.c
src/gallium/drivers/vc4/vc4_context.c
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_screen.c
src/gallium/drivers/vc4/vc4_simulator.c
src/gallium/include/pipe/p_defines.h
src/gallium/include/pipe/p_state.h
src/gallium/state_trackers/dri/dri2.c
src/gallium/state_trackers/dri/dri_drawable.c
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.h
src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c
src/glsl/Makefile.sources
src/glsl/ast_to_hir.cpp
src/glsl/glsl_parser.yy
src/glsl/glsl_parser_extras.cpp
src/glsl/glsl_types.cpp
src/glsl/ir_optimization.h
src/glsl/ir_uniform.h
src/glsl/link_uniform_initializers.cpp
src/glsl/link_uniforms.cpp
src/glsl/linker.cpp
src/glsl/nir/glsl_to_nir.cpp
src/glsl/nir/nir.h
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_io.c
src/glsl/nir/nir_lower_samplers.c
src/glsl/nir/nir_print.c
src/glsl/nir/nir_sweep.c
src/glsl/opt_cse.cpp [deleted file]
src/glsl/standalone_scaffolding.cpp
src/glsl/standalone_scaffolding.h
src/glsl/tests/set_uniform_initializer_tests.cpp
src/glx/packrender.h
src/glx/packsingle.h
src/mapi/glapi/gen/KHR_debug.xml
src/mesa/drivers/common/driverfuncs.c
src/mesa/drivers/common/meta_generate_mipmap.c
src/mesa/drivers/dri/common/dri_util.c
src/mesa/drivers/dri/common/utils.c
src/mesa/drivers/dri/common/utils.h
src/mesa/drivers/dri/i915/i830_state.c
src/mesa/drivers/dri/i915/i915_vtbl.c
src/mesa/drivers/dri/i915/intel_buffers.c
src/mesa/drivers/dri/i915/intel_buffers.h
src/mesa/drivers/dri/i915/intel_context.c
src/mesa/drivers/dri/i915/intel_context.h
src/mesa/drivers/dri/i915/intel_fbo.c
src/mesa/drivers/dri/i915/intel_pixel.c
src/mesa/drivers/dri/i915/intel_render.c
src/mesa/drivers/dri/i915/intel_state.c
src/mesa/drivers/dri/i915/intel_tris.c
src/mesa/drivers/dri/i965/Makefile.am
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_clip.c
src/mesa/drivers/dri/i965/brw_clip.h
src/mesa/drivers/dri/i965/brw_clip_unfilled.c
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_cs.c
src/mesa/drivers/dri/i965/brw_cs.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp [deleted file]
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_gs.h
src/mesa/drivers/dri/i965/brw_inst.h
src/mesa/drivers/dri/i965/brw_link.cpp [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_predicated_break.cpp [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_program.c
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_shader.h
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_vs.h
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm.h
src/mesa/drivers/dri/i965/brw_wm_state.c
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
src/mesa/drivers/dri/i965/gen6_gs_visitor.h
src/mesa/drivers/dri/i965/gen7_cs_state.c
src/mesa/drivers/dri/i965/intel_blit.c
src/mesa/drivers/dri/i965/intel_buffers.c
src/mesa/drivers/dri/i965/intel_buffers.h
src/mesa/drivers/dri/i965/intel_debug.c
src/mesa/drivers/dri/i965/intel_debug.h
src/mesa/drivers/dri/i965/intel_extensions.c
src/mesa/drivers/dri/i965/intel_fbo.c
src/mesa/drivers/dri/i965/intel_mipmap_tree.c
src/mesa/drivers/dri/i965/intel_mipmap_tree.h
src/mesa/drivers/dri/i965/intel_pixel.c
src/mesa/drivers/dri/i965/intel_screen.c
src/mesa/drivers/dri/i965/intel_state.c
src/mesa/drivers/dri/r200/r200_context.h
src/mesa/drivers/dri/r200/r200_maos_arrays.c
src/mesa/drivers/dri/r200/r200_state.c
src/mesa/drivers/dri/r200/r200_state_init.c
src/mesa/drivers/dri/r200/r200_swtcl.c
src/mesa/drivers/dri/r200/r200_tcl.c
src/mesa/drivers/dri/r200/r200_tex.c
src/mesa/drivers/dri/radeon/radeon_context.c
src/mesa/drivers/dri/radeon/radeon_context.h
src/mesa/drivers/dri/radeon/radeon_debug.c
src/mesa/drivers/dri/radeon/radeon_fbo.c
src/mesa/drivers/dri/radeon/radeon_state.c
src/mesa/drivers/dri/radeon/radeon_swtcl.c
src/mesa/drivers/dri/radeon/radeon_tex.c
src/mesa/drivers/dri/radeon/radeon_texstate.c
src/mesa/main/arrayobj.c
src/mesa/main/blend.c
src/mesa/main/context.h
src/mesa/main/dd.h
src/mesa/main/dlist.c
src/mesa/main/ff_fragment_shader.cpp
src/mesa/main/framebuffer.c
src/mesa/main/framebuffer.h
src/mesa/main/get_hash_params.py
src/mesa/main/glformats.c
src/mesa/main/hint.c
src/mesa/main/pack.c
src/mesa/main/samplerobj.c
src/mesa/main/shader_query.cpp
src/mesa/main/shaderapi.c
src/mesa/main/shaderimage.c
src/mesa/main/shaderobj.c
src/mesa/main/shaderobj.h
src/mesa/main/shared.c
src/mesa/main/streaming-load-memcpy.c
src/mesa/main/tests/dispatch_sanity.cpp
src/mesa/main/uniform_query.cpp
src/mesa/main/varray.c
src/mesa/program/ir_to_mesa.cpp
src/mesa/program/sampler.cpp
src/mesa/state_tracker/st_atom_rasterizer.c
src/mesa/state_tracker/st_atom_shader.c
src/mesa/state_tracker/st_cb_bufferobjects.c
src/mesa/state_tracker/st_cb_fbo.c
src/mesa/state_tracker/st_context.c
src/mesa/state_tracker/st_context.h
src/mesa/state_tracker/st_program.c
src/mesa/tnl_dd/t_dd_dmatmp.h
src/mesa/vbo/vbo.h
src/mesa/vbo/vbo_exec.c
src/mesa/vbo/vbo_exec.h
src/mesa/vbo/vbo_exec_api.c
src/mesa/vbo/vbo_save.c
src/mesa/vbo/vbo_save.h
src/mesa/vbo/vbo_save_api.c
src/util/Makefile.sources
src/util/debug.c [new file with mode: 0644]
src/util/debug.h [new file with mode: 0644]
src/vulkan/Makefile.am
src/vulkan/anv_compiler.cpp

index e020deb3b6c4d404bca5dd791e69222a6b4b5a22..e17e783d3310fdb75473823ce393af1a6448343f 100644 (file)
@@ -178,7 +178,13 @@ GL 4.4, GLSL 4.40:
   GL_MAX_VERTEX_ATTRIB_STRIDE                          DONE (all drivers)
   GL_ARB_buffer_storage                                DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_clear_texture                                 DONE (i965) (gallium - in progress, VMware)
-  GL_ARB_enhanced_layouts                              not started
+  GL_ARB_enhanced_layouts                              in progress (Timothy)
+  - compile-time constant expressions                  in progress
+  - explicit byte offsets for blocks                   in progress
+  - forced alignment within blocks                     in progress
+  - specified vec4-slot component numbers              in progress
+  - specified transform/feedback layout                in progress
+  - input/output block locations                       in progress
   GL_ARB_multi_bind                                    DONE (all drivers)
   GL_ARB_query_buffer_object                           not started
   GL_ARB_texture_mirror_clamp_to_edge                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
index c755c98f6eb9512c9c726aebded44f3927f3f696..d4f30d0da62de3149e55b48b711f62abb12a4d65 100644 (file)
@@ -45,11 +45,13 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 <li>GL_ARB_shader_storage_buffer_object on i965</li>
 <li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
 <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
-<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
+<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
+<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
 </ul>
 
 <h2>Bug fixes</h2>
index 3def6c41ca57c94189332eb4773cbb6141577d1c..ce9853b18b3b1f969e6e2aafcaafb3401d70ac29 100644 (file)
@@ -136,8 +136,14 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 {
     struct timespec abs_time;
     int rt;
+
     assert(mtx != NULL);
     assert(cond != NULL);
+    assert(xt != NULL);
+
+    abs_time.tv_sec = xt->sec;
+    abs_time.tv_nsec = xt->nsec;
+
     rt = pthread_cond_timedwait(cond, mtx, &abs_time);
     if (rt == ETIMEDOUT)
         return thrd_busy;
index 1740ee3dc4782f8dee6bcc4abb3d890b18afded0..4cc5f2313333b0c416755af8392c99ff16420283 100644 (file)
@@ -131,12 +131,10 @@ const __DRIconfig *
 dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
                     EGLenum colorspace)
 {
-   if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR)
-      return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config :
-                                              conf->dri_srgb_single_config;
-   else
-      return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config :
-                                              conf->dri_single_config;
+   const bool srgb = colorspace == EGL_GL_COLORSPACE_SRGB_KHR;
+
+   return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config[srgb] :
+                                           conf->dri_single_config[srgb];
 }
 
 static EGLBoolean
@@ -284,14 +282,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
    if (num_configs == 1) {
       conf = (struct dri2_egl_config *) matching_config;
 
-      if (double_buffer && srgb && !conf->dri_srgb_double_config)
-         conf->dri_srgb_double_config = dri_config;
-      else if (double_buffer && !srgb && !conf->dri_double_config)
-         conf->dri_double_config = dri_config;
-      else if (!double_buffer && srgb && !conf->dri_srgb_single_config)
-         conf->dri_srgb_single_config = dri_config;
-      else if (!double_buffer && !srgb && !conf->dri_single_config)
-         conf->dri_single_config = dri_config;
+      if (double_buffer && !conf->dri_double_config[srgb])
+         conf->dri_double_config[srgb] = dri_config;
+      else if (!double_buffer && !conf->dri_single_config[srgb])
+         conf->dri_single_config[srgb] = dri_config;
       else
          /* a similar config type is already added (unlikely) => discard */
          return NULL;
@@ -301,18 +295,13 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
       if (conf == NULL)
          return NULL;
 
+      if (double_buffer)
+         conf->dri_double_config[srgb] = dri_config;
+      else
+         conf->dri_single_config[srgb] = dri_config;
+
       memcpy(&conf->base, &base, sizeof base);
-      if (double_buffer) {
-         if (srgb)
-            conf->dri_srgb_double_config = dri_config;
-         else
-            conf->dri_double_config = dri_config;
-      } else {
-         if (srgb)
-            conf->dri_srgb_single_config = dri_config;
-         else
-            conf->dri_single_config = dri_config;
-      }
+      conf->base.SurfaceType = 0;
       conf->base.ConfigID = config_id;
 
       _eglLinkConfig(&conf->base);
@@ -1021,10 +1010,10 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
        * doubleBufferMode check in
        * src/mesa/main/context.c:check_compatible()
        */
-      if (dri2_config->dri_double_config)
-         dri_config = dri2_config->dri_double_config;
+      if (dri2_config->dri_double_config[0])
+         dri_config = dri2_config->dri_double_config[0];
       else
-         dri_config = dri2_config->dri_single_config;
+         dri_config = dri2_config->dri_single_config[0];
 
       /* EGL_WINDOW_BIT is set only when there is a dri_double_config.  This
        * makes sure the back buffer will always be used.
@@ -2424,13 +2413,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
    unsigned wait_flags = 0;
    EGLint ret = EGL_CONDITION_SATISFIED_KHR;
 
-   if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
+   /* The EGL_KHR_fence_sync spec states:
+    *
+    *    "If no context is current for the bound API,
+    *     the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored."
+    */
+   if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
       wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;
 
    /* the sync object should take a reference while waiting */
    dri2_egl_ref_sync(dri2_sync);
 
-   if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
+   if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
                                          dri2_sync->fence, wait_flags,
                                          timeout))
       dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
index 9aa2a8c10032cb564bed957ad50e234b1f8d7764..0e837b3eb8bf03043626050b60848a3493eefbf6 100644 (file)
@@ -284,10 +284,8 @@ struct dri2_egl_surface
 struct dri2_egl_config
 {
    _EGLConfig         base;
-   const __DRIconfig *dri_single_config;
-   const __DRIconfig *dri_double_config;
-   const __DRIconfig *dri_srgb_single_config;
-   const __DRIconfig *dri_srgb_double_config;
+   const __DRIconfig *dri_single_config[2];
+   const __DRIconfig *dri_double_config[2];
 };
 
 struct dri2_egl_image
index 050c309dceb4ab62c9511191888a51b7e26ccf7d..815d2674cb2d3e1a74865793710c5fd0306ce8c7 100644 (file)
@@ -101,6 +101,7 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
    struct dri2_egl_surface *dri2_surf;
    struct gbm_surface *window = native_window;
    struct gbm_dri_surface *surf;
+   const __DRIconfig *config;
 
    (void) drv;
 
@@ -130,21 +131,20 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
       goto cleanup_surf;
    }
 
-   if (dri2_dpy->dri2) {
-      const __DRIconfig *config =
-         dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
-                             dri2_surf->base.GLColorspace);
+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);
 
+   if (dri2_dpy->dri2) {
       dri2_surf->dri_drawable =
          (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
                                               dri2_surf->gbm_surf);
 
    } else {
       assert(dri2_dpy->swrast != NULL);
+
       dri2_surf->dri_drawable =
-         (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-                                                 dri2_conf->dri_double_config,
-                                                 dri2_surf->gbm_surf);
+         (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                                dri2_surf->gbm_surf);
 
    }
    if (dri2_surf->dri_drawable == NULL) {
index 6cf5461d52c9fc17498b0f9641ac4e655432353b..0d161f617a1935f8e272c3cdb5e0532f87a136fb 100644 (file)
@@ -1645,6 +1645,7 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
    struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
    struct wl_egl_window *window = native_window;
    struct dri2_egl_surface *dri2_surf;
+   const __DRIconfig *config;
 
    (void) drv;
 
@@ -1669,10 +1670,12 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
    dri2_surf->base.Width = -1;
    dri2_surf->base.Height = -1;
 
+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);
+
    dri2_surf->dri_drawable =
-      (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-                                              dri2_conf->dri_double_config,
-                                              dri2_surf);
+      (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen,
+                                             config, dri2_surf);
    if (dri2_surf->dri_drawable == NULL) {
       _eglError(EGL_BAD_ALLOC, "swrast->createNewDrawable");
       goto cleanup_dri_drawable;
index 7991fc2b67b4c9d66625caa654eeb70021ea9b1b..88a06a8c6a8b5aa805e6bad302f88798a02e40cb 100644 (file)
@@ -206,6 +206,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
    xcb_generic_error_t *error;
    xcb_drawable_t drawable;
    xcb_screen_t *screen;
+   const __DRIconfig *config;
 
    STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
    drawable = (uintptr_t) native_surface;
@@ -245,19 +246,18 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
       dri2_surf->drawable = drawable;
    }
 
-   if (dri2_dpy->dri2) {
-      const __DRIconfig *config =
-         dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace);
+   config = dri2_get_dri_config(dri2_conf, type,
+                                dri2_surf->base.GLColorspace);
 
+   if (dri2_dpy->dri2) {
       dri2_surf->dri_drawable =
         (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
                                              dri2_surf);
    } else {
       assert(dri2_dpy->swrast);
       dri2_surf->dri_drawable = 
-        (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-                                                dri2_conf->dri_double_config,
-                                                dri2_surf);
+         (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                                dri2_surf);
    }
 
    if (dri2_surf->dri_drawable == NULL) {
index 66306d7d5d233ea201e7028420c064c5a25e7bb2..d76dddbf7d94d1f8654b4c06c24ffb2ea68e2b39 100644 (file)
@@ -116,6 +116,53 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                break;
             }
 
+            if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
+                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+               const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
+               unsigned input;
+
+               if (src0->Register.Indirect && src0->Indirect.ArrayID)
+                  input = info->input_array_first[src0->Indirect.ArrayID];
+               else
+                  input = src0->Register.Index;
+
+               /* For the INTERP opcodes, the interpolation is always
+                * PERSPECTIVE unless LINEAR is specified.
+                */
+               switch (info->input_interpolate[input]) {
+               case TGSI_INTERPOLATE_COLOR:
+               case TGSI_INTERPOLATE_CONSTANT:
+               case TGSI_INTERPOLATE_PERSPECTIVE:
+                  switch (fullinst->Instruction.Opcode) {
+                  case TGSI_OPCODE_INTERP_CENTROID:
+                     info->uses_persp_opcode_interp_centroid = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_OFFSET:
+                     info->uses_persp_opcode_interp_offset = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_SAMPLE:
+                     info->uses_persp_opcode_interp_sample = true;
+                     break;
+                  }
+                  break;
+
+               case TGSI_INTERPOLATE_LINEAR:
+                  switch (fullinst->Instruction.Opcode) {
+                  case TGSI_OPCODE_INTERP_CENTROID:
+                     info->uses_linear_opcode_interp_centroid = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_OFFSET:
+                     info->uses_linear_opcode_interp_offset = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_SAMPLE:
+                     info->uses_linear_opcode_interp_sample = true;
+                     break;
+                  }
+                  break;
+               }
+            }
+
             if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
                 fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
                info->uses_doubles = true;
@@ -236,8 +283,48 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                   info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
                   info->num_inputs++;
 
-                  if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
-                     info->uses_centroid = TRUE;
+                  /* Only interpolated varyings. Don't include POSITION.
+                   * Don't include integer varyings, because they are not
+                   * interpolated.
+                   */
+                  if (semName == TGSI_SEMANTIC_GENERIC ||
+                      semName == TGSI_SEMANTIC_TEXCOORD ||
+                      semName == TGSI_SEMANTIC_COLOR ||
+                      semName == TGSI_SEMANTIC_BCOLOR ||
+                      semName == TGSI_SEMANTIC_FOG ||
+                      semName == TGSI_SEMANTIC_CLIPDIST ||
+                      semName == TGSI_SEMANTIC_CULLDIST) {
+                     switch (fulldecl->Interp.Interpolate) {
+                     case TGSI_INTERPOLATE_COLOR:
+                     case TGSI_INTERPOLATE_PERSPECTIVE:
+                        switch (fulldecl->Interp.Location) {
+                        case TGSI_INTERPOLATE_LOC_CENTER:
+                           info->uses_persp_center = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_CENTROID:
+                           info->uses_persp_centroid = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_SAMPLE:
+                           info->uses_persp_sample = true;
+                           break;
+                        }
+                        break;
+                     case TGSI_INTERPOLATE_LINEAR:
+                        switch (fulldecl->Interp.Location) {
+                        case TGSI_INTERPOLATE_LOC_CENTER:
+                           info->uses_linear_center = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_CENTROID:
+                           info->uses_linear_centroid = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_SAMPLE:
+                           info->uses_linear_sample = true;
+                           break;
+                        }
+                        break;
+                     /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
+                     }
+                  }
 
                   if (semName == TGSI_SEMANTIC_PRIMID)
                      info->uses_primid = TRUE;
index 42539ee9f45096d0e4e0d8e8a131153db6e72487..3ceb55717eeebd695fbc14cc433dff57941a9579 100644 (file)
@@ -82,7 +82,18 @@ struct tgsi_shader_info
    boolean writes_stencil; /**< does fragment shader write stencil value? */
    boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
    boolean uses_kill;  /**< KILL or KILL_IF instruction used? */
-   boolean uses_centroid;
+   boolean uses_persp_center;
+   boolean uses_persp_centroid;
+   boolean uses_persp_sample;
+   boolean uses_linear_center;
+   boolean uses_linear_centroid;
+   boolean uses_linear_sample;
+   boolean uses_persp_opcode_interp_centroid;
+   boolean uses_persp_opcode_interp_offset;
+   boolean uses_persp_opcode_interp_sample;
+   boolean uses_linear_opcode_interp_centroid;
+   boolean uses_linear_opcode_interp_offset;
+   boolean uses_linear_opcode_interp_sample;
    boolean uses_instanceid;
    boolean uses_vertexid;
    boolean uses_vertexid_nobase;
index e7800472f4420ebf10c9129a44284fb6fcb4defc..e08844b2f0bfbbf0148273823dd6ba0bd5813d75 100644 (file)
@@ -268,6 +268,15 @@ The integer capabilities:
   bounds_max states of pipe_depth_stencil_alpha_state behave according
   to the GL_EXT_depth_bounds_test specification.
 * ``PIPE_CAP_TGSI_TXQS``: Whether the `TXQS` opcode is supported
+* ``PIPE_CAP_FORCE_PERSAMPLE_INTERP``: Whether the driver can force per-sample
+  interpolation for all fragment shader inputs when
+  pipe_rasterizer_state::force_persample_interp is set. This is only used
+  by GL3-level sample shading (ARB_sample_shading). GL4-level sample shading
+  (ARB_gpu_shader5) doesn't use this. While GL3 hardware has a state for it,
+  GL4 hardware will likely need to emulate it with a shader variant, or by
+  selecting the interpolation weights with a conditional assignment
+  in the shader.
+
 
 
 .. _pipe_capf:
index 780edd8aa00e644b9d45efe33b21d5d32ebbce99..1bd38274df80cf5b7635b42a42f5c6ffd8a9aa21 100644 (file)
@@ -1,6 +1,7 @@
 C_SOURCES := \
-       dd_pipe.h \
-       dd_public.h \
        dd_context.c \
        dd_draw.c \
-       dd_screen.c
+       dd_pipe.h \
+       dd_public.h \
+       dd_screen.c \
+       dd_util.h
index 1c9862387082f6346e4ee421cffdd265b54f7f4f..b443c5b0b03e7c027a72f116ce0d5686bd88a039 100644 (file)
@@ -30,9 +30,6 @@
 #include "util/u_dump.h"
 #include "util/u_format.h"
 #include "tgsi/tgsi_scan.h"
-#include "os/os_process.h"
-#include <errno.h>
-#include <sys/stat.h>
 
 
 enum call_type
@@ -88,33 +85,13 @@ struct dd_call
    } info;
 };
 
-
 static FILE *
 dd_get_file_stream(struct dd_context *dctx)
 {
    struct pipe_screen *screen = dctx->pipe->screen;
-   static unsigned index;
-   char proc_name[128], dir[256], name[512];
-   FILE *f;
-
-   if (!os_get_process_name(proc_name, sizeof(proc_name))) {
-      fprintf(stderr, "dd: can't get the process name\n");
-      return NULL;
-   }
-
-   snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
-
-   if (mkdir(dir, 0774) && errno != EEXIST) {
-      fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
-      return NULL;
-   }
-
-   snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++);
-   f = fopen(name, "w");
-   if (!f) {
-      fprintf(stderr, "dd: can't open file %s\n", name);
+   FILE *f = dd_get_debug_file();
+   if (!f)
       return NULL;
-   }
 
    fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
    fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
index c78d112988a1d276a4d2786f65f9e044ac01a17d..34f59203e4b2a43ecacdedff784b43588a2334d3 100644 (file)
@@ -31,9 +31,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "pipe/p_screen.h"
-
-/* name of the directory in home */
-#define DD_DIR "ddebug_dumps"
+#include "dd_util.h"
 
 enum dd_mode {
    DD_DETECT_HANGS,
diff --git a/src/gallium/drivers/ddebug/dd_util.h b/src/gallium/drivers/ddebug/dd_util.h
new file mode 100644 (file)
index 0000000..c217c8e
--- /dev/null
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DD_UTIL_H
+#define DD_UTIL_H
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "os/os_process.h"
+#include "util/u_debug.h"
+
+/* name of the directory in home */
+#define DD_DIR "ddebug_dumps"
+
+static inline FILE *
+dd_get_debug_file()
+{
+   static unsigned index;
+   char proc_name[128], dir[256], name[512];
+   FILE *f;
+
+   if (!os_get_process_name(proc_name, sizeof(proc_name))) {
+      fprintf(stderr, "dd: can't get the process name\n");
+      return NULL;
+   }
+
+   snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
+
+   if (mkdir(dir, 0774) && errno != EEXIST) {
+      fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
+      return NULL;
+   }
+
+   snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++);
+   f = fopen(name, "w");
+   if (!f) {
+      fprintf(stderr, "dd: can't open file %s\n", name);
+      return NULL;
+   }
+
+   return f;
+}
+
+#endif /* DD_UTIL_H */
index 9a684d4ffbb71a48c7c0e683b1ccf219ede05966..0d0100590d66f4f0356fb5224b5c1eccf5e46a11 100644 (file)
@@ -235,6 +235,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
        case PIPE_CAP_DEPTH_BOUNDS_TEST:
        case PIPE_CAP_TGSI_TXQS:
+       case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
                return 0;
 
        case PIPE_CAP_MAX_VIEWPORTS:
index 51c64edda2204e898e47dce7feb4f6ce73f881f0..9d6b3d391839bb06fbd845d44f226fc30c3770d5 100644 (file)
@@ -248,6 +248,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
    case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
 
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
index 9e37e24014a45d76a9a6021d965f506288747f90..76812a666a07d69977345104250745094c3617aa 100644 (file)
@@ -470,6 +470,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
    case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
index 697e3d97c19440af3d720fc2237ee30374aab590..50c3781f5f84b83e4a9fe4e02de75bd2c975a478 100644 (file)
@@ -297,6 +297,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
    case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
    }
    /* should only get here on unhandled cases */
index 389a229eb7825b247470da4770b1ebaed5c020e1..a44fd3efcf7b0b2caa8080f57910d56fc2146063 100644 (file)
@@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push)
 static inline bool
 PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
 {
+   /* Provide a buffer so that fences always have room to be emitted */
+   size += 8;
    if (PUSH_AVAIL(push) < size)
       return nouveau_pushbuf_space(push, size, 0, 0) == 0;
    return true;
index 67e0d5e9c5682a2471369b152fd76bc0e853f754..8d9516b863fc7d9f2e4362c7f3364a005cde2f7c 100644 (file)
@@ -78,12 +78,12 @@ nv30_format_info_table[PIPE_FORMAT_COUNT] = {
    _(B4G4R4X4_UNORM      , S___),
    _(B4G4R4A4_UNORM      , S___),
    _(B5G6R5_UNORM        , SB__),
-   _(B8G8R8X8_UNORM      , SB__),
-   _(B8G8R8X8_SRGB       , S___),
-   _(B8G8R8A8_UNORM      , SB__),
-   _(B8G8R8A8_SRGB       , S___),
+   _(BGRX8888_UNORM      , SB__),
+   _(BGRX8888_SRGB       , S___),
+   _(BGRA8888_UNORM      , SB__),
+   _(BGRA8888_SRGB       , S___),
    _(R8G8B8A8_UNORM      , __V_),
-   _(R8G8B8A8_SNORM      , S___),
+   _(RGBA8888_SNORM      , S___),
    _(DXT1_RGB            , S___),
    _(DXT1_SRGB           , S___),
    _(DXT1_RGBA           , S___),
@@ -138,8 +138,8 @@ const struct nv30_format
 nv30_format_table[PIPE_FORMAT_COUNT] = {
    R_(B5G5R5X1_UNORM    , X1R5G5B5          ),
    R_(B5G6R5_UNORM      , R5G6B5            ),
-   R_(B8G8R8X8_UNORM    , X8R8G8B8          ),
-   R_(B8G8R8A8_UNORM    , A8R8G8B8          ),
+   R_(BGRX8888_UNORM    , X8R8G8B8          ),
+   R_(BGRA8888_UNORM    , A8R8G8B8          ),
    Z_(Z16_UNORM         , Z16               ),
    Z_(X8Z24_UNORM       , Z24S8             ),
    Z_(S8_UINT_Z24_UNORM , Z24S8             ),
@@ -223,11 +223,11 @@ nv30_texfmt_table[PIPE_FORMAT_COUNT] = {
    _(B4G4R4X4_UNORM    , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
    _(B4G4R4A4_UNORM    , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
    _(B5G6R5_UNORM      , R5G6B5  , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
-   _(B8G8R8X8_UNORM    , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
-   _(B8G8R8X8_SRGB     , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
-   _(B8G8R8A8_UNORM    , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
-   _(B8G8R8A8_SRGB     , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
-   _(R8G8B8A8_SNORM    , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
+   _(BGRX8888_UNORM    , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
+   _(BGRX8888_SRGB     , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
+   _(BGRA8888_UNORM    , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
+   _(BGRA8888_SRGB     , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
+   _(RGBA8888_SNORM    , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
    _(DXT1_RGB          , DXT1    , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
    _(DXT1_SRGB         , DXT1    , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
    _(DXT1_RGBA         , DXT1    , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
index 806d4e6b04f8f250baf64ff30c4f61972936c8ef..335c163b66187111ea2c36f435727f54c801ce82 100644 (file)
@@ -170,6 +170,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -346,7 +347,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)
 
    *sequence = ++screen->base.fence.sequence;
 
-   BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2);
+   assert(PUSH_AVAIL(push) >= 3);
+   PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
+              (2 /* size */ << 18) | (7 /* subchan */ << 13));
    PUSH_DATA (push, 0);
    PUSH_DATA (push, *sequence);
 }
index 8494549e9b182165c1c1570b0f153e5224d833fb..dec3a0bb8565876385d2c457f3f2efaa8c2043b0 100644 (file)
@@ -191,7 +191,11 @@ nv30_vbo_validate(struct nv30_context *nv30)
    if (!nv30->vertex || nv30->draw_flags)
       return;
 
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   if (1) { /* Figure out where the buffers are getting messed up */
+#else
    if (unlikely(vertex->need_conversion)) {
+#endif
       nv30->vbo_fifo = ~0;
       nv30->vbo_user = 0;
    } else {
index c3bbc833f5b157faa6ba6543c639bf1d253ecb83..812b246ea0e00c23970218fae667077fcc7a53ef 100644 (file)
@@ -215,6 +215,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -387,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
    /* we need to do it after possible flush in MARK_RING */
    *sequence = ++screen->base.fence.sequence;
 
+   assert(PUSH_AVAIL(push) >= 5);
    PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
    PUSH_DATAh(push, screen->fence.bo->offset);
    PUSH_DATA (push, screen->fence.bo->offset);
index 1909b914d0283e66e709320e9481c717516551dc..afd91e6feeeda6c1f57c1b7b6c3fc4601a1d862f 100644 (file)
@@ -201,6 +201,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -536,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
    /* we need to do it after possible flush in MARK_RING */
    *sequence = ++screen->base.fence.sequence;
 
-   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+   assert(PUSH_AVAIL(push) >= 5);
+   PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
    PUSH_DATAh(push, screen->fence.bo->offset);
    PUSH_DATA (push, screen->fence.bo->offset);
    PUSH_DATA (push, *sequence);
index fc150542d4bc857f378a2af1174f236dd6194971..a2d042ca48ef28994be22234362a72a9e5e4d2b9 100644 (file)
 
 
 /**
- * Writing relocations.
+ * Writing buffers.
  */
 
 #define OUT_CS_RELOC(r) do { \
     assert((r)); \
     assert((r)->cs_buf); \
     OUT_CS(0xc0001000); /* PKT3_NOP */ \
-    OUT_CS(cs_winsys->cs_get_reloc(cs_copy, (r)->cs_buf) * 4); \
+    OUT_CS(cs_winsys->cs_lookup_buffer(cs_copy, (r)->cs_buf) * 4); \
 } while (0)
 
 
index 4c9971e5128c37471f8f17cb4c91ba63f1a651c8..7610c3ddf5b9084ce52fd06c421c39a3197c3b99 100644 (file)
@@ -1049,7 +1049,7 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
 
     assert(r300->vbo_cs);
     OUT_CS(0xc0001000); /* PKT3_NOP */
-    OUT_CS(r300->rws->cs_get_reloc(r300->cs, r300->vbo_cs) * 4);
+    OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo_cs) * 4);
     END_CS;
 }
 
@@ -1320,7 +1320,7 @@ validate:
                 continue;
             tex = r300_resource(fb->cbufs[i]->texture);
             assert(tex && tex->buf && "cbuf is marked, but NULL!");
-            r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
+            r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
                                     RADEON_USAGE_READWRITE,
                                     r300_surface(fb->cbufs[i])->domain,
                                     tex->b.b.nr_samples > 1 ?
@@ -1331,7 +1331,7 @@ validate:
         if (fb->zsbuf) {
             tex = r300_resource(fb->zsbuf->texture);
             assert(tex && tex->buf && "zsbuf is marked, but NULL!");
-            r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
+            r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
                                     RADEON_USAGE_READWRITE,
                                     r300_surface(fb->zsbuf)->domain,
                                     tex->b.b.nr_samples > 1 ?
@@ -1342,7 +1342,7 @@ validate:
     /* The AA resolve buffer. */
     if (r300->aa_state.dirty) {
         if (aa->dest) {
-            r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf,
+            r300->rws->cs_add_buffer(r300->cs, aa->dest->cs_buf,
                                     RADEON_USAGE_WRITE,
                                     aa->dest->domain,
                                     RADEON_PRIO_COLOR_BUFFER);
@@ -1356,20 +1356,20 @@ validate:
             }
 
             tex = r300_resource(texstate->sampler_views[i]->base.texture);
-            r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
-                                    tex->domain, RADEON_PRIO_SHADER_TEXTURE_RO);
+            r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
+                                    tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
         }
     }
     /* ...occlusion query buffer... */
     if (r300->query_current)
-        r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
+        r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buf,
                                 RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
-                                RADEON_PRIO_MIN);
+                                RADEON_PRIO_QUERY);
     /* ...vertex buffer for SWTCL path... */
     if (r300->vbo_cs)
-        r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs,
+        r300->rws->cs_add_buffer(r300->cs, r300->vbo_cs,
                                 RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
-                                RADEON_PRIO_MIN);
+                                RADEON_PRIO_VERTEX_BUFFER);
     /* ...vertex buffers for HWTCL path... */
     if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
         struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
@@ -1382,18 +1382,18 @@ validate:
             if (!buf)
                 continue;
 
-            r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
+            r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->cs_buf,
                                     RADEON_USAGE_READ,
                                     r300_resource(buf)->domain,
-                                    RADEON_PRIO_SHADER_BUFFER_RO);
+                                    RADEON_PRIO_SAMPLER_BUFFER);
         }
     }
     /* ...and index buffer for HWTCL path. */
     if (index_buffer)
-        r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
+        r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->cs_buf,
                                 RADEON_USAGE_READ,
                                 r300_resource(index_buffer)->domain,
-                                RADEON_PRIO_MIN);
+                                RADEON_PRIO_INDEX_BUFFER);
 
     /* Now do the validation (flush is called inside cs_validate on failure). */
     if (!r300->rws->cs_validate(r300->cs)) {
index e669ba2edde0d582f7af8021c2fefd811b5db4cf..1165ac8a9c02d201101d167ef24a92c8954e60e0 100644 (file)
@@ -196,6 +196,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
         case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
         case PIPE_CAP_DEPTH_BOUNDS_TEST:
         case PIPE_CAP_TGSI_TXQS:
+        case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
             return 0;
 
         /* SWTCL-only features. */
index 33009c16f688985005bb4b13f93715a41215ccd2..6f2b7ba0db36482af848774c3f46ba7f5de32fa7 100644 (file)
@@ -442,7 +442,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
                unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
                                                       (struct r600_resource*)cb->base.texture,
                                                       RADEON_USAGE_READWRITE,
-                                                      RADEON_PRIO_SHADER_RESOURCE_RW);
+                                                      RADEON_PRIO_SHADER_RW_BUFFER);
 
                radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
                radeon_emit(cs, cb->cb_color_base);     /* R_028C60_CB_COLOR0_BASE */
@@ -566,7 +566,7 @@ void evergreen_emit_cs_shader(
        radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
        radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                              code_bo, RADEON_USAGE_READ,
-                                             RADEON_PRIO_SHADER_DATA));
+                                             RADEON_PRIO_USER_SHADER));
 }
 
 static void evergreen_launch_grid(
index 29bdd9daddb47441d3e45eedc6db51e27098fab5..89abe92cbb469e69b2bc2d62585110237f55d7d1 100644 (file)
@@ -65,9 +65,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
                csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
                /* emit reloc before writing cs so that cs is always in consistent state */
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_SDMA_BUFFER);
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_SDMA_BUFFER);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
                cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
                cs->buf[cs->cdw++] = src_offset & 0xffffffff;
@@ -131,7 +131,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
                /* This must be done after r600_need_cs_space. */
                reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                              (struct r600_resource*)dst, RADEON_USAGE_WRITE,
-                                             RADEON_PRIO_MIN);
+                                             RADEON_PRIO_CP_DMA);
 
                radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
                radeon_emit(cs, clear_value);   /* DATA [31:0] */
index 52f4dc81d9f4e313dc066f2721c8a365d5b1fb1b..c6702a9ca3400fb1a49eb797df01853e0fe447bf 100644 (file)
@@ -783,6 +783,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 
        va = tmp->resource.gpu_address;
 
+       if (state->format == PIPE_FORMAT_X24S8_UINT ||
+           state->format == PIPE_FORMAT_S8X24_UINT ||
+           state->format == PIPE_FORMAT_X32_S8X24_UINT ||
+           state->format == PIPE_FORMAT_S8_UINT)
+               view->is_stencil_sampler = true;
+
        view->tex_resource = &tmp->resource;
        view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
                                       S_030000_PITCH((pitch / 8) - 1) |
@@ -1584,7 +1590,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
                if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
                        cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                tex->cmask_buffer, RADEON_USAGE_READWRITE,
-                               RADEON_PRIO_COLOR_META);
+                               RADEON_PRIO_CMASK);
                } else {
                        cmask_reloc = reloc;
                }
@@ -1767,7 +1773,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
                radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
                radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
                reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
-                                                 RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
+                                                 RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                cs->buf[cs->cdw++] = reloc_idx;
        } else {
@@ -1823,9 +1829,9 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
                                     S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
                                     S_028000_COPY_CENTROID(1) |
                                     S_028000_COPY_SAMPLE(a->copy_sample);
-       } else if (a->flush_depthstencil_in_place) {
-               db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) |
-                                    S_028000_STENCIL_COMPRESS_DISABLE(1);
+       } else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
+               db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
+                                    S_028000_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
                db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
        }
        if (a->htile_clear) {
@@ -1881,7 +1887,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
-                                                     RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+                                                     RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
        }
        state->dirty_mask = 0;
 }
@@ -1929,7 +1935,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
-                                                     RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+                                                     RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
                radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
@@ -1954,7 +1960,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
-                                                     RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+                                                     RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
                dirty_mask &= ~(1 << buffer_index);
        }
@@ -2018,9 +2024,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 
                reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
                                              RADEON_USAGE_READ,
-                                             rview->tex_resource->b.b.nr_samples > 1 ?
-                                                     RADEON_PRIO_SHADER_TEXTURE_MSAA :
-                                                     RADEON_PRIO_SHADER_TEXTURE_RO);
+                                             r600_get_sampler_view_priority(rview->tex_resource));
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                radeon_emit(cs, reloc);
 
@@ -2140,7 +2144,8 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
                               (shader->buffer->gpu_address + shader->offset) >> 8);
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
        radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
-                                             RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+                                                  RADEON_USAGE_READ,
+                                                  RADEON_PRIO_INTERNAL_SHADER));
 }
 
 static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
@@ -2199,7 +2204,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
-                                                     RADEON_PRIO_SHADER_RESOURCE_RW));
+                                                     RADEON_PRIO_RINGS_STREAMOUT));
                radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
                                state->esgs_ring.buffer_size >> 8);
 
@@ -2209,7 +2214,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
-                                                     RADEON_PRIO_SHADER_RESOURCE_RW));
+                                                     RADEON_PRIO_RINGS_STREAMOUT));
                radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
                                state->gsvs_ring.buffer_size >> 8);
        } else {
@@ -3330,9 +3335,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
                size = (cheight * pitch) / 4;
                /* emit reloc before writing cs so that cs is always in consistent state */
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
-                                     RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                     RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
-                                     RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+                                     RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
                cs->buf[cs->cdw++] = base >> 8;
                cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
@@ -3376,11 +3381,11 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
        }
 
        if (src->format != dst->format || src_box->depth > 1 ||
-           rdst->dirty_level_mask != 0) {
+           (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
                goto fallback;
        }
 
-       if (rsrc->dirty_level_mask) {
+       if (rsrc->dirty_level_mask & (1 << src_level)) {
                ctx->flush_resource(ctx, src);
        }
 
index d1370cd8f26f07ceef0e19716e8e6110b60cfeb9..aede84084469af4e7150b2bbcacf750666bd1f56 100644 (file)
@@ -202,20 +202,28 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
 
 static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
                                                 struct r600_texture *texture,
+                                               bool is_stencil_sampler,
                                                 unsigned first_level, unsigned last_level,
                                                 unsigned first_layer, unsigned last_layer)
 {
        struct pipe_surface *zsurf, surf_tmpl = {{0}};
        unsigned layer, max_layer, checked_last_layer, level;
+       unsigned *dirty_level_mask;
 
        /* Enable decompression in DB_RENDER_CONTROL */
-       rctx->db_misc_state.flush_depthstencil_in_place = true;
+       if (is_stencil_sampler) {
+               rctx->db_misc_state.flush_stencil_inplace = true;
+               dirty_level_mask = &texture->stencil_dirty_level_mask;
+       } else {
+               rctx->db_misc_state.flush_depth_inplace = true;
+               dirty_level_mask = &texture->dirty_level_mask;
+       }
        r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 
        surf_tmpl.format = texture->resource.b.b.format;
 
        for (level = first_level; level <= last_level; level++) {
-               if (!(texture->dirty_level_mask & (1 << level)))
+               if (!(*dirty_level_mask & (1 << level)))
                        continue;
 
                surf_tmpl.u.tex.level = level;
@@ -242,12 +250,13 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
                /* The texture will always be dirty if some layers or samples aren't flushed.
                 * I don't think this case occurs often though. */
                if (first_layer == 0 && last_layer == max_layer) {
-                       texture->dirty_level_mask &= ~(1 << level);
+                       *dirty_level_mask &= ~(1 << level);
                }
        }
 
        /* Disable decompression in DB_RENDER_CONTROL */
-       rctx->db_misc_state.flush_depthstencil_in_place = false;
+       rctx->db_misc_state.flush_depth_inplace = false;
+       rctx->db_misc_state.flush_stencil_inplace = false;
        r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 }
 
@@ -259,12 +268,14 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
 
        while (depth_texture_mask) {
                struct pipe_sampler_view *view;
+               struct r600_pipe_sampler_view *rview;
                struct r600_texture *tex;
 
                i = u_bit_scan(&depth_texture_mask);
 
                view = &textures->views[i]->base;
                assert(view);
+               rview = (struct r600_pipe_sampler_view*)view;
 
                tex = (struct r600_texture *)view->texture;
                assert(tex->is_depth && !tex->is_flushing_texture);
@@ -272,6 +283,7 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
                if (rctx->b.chip_class >= EVERGREEN ||
                    r600_can_read_depth(tex)) {
                        r600_blit_decompress_depth_in_place(rctx, tex,
+                                                  rview->is_stencil_sampler,
                                                   view->u.tex.first_level, view->u.tex.last_level,
                                                   0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
                } else {
@@ -367,9 +379,14 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
        if (rtex->is_depth && !rtex->is_flushing_texture) {
                if (rctx->b.chip_class >= EVERGREEN ||
                    r600_can_read_depth(rtex)) {
-                       r600_blit_decompress_depth_in_place(rctx, rtex,
+                       r600_blit_decompress_depth_in_place(rctx, rtex, false,
                                                   level, level,
                                                   first_layer, last_layer);
+                       if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+                               r600_blit_decompress_depth_in_place(rctx, rtex, true,
+                                                          level, level,
+                                                          first_layer, last_layer);
+                       }
                } else {
                        if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
                                return false; /* error */
index cf715976ab2406f578fed6ce99f74084f699e876..6f11366e6069e84793d9501ca64f1b2f894d5f08 100644 (file)
@@ -419,9 +419,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 
                /* This must be done after r600_need_cs_space. */
                src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
-                                                 RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                                 RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
                dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
-                                                 RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+                                                 RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
 
                radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
                radeon_emit(cs, src_offset);    /* SRC_ADDR_LO [31:0] */
@@ -472,9 +472,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
                csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
                /* emit reloc before writing cs so that cs is always in consistent state */
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_SDMA_BUFFER);
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_SDMA_BUFFER);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
                cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
                cs->buf[cs->cdw++] = src_offset & 0xfffffffc;
index 36d7e68a8f2243a53720bade6acc516d68572bba..efb4889e5625ecc35dc920f2ef07ffce7dc0001e 100644 (file)
@@ -342,6 +342,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_VERTEXID_NOBASE:
        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
        case PIPE_CAP_DEPTH_BOUNDS_TEST:
+       case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
                return 0;
 
        /* Stream output. */
index d0774de857366c8509f55991424e1825f81c0aa5..520b03f605da83b17b0ace2b8a6fbfac5940e12c 100644 (file)
@@ -109,7 +109,8 @@ struct r600_db_misc_state {
        struct r600_atom                atom;
        bool                            occlusion_query_enabled;
        bool                            flush_depthstencil_through_cb;
-       bool                            flush_depthstencil_in_place;
+       bool                            flush_depth_inplace;
+       bool                            flush_stencil_inplace;
        bool                            copy_depth, copy_stencil;
        unsigned                        copy_sample;
        unsigned                        log_samples;
@@ -253,6 +254,7 @@ struct r600_pipe_sampler_view {
        struct r600_resource            *tex_resource;
        uint32_t                        tex_resource_words[8];
        bool                            skip_mip_address_reloc;
+       bool                            is_stencil_sampler;
 };
 
 struct r600_rasterizer_state {
index 7084c5f359b0a88b05a5ac4ead0d3a663f00fed0..1be3e1b4de50d888052a893d58dd76d2f3d2cb5e 100644 (file)
@@ -710,6 +710,12 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
                break;
        }
 
+       if (state->format == PIPE_FORMAT_X24S8_UINT ||
+           state->format == PIPE_FORMAT_S8X24_UINT ||
+           state->format == PIPE_FORMAT_X32_S8X24_UINT ||
+           state->format == PIPE_FORMAT_S8_UINT)
+               view->is_stencil_sampler = true;
+
        view->tex_resource = &tmp->resource;
        view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
                                       S_038000_TILE_MODE(array_mode) |
@@ -1605,7 +1611,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
                radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
                radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
                reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
-                                                 RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
+                                                 RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                cs->buf[cs->cdw++] = reloc_idx;
        } else {
@@ -1659,9 +1665,9 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
                if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 ||
                    rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RV635)
                        db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
-       } else if (a->flush_depthstencil_in_place) {
-               db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) |
-                                    S_028D0C_STENCIL_COMPRESS_DISABLE(1);
+       } else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
+               db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
+                                    S_028D0C_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
                db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
        }
        if (a->htile_clear) {
@@ -1720,7 +1726,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
-                                                     RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+                                                     RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
        }
 }
 
@@ -1753,7 +1759,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
-                                                     RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+                                                     RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
                radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
@@ -1769,7 +1775,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
-                                                     RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+                                                     RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
                dirty_mask &= ~(1 << buffer_index);
        }
@@ -1821,9 +1827,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 
                reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
                                              RADEON_USAGE_READ,
-                                             rview->tex_resource->b.b.nr_samples > 1 ?
-                                                     RADEON_PRIO_SHADER_TEXTURE_MSAA :
-                                                     RADEON_PRIO_SHADER_TEXTURE_RO);
+                                             r600_get_sampler_view_priority(rview->tex_resource));
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, reloc);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
@@ -1945,7 +1949,8 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
        radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
        radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
-                                             RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+                                                  RADEON_USAGE_READ,
+                                                  RADEON_PRIO_INTERNAL_SHADER));
 }
 
 static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
@@ -1999,7 +2004,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
-                                                     RADEON_PRIO_SHADER_RESOURCE_RW));
+                                                     RADEON_PRIO_RINGS_STREAMOUT));
                radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
                                state->esgs_ring.buffer_size >> 8);
 
@@ -2008,7 +2013,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
-                                                     RADEON_PRIO_SHADER_RESOURCE_RW));
+                                                     RADEON_PRIO_RINGS_STREAMOUT));
                radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
                                state->gsvs_ring.buffer_size >> 8);
        } else {
@@ -2914,9 +2919,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
                size = (cheight * pitch) / 4;
                /* emit reloc before writing cs so that cs is always in consistent state */
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_SDMA_TEXTURE);
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_SDMA_TEXTURE);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
                cs->buf[cs->cdw++] = base >> 8;
                cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
index efce852eafad825a431d4e94fe5e80c49572fe76..178005a857408e743f890d0279beb4787ed4f17d 100644 (file)
@@ -1683,7 +1683,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
                cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                           (struct r600_resource*)info.indirect,
-                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                                          RADEON_USAGE_READ,
+                                                           RADEON_PRIO_DRAW_INDIRECT);
        }
 
        if (info.indexed) {
@@ -1712,7 +1713,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
                                cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                                           (struct r600_resource*)ib.buffer,
-                                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                                                          RADEON_USAGE_READ,
+                                                                           RADEON_PRIO_INDEX_BUFFER);
                        }
                        else {
                                uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
@@ -1724,7 +1726,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
                                cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                                           (struct r600_resource*)ib.buffer,
-                                                                          RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                                                          RADEON_USAGE_READ,
+                                                                           RADEON_PRIO_INDEX_BUFFER);
 
                                cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing);
                                cs->buf[cs->cdw++] = max_size;
@@ -1751,7 +1754,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                        cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                                   t->buf_filled_size, RADEON_USAGE_READ,
-                                                                  RADEON_PRIO_MIN);
+                                                                  RADEON_PRIO_SO_FILLED_SIZE);
                }
 
                if (likely(!info.indirect)) {
@@ -1776,6 +1779,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                struct r600_texture *rtex = (struct r600_texture *)surf->texture;
 
                rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+               if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+                       rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
        }
        if (rctx->framebuffer.compressed_cb_mask) {
                struct pipe_surface *surf;
@@ -1941,7 +1947,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
        r600_emit_command_buffer(cs, &shader->command_buffer);
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
        radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
-                                             RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+                                             RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER));
 }
 
 unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
@@ -2669,7 +2675,7 @@ void r600_trace_emit(struct r600_context *rctx)
 
        va = rscreen->b.trace_bo->gpu_address;
        reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
-                                     RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
+                                     RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
        radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
        radeon_emit(cs, va & 0xFFFFFFFFUL);
        radeon_emit(cs, (va >> 32UL) & 0xFFUL);
index fa40dc42a31b517676893b1d2b30ce407e666532..b5a1dafb2730a9fbd9c69dd10c667f3341c404ce 100644 (file)
@@ -65,7 +65,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
                        rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
                }
        }
-       return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
+       return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage,
                                      rbo->domains, priority) * 4;
 }
 
index 08839343b74ebef3d5f6442803ec94186885dca7..7ac94caad9ffc0180620f63043cfe276ebf02dc1 100644 (file)
@@ -359,6 +359,7 @@ static const struct debug_named_value common_debug_options[] = {
        { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
        { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
        { "nowc", DBG_NO_WC, "Disable GTT write combining" },
+       { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
 
        DEBUG_NAMED_VALUE_END /* must be last */
 };
index 534b987a2cc2d2842901892281c4c48ca3de48a9..b58b500bd7658a5a4d6a482a656af5cb309bd0f5 100644 (file)
@@ -98,6 +98,7 @@
 #define DBG_PRECOMPILE         (1llu << 39)
 #define DBG_INFO               (1llu << 40)
 #define DBG_NO_WC              (1llu << 41)
+#define DBG_CHECK_VM           (1llu << 42)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 
@@ -204,6 +205,7 @@ struct r600_texture {
        unsigned                        pitch_override;
        bool                            is_depth;
        unsigned                        dirty_level_mask; /* each bit says if that mipmap is compressed */
+       unsigned                        stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
        struct r600_texture             *flushed_depth_texture;
        boolean                         is_flushing_texture;
        struct radeon_surf              surface;
@@ -622,6 +624,18 @@ static inline unsigned r600_wavefront_size(enum radeon_family family)
        }
 }
 
+static inline enum radeon_bo_priority
+r600_get_sampler_view_priority(struct r600_resource *res)
+{
+       if (res->b.b.target == PIPE_BUFFER)
+               return RADEON_PRIO_SAMPLER_BUFFER;
+
+       if (res->b.b.nr_samples > 1)
+               return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
+
+       return RADEON_PRIO_SAMPLER_TEXTURE;
+}
+
 #define COMPUTE_DBG(rscreen, fmt, args...) \
        do { \
                if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
index deeae0a6a65d87c4916762bc7e84a0db7b26a88b..9a5402583f4702ee33b9f45dca60e6e3e29091fe 100644 (file)
@@ -226,7 +226,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
                assert(0);
        }
        r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
-                       RADEON_PRIO_MIN);
+                       RADEON_PRIO_QUERY);
 
        if (r600_is_timer_query(query->type))
                ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
@@ -288,7 +288,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
                assert(0);
        }
        r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
-                       RADEON_PRIO_MIN);
+                       RADEON_PRIO_QUERY);
 
        query->buffer.results_end += query->result_size;
 
@@ -344,7 +344,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
                                radeon_emit(cs, va + results_base);
                                radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
                                r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
-                                               RADEON_PRIO_MIN);
+                                               RADEON_PRIO_QUERY);
                                results_base += query->result_size;
        
                                /* set CONTINUE bit for all packets except the first */
@@ -990,7 +990,8 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
                radeon_emit(cs, buffer->gpu_address);
                radeon_emit(cs, buffer->gpu_address >> 32);
 
-               r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+               r600_emit_reloc(ctx, &ctx->rings.gfx, buffer,
+                                RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 
                /* analyze results */
                results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
index 5198f1e041dba8233c701d720d8834fd2f51caa1..33403b572af484e69ebe96853bbc06096f6dd65f 100644 (file)
@@ -217,7 +217,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
                        radeon_emit(cs, va >> 8);                       /* BUFFER_BASE */
 
                        r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
-                                       RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+                                       RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
 
                        /* R7xx requires this packet after updating BUFFER_BASE.
                         * Without this, R7xx locks up. */
@@ -227,7 +227,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
                                radeon_emit(cs, va >> 8);
 
                                r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
-                                               RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+                                               RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
                        }
                }
 
@@ -245,7 +245,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
                        radeon_emit(cs, va >> 32); /* src address hi */
 
                        r600_emit_reloc(rctx,  &rctx->rings.gfx, t[i]->buf_filled_size,
-                                       RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                       RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
                } else {
                        /* Start from the beginning. */
                        radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
@@ -289,7 +289,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
                radeon_emit(cs, 0); /* unused */
 
                r600_emit_reloc(rctx,  &rctx->rings.gfx, t[i]->buf_filled_size,
-                               RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+                               RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
 
                /* Zero the buffer size. The counters (primitives generated,
                 * primitives emitted) may be enabled even if there is not
index 81f3f45db9f17f50cc468d859975706f866499f2..c3ac7e7f2ef9f0123890d74acaeb04e82b126909 100644 (file)
@@ -110,8 +110,8 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
 {
        int reloc_idx;
 
-       reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
-                                         RADEON_PRIO_MIN);
+       reloc_idx = dec->ws->cs_add_buffer(dec->cs, cs_buf, usage, domain,
+                                         RADEON_PRIO_UVD);
        if (!dec->use_legacy) {
                uint64_t addr;
                addr = dec->ws->buffer_get_virtual_address(cs_buf);
index 7eab974a3df94df08b359aa11e60fa2d4e872439..0dac6fbbdce8d7caa800ecb0fcf9b1faf9194092 100644 (file)
@@ -516,7 +516,7 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *b
 {
        int reloc_idx;
 
-       reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN);
+       reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
        if (enc->use_vm) {
                uint64_t addr;
                addr = enc->ws->buffer_get_virtual_address(buf);
index 00accd5b3e6fbf6428d259108fbaba9460f0c5c0..b91e1adf41d00367632dabd1960aec6c40be6e9c 100644 (file)
@@ -178,20 +178,59 @@ enum radeon_value_id {
     RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
 };
 
+/* Each group of four has the same priority. */
 enum radeon_bo_priority {
-    RADEON_PRIO_MIN,
-    RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
-    RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
-    RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
-    RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
-    RADEON_PRIO_COLOR_BUFFER,
-    RADEON_PRIO_DEPTH_BUFFER,
-    RADEON_PRIO_SHADER_TEXTURE_MSAA,
-    RADEON_PRIO_COLOR_BUFFER_MSAA,
-    RADEON_PRIO_DEPTH_BUFFER_MSAA,
-    RADEON_PRIO_COLOR_META,
-    RADEON_PRIO_DEPTH_META,
-    RADEON_PRIO_MAX /* must be <= 15 */
+    RADEON_PRIO_FENCE = 0,
+    RADEON_PRIO_TRACE,
+    RADEON_PRIO_SO_FILLED_SIZE,
+    RADEON_PRIO_QUERY,
+
+    RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
+    RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
+    RADEON_PRIO_DRAW_INDIRECT,
+    RADEON_PRIO_INDEX_BUFFER,
+
+    RADEON_PRIO_CP_DMA = 8,
+
+    RADEON_PRIO_VCE = 12,
+    RADEON_PRIO_UVD,
+    RADEON_PRIO_SDMA_BUFFER,
+    RADEON_PRIO_SDMA_TEXTURE,
+
+    RADEON_PRIO_USER_SHADER = 16,
+    RADEON_PRIO_INTERNAL_SHADER, /* fetch shader, etc. */
+
+    /* gap: 20 */
+
+    RADEON_PRIO_CONST_BUFFER = 24,
+    RADEON_PRIO_DESCRIPTORS,
+    RADEON_PRIO_BORDER_COLORS,
+
+    RADEON_PRIO_SAMPLER_BUFFER = 28,
+    RADEON_PRIO_VERTEX_BUFFER,
+
+    RADEON_PRIO_SHADER_RW_BUFFER = 32,
+    RADEON_PRIO_RINGS_STREAMOUT,
+    RADEON_PRIO_SCRATCH_BUFFER,
+    RADEON_PRIO_COMPUTE_GLOBAL,
+
+    RADEON_PRIO_SAMPLER_TEXTURE = 36,
+    RADEON_PRIO_SHADER_RW_IMAGE,
+
+    RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 40,
+
+    RADEON_PRIO_COLOR_BUFFER = 44,
+
+    RADEON_PRIO_DEPTH_BUFFER = 48,
+
+    RADEON_PRIO_COLOR_BUFFER_MSAA = 52,
+
+    RADEON_PRIO_DEPTH_BUFFER_MSAA = 56,
+
+    RADEON_PRIO_CMASK = 60,
+    RADEON_PRIO_DCC,
+    RADEON_PRIO_HTILE,
+    /* 63 is the maximum value */
 };
 
 struct winsys_handle;
@@ -329,6 +368,12 @@ struct radeon_surf {
     uint32_t                    num_banks;
 };
 
+struct radeon_bo_list_item {
+    struct pb_buffer *buf;
+    uint64_t vm_address;
+    uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
+};
+
 struct radeon_winsys {
     /**
      * The screen object this winsys was created for
@@ -556,18 +601,17 @@ struct radeon_winsys {
     void (*cs_destroy)(struct radeon_winsys_cs *cs);
 
     /**
-     * Add a new buffer relocation. Every relocation must first be added
-     * before it can be written.
+     * Add a buffer. Each buffer used by a CS must be added using this function.
      *
-     * \param cs  A command stream to add buffer for validation against.
-     * \param buf A winsys buffer to validate.
+     * \param cs      Command stream
+     * \param buf     Buffer
      * \param usage   Whether the buffer is used for read and/or write.
      * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
      * \param priority  A higher number means a greater chance of being
      *                  placed in the requested domain. 63 is the maximum.
-     * \return Relocation index.
+     * \return Buffer index.
      */
-    unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
+    unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs,
                              struct radeon_winsys_cs_handle *buf,
                              enum radeon_bo_usage usage,
                              enum radeon_bo_domain domain,
@@ -580,21 +624,21 @@ struct radeon_winsys {
      * \param buf       Buffer
      * \return          The buffer index, or -1 if the buffer has not been added.
      */
-    int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
-                        struct radeon_winsys_cs_handle *buf);
+    int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs,
+                            struct radeon_winsys_cs_handle *buf);
 
     /**
-     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
-     * added so far. If the validation fails, all the relocations which have
+     * Return TRUE if there is enough memory in VRAM and GTT for the buffers
+     * added so far. If the validation fails, all buffers which have
      * been added since the last call of cs_validate will be removed and
-     * the CS will be flushed (provided there are still any relocations).
+     * the CS will be flushed (provided there are still any buffers).
      *
      * \param cs        A command stream to validate.
      */
     boolean (*cs_validate)(struct radeon_winsys_cs *cs);
 
     /**
-     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+     * Return TRUE if there is enough memory in VRAM and GTT for the buffers
      * added so far.
      *
      * \param cs        A command stream to validate.
@@ -603,6 +647,16 @@ struct radeon_winsys {
      */
     boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
 
+    /**
+     * Return the buffer list.
+     *
+     * \param cs    Command stream
+     * \param list  Returned buffer list. Set to NULL to query the count only.
+     * \return      The buffer count.
+     */
+    unsigned (*cs_get_buffer_list)(struct radeon_winsys_cs *cs,
+                                   struct radeon_bo_list_item *list);
+
     /**
      * Flush a command stream.
      *
index 8b0ce9f1bb8c4ef91bfd106ac13e827e7d691d2c..6454b8ce8c045df9161f67c96eeac65f1a939c9d 100644 (file)
@@ -62,9 +62,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
        r600_need_dma_space(&ctx->b, ncopy * 7);
 
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
-                             RADEON_PRIO_MIN);
+                             RADEON_PRIO_SDMA_BUFFER);
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
-                             RADEON_PRIO_MIN);
+                             RADEON_PRIO_SDMA_BUFFER);
 
        for (i = 0; i < ncopy; i++) {
                csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
@@ -172,9 +172,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
        r600_need_dma_space(&ctx->b, ncopy * 12);
 
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
-                             RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                             RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
-                             RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+                             RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
 
        copy_height = size * 4 / pitch;
        for (i = 0; i < ncopy; i++) {
@@ -242,7 +242,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
 
        if (src->format != dst->format ||
            rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
-           rdst->dirty_level_mask & (1 << dst_level)) {
+           (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
                goto fallback;
        }
 
index 93fa67a953ec313851b05f8edb10097daca84578..d5c5db30029d5d560b2384fe6f6c7acf8c0de78d 100644 (file)
@@ -180,19 +180,27 @@ static void si_blit_decompress_depth(struct pipe_context *ctx,
 
 static void si_blit_decompress_depth_in_place(struct si_context *sctx,
                                               struct r600_texture *texture,
+                                             bool is_stencil_sampler,
                                               unsigned first_level, unsigned last_level,
                                               unsigned first_layer, unsigned last_layer)
 {
        struct pipe_surface *zsurf, surf_tmpl = {{0}};
        unsigned layer, max_layer, checked_last_layer, level;
-
-       sctx->db_inplace_flush_enabled = true;
+       unsigned *dirty_level_mask;
+
+       if (is_stencil_sampler) {
+               sctx->db_flush_stencil_inplace = true;
+               dirty_level_mask = &texture->stencil_dirty_level_mask;
+       } else {
+               sctx->db_flush_depth_inplace = true;
+               dirty_level_mask = &texture->dirty_level_mask;
+       }
        si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
        surf_tmpl.format = texture->resource.b.b.format;
 
        for (level = first_level; level <= last_level; level++) {
-               if (!(texture->dirty_level_mask & (1 << level)))
+               if (!(*dirty_level_mask & (1 << level)))
                        continue;
 
                surf_tmpl.u.tex.level = level;
@@ -220,11 +228,12 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx,
                /* The texture will always be dirty if some layers aren't flushed.
                 * I don't think this case occurs often though. */
                if (first_layer == 0 && last_layer == max_layer) {
-                       texture->dirty_level_mask &= ~(1 << level);
+                       *dirty_level_mask &= ~(1 << level);
                }
        }
 
-       sctx->db_inplace_flush_enabled = false;
+       sctx->db_flush_depth_inplace = false;
+       sctx->db_flush_stencil_inplace = false;
        si_mark_atom_dirty(sctx, &sctx->db_render_state);
 }
 
@@ -236,17 +245,20 @@ void si_flush_depth_textures(struct si_context *sctx,
 
        while (mask) {
                struct pipe_sampler_view *view;
+               struct si_sampler_view *sview;
                struct r600_texture *tex;
 
                i = u_bit_scan(&mask);
 
                view = textures->views.views[i];
                assert(view);
+               sview = (struct si_sampler_view*)view;
 
                tex = (struct r600_texture *)view->texture;
                assert(tex->is_depth && !tex->is_flushing_texture);
 
                si_blit_decompress_depth_in_place(sctx, tex,
+                                                 sview->is_stencil_sampler,
                                                  view->u.tex.first_level, view->u.tex.last_level,
                                                  0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
        }
@@ -436,9 +448,13 @@ static void si_decompress_subresource(struct pipe_context *ctx,
        struct r600_texture *rtex = (struct r600_texture*)tex;
 
        if (rtex->is_depth && !rtex->is_flushing_texture) {
-               si_blit_decompress_depth_in_place(sctx, rtex,
+               si_blit_decompress_depth_in_place(sctx, rtex, false,
                                                  level, level,
                                                  first_layer, last_layer);
+               if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+                       si_blit_decompress_depth_in_place(sctx, rtex, true,
+                                                         level, level,
+                                                         first_layer, last_layer);
        } else if (rtex->fmask.size || rtex->cmask.size) {
                si_blit_decompress_color(ctx, rtex, level, level,
                                         first_layer, last_layer);
index e1849bad9333eb557f3d5f29ee8640c8e8681ae8..c660534677154f46f3b7e7c591cff4769d52c1a4 100644 (file)
@@ -297,7 +297,7 @@ static void si_launch_grid(
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                          shader->scratch_bo,
                                          RADEON_USAGE_READWRITE,
-                                         RADEON_PRIO_SHADER_RESOURCE_RW);
+                                         RADEON_PRIO_SCRATCH_BUFFER);
 
                scratch_buffer_va = shader->scratch_bo->gpu_address;
        }
@@ -311,7 +311,7 @@ static void si_launch_grid(
        kernel_args_va += kernel_args_offset;
 
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
-                                 RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+                                 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
 
        si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
        si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
@@ -340,7 +340,7 @@ static void si_launch_grid(
                }
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
                                          RADEON_USAGE_READWRITE,
-                                         RADEON_PRIO_SHADER_RESOURCE_RW);
+                                         RADEON_PRIO_COMPUTE_GLOBAL);
        }
 
        /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
@@ -362,7 +362,7 @@ static void si_launch_grid(
        shader_va += pc;
 #endif
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
-                                 RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+                                 RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
        si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
        si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
 
index 32ab6a9dcbfbc9158cb068074a6492ec170c63f4..d4bd7b28cf30b88514a2527479f690a93560e3f1 100644 (file)
@@ -160,7 +160,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
                /* This must be done after need_cs_space. */
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)dst, RADEON_USAGE_WRITE,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_CP_DMA);
 
                /* Flush the caches for the first copy only.
                 * Also wait for the previous CP DMA operations. */
@@ -240,9 +240,9 @@ void si_copy_buffer(struct si_context *sctx,
 
                /* This must be done after r600_need_cs_space. */
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
-                                     RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                     RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
-                                     RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+                                     RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
 
                si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
 
index d3fd201ae262d70fbece18eeae141dcc75e0aada..7d41e8d00e05a7651c1e6239e444a1a58003711d 100644 (file)
@@ -28,6 +28,7 @@
 #include "si_shader.h"
 #include "sid.h"
 #include "sid_tables.h"
+#include "ddebug/dd_util.h"
 
 
 static void si_dump_shader(struct si_shader_selector *sel, const char *name,
@@ -392,6 +393,141 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
        fprintf(f, "\n");
 }
 
+/* Decode the IB saved by the last flush into "f".
+ *
+ * Does nothing when no IB has been saved. After printing, the saved IB is
+ * freed and the reference to the last trace buffer is dropped, so the same
+ * IB is never reported twice.
+ */
+static void si_dump_last_ib(struct si_context *sctx, FILE *f)
+{
+       uint32_t *trace_map = NULL;
+       int trace_id = -1; /* passed on as-is when no trace ID can be read */
+
+       if (sctx->last_ib == NULL)
+               return;
+
+       /* We are expecting that the ddebug pipe has already waited for
+        * the context, so the trace buffer should be idle. If the GPU is
+        * hung, there is no point in waiting for it, hence the
+        * unsynchronized mapping.
+        */
+       if (sctx->last_trace_buf != NULL)
+               trace_map = sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
+                                                  NULL,
+                                                  PIPE_TRANSFER_UNSYNCHRONIZED |
+                                                  PIPE_TRANSFER_READ);
+       if (trace_map != NULL)
+               trace_id = *trace_map;
+
+       si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size, trace_id);
+
+       /* Dump only once. */
+       free(sctx->last_ib);
+       sctx->last_ib = NULL;
+       r600_resource_reference(&sctx->last_trace_buf, NULL);
+}
+
+/* Translate a buffer priority into its name without the RADEON_PRIO_ prefix.
+ *
+ * The table is indexed by the enum value. Slots without an initializer are
+ * NULL, so a priority that has no entry (or lies outside the table) yields
+ * "UNKNOWN" instead: the caller passes the result straight to a "%s"
+ * conversion, which must never receive NULL.
+ */
+static const char *priority_to_string(enum radeon_bo_priority priority)
+{
+#define ITEM(x) [RADEON_PRIO_##x] = #x
+       static const char *const table[64] = {
+               ITEM(FENCE),
+               ITEM(TRACE),
+               ITEM(SO_FILLED_SIZE),
+               ITEM(QUERY),
+               ITEM(IB1),
+               ITEM(IB2),
+               ITEM(DRAW_INDIRECT),
+               ITEM(INDEX_BUFFER),
+               ITEM(CP_DMA),
+               ITEM(VCE),
+               ITEM(UVD),
+               ITEM(SDMA_BUFFER),
+               ITEM(SDMA_TEXTURE),
+               ITEM(USER_SHADER),
+               ITEM(INTERNAL_SHADER),
+               ITEM(CONST_BUFFER),
+               ITEM(DESCRIPTORS),
+               ITEM(BORDER_COLORS),
+               ITEM(SAMPLER_BUFFER),
+               ITEM(VERTEX_BUFFER),
+               ITEM(SHADER_RW_BUFFER),
+               ITEM(RINGS_STREAMOUT),
+               ITEM(SCRATCH_BUFFER),
+               ITEM(COMPUTE_GLOBAL),
+               ITEM(SAMPLER_TEXTURE),
+               ITEM(SHADER_RW_IMAGE),
+               ITEM(SAMPLER_TEXTURE_MSAA),
+               ITEM(COLOR_BUFFER),
+               ITEM(DEPTH_BUFFER),
+               ITEM(COLOR_BUFFER_MSAA),
+               ITEM(DEPTH_BUFFER_MSAA),
+               ITEM(CMASK),
+               ITEM(DCC),
+               ITEM(HTILE),
+       };
+#undef ITEM
+
+       assert(priority < ARRAY_SIZE(table));
+
+       /* Keep release builds (where the assert is compiled out) and
+        * priorities missing from the table well-defined.
+        */
+       if (priority >= ARRAY_SIZE(table) || !table[priority])
+               return "UNKNOWN";
+
+       return table[priority];
+}
+
+/* qsort() comparator ordering buffer list items by ascending VM address.
+ *
+ * It has exactly the signature qsort() requires, so it can be passed
+ * without a function pointer cast; calling a function through a pointer
+ * of an incompatible type is undefined behavior.
+ */
+static int bo_list_compare_va(const void *pa, const void *pb)
+{
+       const struct radeon_bo_list_item *a = pa;
+       const struct radeon_bo_list_item *b = pb;
+
+       return a->vm_address < b->vm_address ? -1 :
+              a->vm_address > b->vm_address ? 1 : 0;
+}
+
+/* Print the buffer list saved by the last flush to "f", one buffer per
+ * line, sorted by VM address and expressed in 4kB pages, together with the
+ * unused address ranges ("holes") between consecutive buffers and the
+ * names of all priorities each buffer was added with.
+ *
+ * Does nothing when no list has been saved. Afterwards every buffer
+ * reference held by the list is released and the list itself is freed, so
+ * it is reported only once.
+ */
+static void si_dump_last_bo_list(struct si_context *sctx, FILE *f)
+{
+       unsigned i,j;
+
+       if (!sctx->last_bo_list)
+               return;
+
+       /* Sort the list according to VM addresses first. */
+       qsort(sctx->last_bo_list, sctx->last_bo_count,
+             sizeof(sctx->last_bo_list[0]), bo_list_compare_va);
+
+       fprintf(f, "Buffer list (in units of pages = 4kB):\n"
+               COLOR_YELLOW "        Size    VM start page         "
+               "VM end page           Usage" COLOR_RESET "\n");
+
+       for (i = 0; i < sctx->last_bo_count; i++) {
+               /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
+               const unsigned page_size = 4096;
+               uint64_t va = sctx->last_bo_list[i].vm_address;
+               uint64_t size = sctx->last_bo_list[i].buf->size;
+               bool hit = false;
+
+               /* If there's unused virtual memory between 2 buffers, print it. */
+               if (i) {
+                       uint64_t previous_va_end = sctx->last_bo_list[i-1].vm_address +
+                                                  sctx->last_bo_list[i-1].buf->size;
+
+                       if (va > previous_va_end) {
+                               fprintf(f, "  %10"PRIu64"    -- hole --\n",
+                                       (va - previous_va_end) / page_size);
+                       }
+               }
+
+               /* Print the buffer. */
+               fprintf(f, "  %10"PRIu64"    0x%013"PRIx64"       0x%013"PRIx64"       ",
+                       size / page_size, va / page_size, (va + size) / page_size);
+
+               /* Print the usage: one name per bit set in the priority
+                * mask, separated by commas.
+                */
+               for (j = 0; j < 64; j++) {
+                       if (!(sctx->last_bo_list[i].priority_usage & (1llu << j)))
+                               continue;
+
+                       fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
+                       hit = true;
+               }
+               fprintf(f, "\n");
+       }
+       fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
+                  "      Other buffers can still be allocated there.\n\n");
+
+       /* Drop the references held by the saved list; dump only once. */
+       for (i = 0; i < sctx->last_bo_count; i++)
+               pb_reference(&sctx->last_bo_list[i].buf, NULL);
+       free(sctx->last_bo_list);
+       sctx->last_bo_list = NULL;
+}
+
 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
                                unsigned flags)
 {
@@ -406,34 +542,126 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
        si_dump_shader(sctx->gs_shader, "Geometry", f);
        si_dump_shader(sctx->ps_shader, "Fragment", f);
 
-       if (sctx->last_ib) {
-               int last_trace_id = -1;
+       si_dump_last_bo_list(sctx, f);
+       si_dump_last_ib(sctx, f);
 
-               if (sctx->last_trace_buf) {
-                       /* We are expecting that the ddebug pipe has already
-                        * waited for the context, so this buffer should be idle.
-                        * If the GPU is hung, there is no point in waiting for it.
-                        */
-                       uint32_t *map =
-                               sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
-                                                      NULL,
-                                                      PIPE_TRANSFER_UNSYNCHRONIZED |
-                                                      PIPE_TRANSFER_READ);
-                       if (map)
-                               last_trace_id = *map;
+       fprintf(f, "Done.\n");
+}
+
+/* Scan the output of "dmesg" for a GPU VM fault reported after the
+ * timestamp stored in sctx->dmesg_timestamp.
+ *
+ * \param sctx      context whose dmesg_timestamp is compared against and
+ *                  then advanced to the newest timestamp seen
+ * \param out_addr  receives the hexadecimal value parsed from the first new
+ *                  VM_CONTEXT1_PROTECTION_FAULT_ADDR line; may be NULL, in
+ *                  which case only the timestamp is updated
+ * \return true if a fault was found and *out_addr was written; false
+ *         otherwise, including when "dmesg" cannot be started.
+ */
+static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
+{
+       char line[2000];
+       unsigned sec, usec;
+       int progress = 0;
+       uint64_t timestamp = 0;
+       bool fault = false;
+
+       FILE *p = popen("dmesg", "r");
+       if (!p)
+               return false;
+
+       while (fgets(line, sizeof(line), p)) {
+               char *msg;
+               /* Must be able to hold any length up to sizeof(line) - 1;
+                * a plain char would truncate it and make the index
+                * below point outside of the buffer.
+                */
+               size_t len;
+
+               /* Get the timestamp. */
+               if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
+                       assert(0);
+                       continue;
                }
+               timestamp = sec * 1000000llu + usec;
+
+               /* If just updating the timestamp. */
+               if (!out_addr)
+                       continue;
+
+               /* Process messages only if the timestamp is newer. */
+               if (timestamp <= sctx->dmesg_timestamp)
+                       continue;
+
+               /* Only process the first VM fault. */
+               if (fault)
+                       continue;
+
+               /* Remove trailing \n */
+               len = strlen(line);
+               if (len && line[len-1] == '\n')
+                       line[len-1] = 0;
+
+               /* Get the message part. */
+               msg = strchr(line, ']');
+               if (!msg) {
+                       assert(0);
+                       continue;
+               }
+               msg++;
 
-               si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
-                           last_trace_id);
-               free(sctx->last_ib); /* dump only once */
-               sctx->last_ib = NULL;
-               r600_resource_reference(&sctx->last_trace_buf, NULL);
+               /* Two-step match: a "GPU fault detected:" line must be
+                * immediately followed by the line holding the address.
+                */
+               switch (progress) {
+               case 0:
+                       if (strstr(msg, "GPU fault detected:"))
+                               progress = 1;
+                       break;
+               case 1:
+                       msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
+                       if (msg) {
+                               msg = strstr(msg, "0x");
+                               if (msg) {
+                                       msg += 2;
+                                       if (sscanf(msg, "%X", out_addr) == 1)
+                                               fault = true;
+                               }
+                       }
+                       progress = 0;
+                       break;
+               default:
+                       progress = 0;
+               }
        }
+       pclose(p);
 
-       fprintf(f, "Done.\n");
+       /* Remember the newest timestamp so that the next call only looks
+        * at messages logged after this one.
+        */
+       if (timestamp > sctx->dmesg_timestamp)
+               sctx->dmesg_timestamp = timestamp;
+       return fault;
+}
+
+/* Wait for the last gfx submission and, if the kernel log shows a new VM
+ * fault, write a fault report and terminate the process.
+ *
+ * The report goes to the file returned by dd_get_debug_file() and holds
+ * the driver/device identification, the failing VM page, the last buffer
+ * list and the last IB. When no fault is found, or no debug file can be
+ * obtained, the function simply returns.
+ */
+void si_check_vm_faults(struct si_context *sctx)
+{
+       struct pipe_screen *screen = sctx->b.b.screen;
+       FILE *f;
+       uint32_t addr;
+
+       /* Use conservative timeout 800ms, after which we won't wait any
+        * longer and assume the GPU is hung.
+        */
+       screen->fence_finish(screen, sctx->last_gfx_fence, 800*1000*1000);
+
+       /* addr is only written when a fault has been found. */
+       if (!si_vm_fault_occured(sctx, &addr))
+               return;
+
+       f = dd_get_debug_file();
+       if (!f)
+               return;
+
+       fprintf(f, "VM fault report.\n\n");
+       fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
+       fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
+       fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
+       fprintf(f, "Failing VM page: 0x%08x\n\n", addr);
+
+       si_dump_last_bo_list(sctx, f);
+       si_dump_last_ib(sctx, f);
+       fclose(f);
+
+       /* NOTE(review): the process exits with status 0 even though a
+        * fault was detected - confirm that this is intended.
+        */
+       fprintf(stderr, "Detected a VM fault, exiting...\n");
+       exit(0);
 }
 
+/* Install the debug-state dump callback on the context and, when VM fault
+ * checking is enabled through DBG_CHECK_VM, record the current dmesg
+ * timestamp as the starting point for later fault checks.
+ */
 void si_init_debug_functions(struct si_context *sctx)
 {
        sctx->b.b.dump_debug_state = si_dump_debug_state;
+
+       /* Set the initial dmesg timestamp for this context, so that
+        * only new messages will be checked for VM faults.
+        */
+       if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
+               si_vm_fault_occured(sctx, NULL);
 }
index b07ab3b94acaee00fef45ef724108592ff1aec3a..19dd14f9b6fd953d116867c39bf6e3970704a19d 100644 (file)
@@ -118,7 +118,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
        util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
 
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
-                             RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+                             RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 
        desc->list_dirty = false;
        desc->pointer_dirty = true;
@@ -138,23 +138,12 @@ static void si_release_sampler_views(struct si_sampler_views *views)
        si_release_descriptors(&views->desc);
 }
 
-static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res)
-{
-       if (res->b.b.target == PIPE_BUFFER)
-               return RADEON_PRIO_SHADER_BUFFER_RO;
-
-       if (res->b.b.nr_samples > 1)
-               return RADEON_PRIO_SHADER_TEXTURE_MSAA;
-
-       return RADEON_PRIO_SHADER_TEXTURE_RO;
-}
-
 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
                                          struct si_sampler_views *views)
 {
        uint64_t mask = views->desc.enabled_mask;
 
-       /* Add relocations to the CS. */
+       /* Add buffers to the CS. */
        while (mask) {
                int i = u_bit_scan64(&mask);
                struct si_sampler_view *rview =
@@ -165,13 +154,13 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      rview->resource, RADEON_USAGE_READ,
-                                     si_get_resource_ro_priority(rview->resource));
+                                     r600_get_sampler_view_priority(rview->resource));
        }
 
        if (!views->desc.buffer)
                return;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
-                             RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
+                             RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
 }
 
 static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
@@ -190,7 +179,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
                if (rview->resource)
                        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                rview->resource, RADEON_USAGE_READ,
-                               si_get_resource_ro_priority(rview->resource));
+                               r600_get_sampler_view_priority(rview->resource));
 
                pipe_sampler_view_reference(&views->views[slot], view);
                memcpy(views->desc.list + slot*8, view_desc, 8*4);
@@ -270,7 +259,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
        if (!states->desc.buffer)
                return;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
-                             RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
+                             RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
 }
 
 static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
@@ -335,7 +324,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 {
        uint64_t mask = buffers->desc.enabled_mask;
 
-       /* Add relocations to the CS. */
+       /* Add buffers to the CS. */
        while (mask) {
                int i = u_bit_scan64(&mask);
 
@@ -348,7 +337,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
                return;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                              buffers->desc.buffer, RADEON_USAGE_READWRITE,
-                             RADEON_PRIO_SHADER_DATA);
+                             RADEON_PRIO_DESCRIPTORS);
 }
 
 /* VERTEX BUFFERS */
@@ -369,14 +358,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
-                                     RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+                                     RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
        }
 
        if (!desc->buffer)
                return;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                              desc->buffer, RADEON_USAGE_READ,
-                             RADEON_PRIO_SHADER_DATA);
+                             RADEON_PRIO_DESCRIPTORS);
 }
 
 static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
@@ -403,7 +392,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                              desc->buffer, RADEON_USAGE_READ,
-                             RADEON_PRIO_SHADER_DATA);
+                             RADEON_PRIO_DESCRIPTORS);
 
        assert(count <= SI_NUM_VERTEX_BUFFERS);
 
@@ -447,7 +436,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
                if (!bound[ve->vertex_buffer_index]) {
                        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                              (struct r600_resource*)vb->buffer,
-                                             RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+                                             RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
                        bound[ve->vertex_buffer_index] = true;
                }
        }
@@ -870,7 +859,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 
                                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                                      rbuffer, RADEON_USAGE_READ,
-                                                     RADEON_PRIO_SHADER_BUFFER_RO);
+                                                     RADEON_PRIO_SAMPLER_BUFFER);
                        }
                }
        }
@@ -1017,10 +1006,10 @@ void si_init_all_descriptors(struct si_context *sctx)
        for (i = 0; i < SI_NUM_SHADERS; i++) {
                si_init_buffer_resources(&sctx->const_buffers[i],
                                         SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
-                                        RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+                                        RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
                si_init_buffer_resources(&sctx->rw_buffers[i],
                                         SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
-                                        RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+                                        RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
 
                si_init_descriptors(&sctx->samplers[i].views.desc,
                                    SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
index 309ae04424a652638676e1cf0eafa3ace45a6027..31b0b41e5a41c002192834be12bf164422097bd8 100644 (file)
@@ -79,9 +79,9 @@ static void si_dma_copy_buffer(struct si_context *ctx,
        r600_need_dma_space(&ctx->b, ncopy * 5);
 
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
-                             RADEON_PRIO_MIN);
+                             RADEON_PRIO_SDMA_BUFFER);
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
-                             RADEON_PRIO_MIN);
+                             RADEON_PRIO_SDMA_BUFFER);
 
        for (i = 0; i < ncopy; i++) {
                csize = size < max_csize ? size : max_csize;
@@ -178,9 +178,9 @@ static void si_dma_copy_tile(struct si_context *ctx,
        r600_need_dma_space(&ctx->b, ncopy * 9);
 
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
-                             RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                             RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
        radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
-                             RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+                             RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
 
        for (i = 0; i < ncopy; i++) {
                cheight = copy_height;
@@ -246,13 +246,13 @@ void si_dma_copy(struct pipe_context *ctx,
        goto fallback;
 
        if (src->format != dst->format || src_box->depth > 1 ||
-           rdst->dirty_level_mask != 0 ||
+           (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
            rdst->cmask.size || rdst->fmask.size ||
            rsrc->cmask.size || rsrc->fmask.size) {
                goto fallback;
        }
 
-       if (rsrc->dirty_level_mask) {
+       if (rsrc->dirty_level_mask & (1 << src_level)) {
                ctx->flush_resource(ctx, src);
        }
 
index 1d5d42657e4e80c2fcdb8da96a13f753bc5db9c2..7c147e2e44c8a5b69d9988ba416dd636e8cee78c 100644 (file)
@@ -32,7 +32,7 @@ void si_need_cs_space(struct si_context *ctx)
        struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 
        /* There are two memory usage counters in the winsys for all buffers
-        * that have been added (cs_add_reloc) and two counters in the pipe
+        * that have been added (cs_add_buffer) and two counters in the pipe
         * driver for those that haven't been added yet.
         */
        if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
@@ -85,14 +85,27 @@ void si_context_gfx_flush(void *context, unsigned flags,
        if (ctx->trace_buf)
                si_trace_emit(ctx);
 
-       /* Save the IB for debug contexts. */
        if (ctx->is_debug) {
+               unsigned i;
+
+               /* Save the IB for debug contexts. */
                free(ctx->last_ib);
                ctx->last_ib_dw_size = cs->cdw;
                ctx->last_ib = malloc(cs->cdw * 4);
                memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
                r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
                r600_resource_reference(&ctx->trace_buf, NULL);
+
+               /* Save the buffer list. */
+               if (ctx->last_bo_list) {
+                       for (i = 0; i < ctx->last_bo_count; i++)
+                               pb_reference(&ctx->last_bo_list[i].buf, NULL);
+                       free(ctx->last_bo_list);
+               }
+               ctx->last_bo_count = ws->cs_get_buffer_list(cs, NULL);
+               ctx->last_bo_list = calloc(ctx->last_bo_count,
+                                          sizeof(ctx->last_bo_list[0]));
+               ws->cs_get_buffer_list(cs, ctx->last_bo_list);
        }
 
        /* Flush the CS. */
@@ -103,6 +116,10 @@ void si_context_gfx_flush(void *context, unsigned flags,
        if (fence)
                ws->fence_reference(fence, ctx->last_gfx_fence);
 
+       /* Check VM faults if needed. */
+       if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
+               si_check_vm_faults(ctx);
+
        si_begin_new_cs(ctx);
 }
 
@@ -154,6 +171,7 @@ void si_begin_new_cs(struct si_context *ctx)
        si_mark_atom_dirty(ctx, &ctx->db_render_state);
        si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
        si_mark_atom_dirty(ctx, &ctx->spi_map);
+       si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
        si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
        si_all_descriptors_begin_new_cs(ctx);
 
index 9edee50ac8a4e3668d1b5b3666a58cbfc1d30f6b..a0283b7c96605cc539e025d4785c2e37f8ae4c74 100644 (file)
@@ -81,6 +81,11 @@ static void si_destroy_context(struct pipe_context *context)
        r600_resource_reference(&sctx->trace_buf, NULL);
        r600_resource_reference(&sctx->last_trace_buf, NULL);
        free(sctx->last_ib);
+       if (sctx->last_bo_list) {
+               for (i = 0; i < sctx->last_bo_count; i++)
+                       pb_reference(&sctx->last_bo_list[i].buf, NULL);
+               free(sctx->last_bo_list);
+       }
        FREE(sctx);
 }
 
@@ -107,6 +112,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        if (sctx == NULL)
                return NULL;
 
+       if (sscreen->b.debug_flags & DBG_CHECK_VM)
+               flags |= PIPE_CONTEXT_DEBUG;
+
        sctx->b.b.screen = screen; /* this must be set first */
        sctx->b.b.priv = priv;
        sctx->b.b.destroy = si_destroy_context;
@@ -287,6 +295,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_TEXTURE_QUERY_LOD:
        case PIPE_CAP_TEXTURE_GATHER_SM5:
        case PIPE_CAP_TGSI_TXQS:
+       case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
                return 1;
 
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
index 847853e59e95319d02b5a3e0da1a5dfdd97c73b3..2abd5b5a0c320e4972b1f2e5df3c54fa400dc283 100644 (file)
@@ -100,6 +100,7 @@ struct si_sampler_view {
          * [4..7] = buffer descriptor */
        uint32_t                        state[8];
        uint32_t                        fmask_state[8];
+       bool is_stencil_sampler;
 };
 
 struct si_sampler_state {
@@ -187,9 +188,11 @@ struct si_context {
        struct si_viewports             viewports;
        struct si_stencil_ref           stencil_ref;
        struct r600_atom                spi_map;
+       struct r600_atom                spi_ps_input;
 
        /* Precomputed states. */
        struct si_pm4_state             *init_config;
+       bool                            init_config_has_vgt_flush;
        struct si_pm4_state             *vgt_shader_config[4];
        /* With rasterizer discard, there doesn't have to be a pixel shader.
         * In that case, we bind this one: */
@@ -207,6 +210,7 @@ struct si_context {
        struct si_vertex_element        *vertex_elements;
        unsigned                        sprite_coord_enable;
        bool                            flatshade;
+       bool                            force_persample_interp;
 
        /* shader descriptors */
        struct si_descriptors           vertex_buffers;
@@ -237,7 +241,8 @@ struct si_context {
        bool                    dbcb_depth_copy_enabled;
        bool                    dbcb_stencil_copy_enabled;
        unsigned                dbcb_copy_sample;
-       bool                    db_inplace_flush_enabled;
+       bool                    db_flush_depth_inplace;
+       bool                    db_flush_stencil_inplace;
        bool                    db_depth_clear;
        bool                    db_depth_disable_expclear;
        unsigned                ps_db_shader_control;
@@ -276,6 +281,9 @@ struct si_context {
        struct r600_resource    *last_trace_buf;
        struct r600_resource    *trace_buf;
        unsigned                trace_id;
+       uint64_t                dmesg_timestamp;
+       unsigned                last_bo_count;
+       struct radeon_bo_list_item *last_bo_list;
 };
 
 /* cik_sdma.c */
@@ -310,6 +318,7 @@ void si_init_cp_dma_functions(struct si_context *sctx);
 
 /* si_debug.c */
 void si_init_debug_functions(struct si_context *sctx);
+void si_check_vm_faults(struct si_context *sctx);
 
 /* si_dma.c */
 void si_dma_copy(struct pipe_context *ctx,
index b1834afa796af6719c0b89d0c2cb931e5241bcb9..f16933c5f98f233967cfbcc4ba02b88e14e2a752 100644 (file)
@@ -140,7 +140,8 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
                struct r600_resource *ib = state->indirect_buffer;
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
-                                         RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                         RADEON_USAGE_READ,
+                                          RADEON_PRIO_IB2);
 
                radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
                radeon_emit(cs, ib->gpu_address);
index a3df64824c673e1a51c5e2dd5c24a0674dc19983..32a702fcdf5497853b27271cb6fd252c092521f1 100644 (file)
@@ -855,6 +855,56 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
        }
 }
 
+/* Return the (i,j) value to interpolate with for the function parameter
+ * "param": either the parameter itself, or a run-time selection between it
+ * and the matching per-sample parameter.
+ *
+ * This shouldn't be used by explicit INTERP opcodes.
+ */
+static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
+                                    unsigned param)
+{
+       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+       LLVMValueRef main_fn = si_shader_ctx->radeon_bld.main_fn;
+       LLVMValueRef default_ij = LLVMGetParam(main_fn, param);
+       LLVMValueRef sample_ij, state_bits, force_sample;
+       unsigned sample_param;
+
+       /* Only center/centroid parameters can be overridden, and only if
+        * the shader selector asks for it; everything else is returned
+        * unchanged.
+        *
+        * If the shader only uses one set of (i,j), "si_emit_spi_ps_input"
+        * can switch between center/centroid and sample without shader
+        * changes.
+        */
+       if (param == SI_PARAM_PERSP_CENTROID ||
+           param == SI_PARAM_PERSP_CENTER) {
+               if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
+                       return default_ij;
+
+               sample_param = SI_PARAM_PERSP_SAMPLE;
+       } else if (param == SI_PARAM_LINEAR_CENTROID ||
+                  param == SI_PARAM_LINEAR_CENTER) {
+               if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
+                       return default_ij;
+
+               sample_param = SI_PARAM_LINEAR_SAMPLE;
+       } else {
+               return default_ij;
+       }
+
+       /* Otherwise, we have to select (i,j) based on a user data SGPR. */
+       sample_ij = LLVMGetParam(main_fn, sample_param);
+
+       /* TODO: this can be done more efficiently by switching between
+        * 2 prologs.
+        */
+       state_bits = LLVMGetParam(main_fn, SI_PARAM_PS_STATE_BITS);
+       force_sample = LLVMBuildTrunc(gallivm->builder, state_bits,
+                                     LLVMInt1TypeInContext(gallivm->context), "");
+
+       return LLVMBuildSelect(gallivm->builder, force_sample,
+                              sample_ij, default_ij, "");
+}
+
 static void declare_input_fs(
        struct radeon_llvm_context *radeon_bld,
        unsigned input_index,
@@ -925,7 +975,7 @@ static void declare_input_fs(
        if (interp_param_idx == -1)
                return;
        else if (interp_param_idx)
-               interp_param = LLVMGetParam(main_fn, interp_param_idx);
+               interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
 
        /* fs.constant returns the param from the middle vertex, so it's not
         * really useful for flat shading. It's meant to be used for custom
@@ -3458,6 +3508,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 
        case TGSI_PROCESSOR_FRAGMENT:
                params[SI_PARAM_ALPHA_REF] = f32;
+               params[SI_PARAM_PS_STATE_BITS] = i32;
                params[SI_PARAM_PRIM_MASK] = i32;
                last_sgpr = SI_PARAM_PRIM_MASK;
                params[SI_PARAM_PERSP_SAMPLE] = v2i32;
index 2305b9988b8e2f04797e3e32282822eee72e088a..b92fa02a171d2b9f90a57925c485224d982eaa9c 100644 (file)
@@ -88,6 +88,7 @@ struct radeon_shader_reloc;
 #define SI_SGPR_TCS_OUT_LAYOUT 9  /* TCS & TES only */
 #define SI_SGPR_TCS_IN_LAYOUT  10 /* TCS only */
 #define SI_SGPR_ALPHA_REF      8  /* PS only */
+#define SI_SGPR_PS_STATE_BITS  9  /* PS only */
 
 #define SI_VS_NUM_USER_SGPR    12
 #define SI_LS_NUM_USER_SGPR    13
@@ -95,7 +96,7 @@ struct radeon_shader_reloc;
 #define SI_TES_NUM_USER_SGPR   10
 #define SI_GS_NUM_USER_SGPR    8
 #define SI_GSCOPY_NUM_USER_SGPR        4
-#define SI_PS_NUM_USER_SGPR    9
+#define SI_PS_NUM_USER_SGPR    10
 
 /* LLVM function parameter indices */
 #define SI_PARAM_RW_BUFFERS    0
@@ -148,23 +149,27 @@ struct radeon_shader_reloc;
 
 /* PS only parameters */
 #define SI_PARAM_ALPHA_REF             4
-#define SI_PARAM_PRIM_MASK             5
-#define SI_PARAM_PERSP_SAMPLE          6
-#define SI_PARAM_PERSP_CENTER          7
-#define SI_PARAM_PERSP_CENTROID                8
-#define SI_PARAM_PERSP_PULL_MODEL      9
-#define SI_PARAM_LINEAR_SAMPLE         10
-#define SI_PARAM_LINEAR_CENTER         11
-#define SI_PARAM_LINEAR_CENTROID       12
-#define SI_PARAM_LINE_STIPPLE_TEX      13
-#define SI_PARAM_POS_X_FLOAT           14
-#define SI_PARAM_POS_Y_FLOAT           15
-#define SI_PARAM_POS_Z_FLOAT           16
-#define SI_PARAM_POS_W_FLOAT           17
-#define SI_PARAM_FRONT_FACE            18
-#define SI_PARAM_ANCILLARY             19
-#define SI_PARAM_SAMPLE_COVERAGE       20
-#define SI_PARAM_POS_FIXED_PT          21
+/* Bits:
+ * 0: force_persample_interp
+ */
+#define SI_PARAM_PS_STATE_BITS         5
+#define SI_PARAM_PRIM_MASK             6
+#define SI_PARAM_PERSP_SAMPLE          7
+#define SI_PARAM_PERSP_CENTER          8
+#define SI_PARAM_PERSP_CENTROID                9
+#define SI_PARAM_PERSP_PULL_MODEL      10
+#define SI_PARAM_LINEAR_SAMPLE         11
+#define SI_PARAM_LINEAR_CENTER         12
+#define SI_PARAM_LINEAR_CENTROID       13
+#define SI_PARAM_LINE_STIPPLE_TEX      14
+#define SI_PARAM_POS_X_FLOAT           15
+#define SI_PARAM_POS_Y_FLOAT           16
+#define SI_PARAM_POS_Z_FLOAT           17
+#define SI_PARAM_POS_W_FLOAT           18
+#define SI_PARAM_FRONT_FACE            19
+#define SI_PARAM_ANCILLARY             20
+#define SI_PARAM_SAMPLE_COVERAGE       21
+#define SI_PARAM_POS_FIXED_PT          22
 
 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
 
@@ -182,6 +187,14 @@ struct si_shader_selector {
        /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
        unsigned        type;
 
+       /* Whether the shader has to use a conditional assignment to
+        * choose between weights when emulating
+        * pipe_rasterizer_state::force_persample_interp.
+        * If false, "si_emit_spi_ps_input" will take care of it instead.
+        */
+       bool            forces_persample_interp_for_persp;
+       bool            forces_persample_interp_for_linear;
+
        unsigned        gs_output_prim;
        unsigned        gs_max_out_vertices;
        unsigned        gs_num_invocations;
index d74f6e896c4c6323ca3c07db025a47689ba12762..00d4bc1fbc2ea6307d4a36b500e81f7276e5c4fe 100644 (file)
@@ -686,6 +686,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 
        rs->two_side = state->light_twoside;
        rs->multisample_enable = state->multisample;
+       rs->force_persample_interp = state->force_persample_interp;
        rs->clip_plane_enable = state->clip_plane_enable;
        rs->line_stipple_enable = state->line_stipple_enable;
        rs->poly_stipple_enable = state->poly_stipple_enable;
@@ -998,10 +999,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
                            S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
                            S_028000_COPY_CENTROID(1) |
                            S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
-       } else if (sctx->db_inplace_flush_enabled) {
+       } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
                radeon_emit(cs,
-                           S_028000_DEPTH_COMPRESS_DISABLE(1) |
-                           S_028000_STENCIL_COMPRESS_DISABLE(1));
+                           S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
+                           S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
        } else if (sctx->db_depth_clear) {
                radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
        } else {
@@ -2238,7 +2239,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
                        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                tex->cmask_buffer, RADEON_USAGE_READWRITE,
-                               RADEON_PRIO_COLOR_META);
+                               RADEON_PRIO_CMASK);
                }
 
                radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
@@ -2285,7 +2286,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                if (zb->db_htile_data_base) {
                        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                              rtex->htile_buffer, RADEON_USAGE_READWRITE,
-                                             RADEON_PRIO_DEPTH_META);
+                                             RADEON_PRIO_HTILE);
                }
 
                radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
@@ -2411,6 +2412,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
        pipe_resource_reference(&view->base.texture, texture);
        view->resource = &tmp->resource;
 
+       if (state->format == PIPE_FORMAT_X24S8_UINT ||
+           state->format == PIPE_FORMAT_S8X24_UINT ||
+           state->format == PIPE_FORMAT_X32_S8X24_UINT ||
+           state->format == PIPE_FORMAT_S8_UINT)
+               view->is_stencil_sampler = true;
+
        /* Buffer resource. */
        if (texture->target == PIPE_BUFFER) {
                unsigned stride, num_records;
@@ -3391,7 +3398,7 @@ static void si_init_config(struct si_context *sctx)
        if (sctx->b.chip_class >= CIK)
                si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
        si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
-                     RADEON_PRIO_SHADER_DATA);
+                     RADEON_PRIO_BORDER_COLORS);
 
        si_pm4_upload_indirect_buffer(sctx, pm4);
        sctx->init_config = pm4;
index 3fc0799c2b4e1f94187156de32219b0375c875e4..6a567688ee47b823e1196219877e83100bffa391 100644 (file)
@@ -50,6 +50,7 @@ struct si_state_rasterizer {
        bool                    flatshade;
        bool                    two_side;
        bool                    multisample_enable;
+       bool                    force_persample_interp;
        bool                    line_stipple_enable;
        unsigned                sprite_coord_enable;
        unsigned                pa_sc_line_stipple;
@@ -123,6 +124,7 @@ union si_state_atoms {
                struct r600_atom *viewports;
                struct r600_atom *stencil_ref;
                struct r600_atom *spi_map;
+               struct r600_atom *spi_ps_input;
        } s;
        struct r600_atom *array[0];
 };
index 6d8e0e509bf88102b17a1b5a119bc4c05fad5668..5face4239413287b7339e1d7ec453a8cbf337ac2 100644 (file)
@@ -353,7 +353,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
        if (sctx->scratch_buffer) {
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      sctx->scratch_buffer, RADEON_USAGE_READWRITE,
-                                     RADEON_PRIO_SHADER_RESOURCE_RW);
+                                     RADEON_PRIO_SCRATCH_BUFFER);
 
        }
        sctx->emit_scratch_reloc = false;
@@ -467,7 +467,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      t->buf_filled_size, RADEON_USAGE_READ,
-                                     RADEON_PRIO_MIN);
+                                     RADEON_PRIO_SO_FILLED_SIZE);
        }
 
        /* draw packet */
@@ -521,7 +521,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource *)info->indirect,
-                                     RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                     RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
        }
 
        if (info->indexed) {
@@ -531,7 +531,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource *)ib->buffer,
-                                     RADEON_USAGE_READ, RADEON_PRIO_MIN);
+                                     RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
 
                if (info->indirect) {
                        uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
@@ -813,9 +813,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                }
        }
 
-       /* TODO: VI should read index buffers through TC, so this shouldn't be
-        * needed on VI. */
-       if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
+       /* VI reads index buffers through TC L2. */
+       if (info->indexed && sctx->b.chip_class <= CIK &&
+           r600_resource(ib.buffer)->TC_L2_dirty) {
                sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
                r600_resource(ib.buffer)->TC_L2_dirty = false;
        }
@@ -858,6 +858,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                struct r600_texture *rtex = (struct r600_texture *)surf->texture;
 
                rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+               if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+                       rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
        }
        if (sctx->framebuffer.compressed_cb_mask) {
                struct pipe_surface *surf;
@@ -883,7 +886,7 @@ void si_trace_emit(struct si_context *sctx)
 
        sctx->trace_id++;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
-                             RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
+                             RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
        radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
        radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
                    S_370_WR_CONFIRM(1) |
index b5e14ead1604e54735fff6d325692719d5f8ed75..f673388b121a8c8007311ac66cb622e360a6aa7f 100644 (file)
@@ -103,7 +103,7 @@ static void si_shader_ls(struct si_shader *shader)
                return;
 
        va = shader->bo->gpu_address;
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
 
        /* We need at least 2 components for LS.
         * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
@@ -138,7 +138,7 @@ static void si_shader_hs(struct si_shader *shader)
                return;
 
        va = shader->bo->gpu_address;
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
 
        num_user_sgprs = SI_TCS_NUM_USER_SGPR;
        num_sgprs = shader->num_sgprs;
@@ -173,7 +173,7 @@ static void si_shader_es(struct si_shader *shader)
                return;
 
        va = shader->bo->gpu_address;
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
 
        if (shader->selector->type == PIPE_SHADER_VERTEX) {
                vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
@@ -279,7 +279,7 @@ static void si_shader_gs(struct si_shader *shader)
                       S_028B90_ENABLE(gs_num_invocations > 0));
 
        va = shader->bo->gpu_address;
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
        si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
        si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
 
@@ -327,7 +327,7 @@ static void si_shader_vs(struct si_shader *shader)
                si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
 
        va = shader->bo->gpu_address;
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
 
        if (shader->is_gs_copy_shader) {
                vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
@@ -400,7 +400,7 @@ static void si_shader_ps(struct si_shader *shader)
        struct si_pm4_state *pm4;
        unsigned i, spi_ps_in_control;
        unsigned num_sgprs, num_user_sgprs;
-       unsigned spi_baryc_cntl = 0, spi_ps_input_ena;
+       unsigned spi_baryc_cntl = 0;
        uint64_t va;
 
        pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -437,19 +437,6 @@ static void si_shader_ps(struct si_shader *shader)
                S_0286D8_BC_OPTIMIZE_DISABLE(1);
 
        si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
-       spi_ps_input_ena = shader->spi_ps_input_ena;
-       /* we need to enable at least one of them, otherwise we hang the GPU */
-       assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) ||
-           G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) ||
-           G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) ||
-           G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) ||
-           G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) ||
-           G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) ||
-           G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) ||
-           G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena));
-
-       si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
-       si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
        si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
 
        si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format);
@@ -458,7 +445,7 @@ static void si_shader_ps(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
 
        va = shader->bo->gpu_address;
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
        si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
        si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
 
@@ -680,6 +667,34 @@ static void *si_create_shader_state(struct pipe_context *ctx,
        tgsi_scan_shader(state->tokens, &sel->info);
        p_atomic_inc(&sscreen->b.num_shaders_created);
 
+       /* First set which opcode uses which (i,j) pair. */
+       if (sel->info.uses_persp_opcode_interp_centroid)
+               sel->info.uses_persp_centroid = true;
+
+       if (sel->info.uses_linear_opcode_interp_centroid)
+               sel->info.uses_linear_centroid = true;
+
+       if (sel->info.uses_persp_opcode_interp_offset ||
+           sel->info.uses_persp_opcode_interp_sample)
+               sel->info.uses_persp_center = true;
+
+       if (sel->info.uses_linear_opcode_interp_offset ||
+           sel->info.uses_linear_opcode_interp_sample)
+               sel->info.uses_linear_center = true;
+
+       /* Determine if the shader has to use a conditional assignment when
+        * emulating force_persample_interp.
+        */
+       sel->forces_persample_interp_for_persp =
+               sel->info.uses_persp_center +
+               sel->info.uses_persp_centroid +
+               sel->info.uses_persp_sample >= 2;
+
+       sel->forces_persample_interp_for_linear =
+               sel->info.uses_linear_center +
+               sel->info.uses_linear_centroid +
+               sel->info.uses_linear_sample >= 2;
+
        switch (pipe_shader_type) {
        case PIPE_SHADER_GEOMETRY:
                sel->gs_output_prim =
@@ -1064,6 +1079,77 @@ bcolor:
        assert(ps->nparam == num_written);
 }
 
+/* Emit SPI_PS_INPUT_ENA/ADDR and, if the shader needs it, the PS state bits SGPR. */
+static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct si_shader *ps = sctx->ps_shader->current;
+       unsigned input_ena = ps->spi_ps_input_ena;
+
+       /* we need to enable at least one of them, otherwise we hang the GPU */
+       assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
+           G_0286CC_PERSP_CENTER_ENA(input_ena) ||
+           G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
+           G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
+           G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
+           G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
+           G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
+           G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
+
+       if (sctx->force_persample_interp) {
+               unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
+                                    G_0286CC_PERSP_CENTER_ENA(input_ena) +
+                                    G_0286CC_PERSP_CENTROID_ENA(input_ena);
+               unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
+                                     G_0286CC_LINEAR_CENTER_ENA(input_ena) +
+                                     G_0286CC_LINEAR_CENTROID_ENA(input_ena);
+
+               /* If only one set of (i,j) coordinates is used, we can disable
+                * CENTER/CENTROID and enable SAMPLE (with the S_ field setter, not
+                * the G_ getter), so SAMPLE coordinates are loaded where
+                * CENTER/CENTROID are expected, forcing per-sample interpolation.
+                */
+               if (num_persp == 1) {
+                       input_ena &= C_0286CC_PERSP_CENTER_ENA;
+                       input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+                       input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
+               }
+               if (num_linear == 1) {
+                       input_ena &= C_0286CC_LINEAR_CENTER_ENA;
+                       input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+                       input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
+               }
+
+               /* If at least 2 sets of coordinates are used, this trick can't
+                * work; the shader selects SAMPLE itself using the
+                * "force_persample_interp" user SGPR emitted below. */
+       }
+
+       radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
+       radeon_emit(cs, input_ena);
+       radeon_emit(cs, input_ena);
+
+       if (ps->selector->forces_persample_interp_for_persp ||
+           ps->selector->forces_persample_interp_for_linear)
+               radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
+                                     SI_SGPR_PS_STATE_BITS * 4,
+                                 sctx->force_persample_interp);
+}
+
+/**
+ * Emit VGT_FLUSH once; CONFIG/UCONFIG VGT register writes require it first.
+ */
+static void si_init_config_add_vgt_flush(struct si_context *sctx)
+{
+       if (!sctx->init_config_has_vgt_flush) {
+               si_pm4_cmd_begin(sctx->init_config, PKT3_EVENT_WRITE);
+               si_pm4_cmd_add(sctx->init_config,
+                              EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+               si_pm4_cmd_end(sctx->init_config, false);
+               sctx->init_config_has_vgt_flush = true;
+       }
+}
+
 /* Initialize state related to ESGS / GSVS ring buffers */
 static void si_init_gs_rings(struct si_context *sctx)
 {
@@ -1084,6 +1170,8 @@ static void si_init_gs_rings(struct si_context *sctx)
                return;
        }
 
+       si_init_config_add_vgt_flush(sctx);
+
        /* Append these registers to the init config state. */
        if (sctx->b.chip_class >= CIK) {
                if (sctx->b.chip_class >= VI) {
@@ -1330,6 +1418,8 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 
        assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
 
+       si_init_config_add_vgt_flush(sctx);
+
        /* Append these registers to the init config state. */
        if (sctx->b.chip_class >= CIK) {
                si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
@@ -1535,6 +1625,12 @@ bool si_update_shaders(struct si_context *sctx)
                si_mark_atom_dirty(sctx, &sctx->spi_map);
        }
 
+       if (si_pm4_state_changed(sctx, ps) ||
+           sctx->force_persample_interp != rs->force_persample_interp) {
+               sctx->force_persample_interp = rs->force_persample_interp;
+               si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+       }
+
        if (si_pm4_state_changed(sctx, ls) ||
            si_pm4_state_changed(sctx, hs) ||
            si_pm4_state_changed(sctx, es) ||
@@ -1563,6 +1659,7 @@ bool si_update_shaders(struct si_context *sctx)
 void si_init_shader_functions(struct si_context *sctx)
 {
        si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
+       si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
 
        sctx->b.b.create_vs_state = si_create_vs_state;
        sctx->b.b.create_tcs_state = si_create_tcs_state;
index d8606f3c07e478e920387be684cb0a8ea6549246..d468cf4de5485158bd37c7823a84f8111871e125 100644 (file)
@@ -247,6 +247,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
    case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
    }
    /* should only get here on unhandled cases */
index 7ef36b367d3723e807bc389835d018d55189e3bd..57e37fcfe14af43463d722809fbc693ea07be9ce 100644 (file)
@@ -243,7 +243,7 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
    } else {
       FREE(transfer);
    }
-   
+
    return map;
 }
 
@@ -275,9 +275,9 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe,
    struct svga_screen *ss = svga_screen(pipe->screen);
    struct svga_context *svga = svga_context(pipe);
    struct svga_buffer *sbuf = svga_buffer(transfer->resource);
-   
+
    pipe_mutex_lock(ss->swc_mutex);
-   
+
    assert(sbuf->map.count);
    if (sbuf->map.count) {
       --sbuf->map.count;
@@ -296,7 +296,7 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe,
           */
 
          SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");
-   
+
          sbuf->dma.flags.discard = TRUE;
 
          svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
@@ -316,28 +316,28 @@ svga_buffer_destroy( struct pipe_screen *screen,
    struct svga_buffer *sbuf = svga_buffer( buf );
 
    assert(!p_atomic_read(&buf->reference.count));
-   
+
    assert(!sbuf->dma.pending);
 
-   if(sbuf->handle)
+   if (sbuf->handle)
       svga_buffer_destroy_host_surface(ss, sbuf);
-   
-   if(sbuf->uploaded.buffer)
+
+   if (sbuf->uploaded.buffer)
       pipe_resource_reference(&sbuf->uploaded.buffer, NULL);
 
-   if(sbuf->hwbuf)
+   if (sbuf->hwbuf)
       svga_buffer_destroy_hw_storage(ss, sbuf);
-   
-   if(sbuf->swbuf && !sbuf->user)
+
+   if (sbuf->swbuf && !sbuf->user)
       align_free(sbuf->swbuf);
-   
+
    ss->total_resource_bytes -= sbuf->size;
 
    FREE(sbuf);
 }
 
 
-struct u_resource_vtbl svga_buffer_vtbl = 
+struct u_resource_vtbl svga_buffer_vtbl =
 {
    u_default_resource_get_handle,      /* get_handle */
    svga_buffer_destroy,                     /* resource_destroy */
@@ -355,11 +355,11 @@ svga_buffer_create(struct pipe_screen *screen,
 {
    struct svga_screen *ss = svga_screen(screen);
    struct svga_buffer *sbuf;
-   
+
    sbuf = CALLOC_STRUCT(svga_buffer);
-   if(!sbuf)
+   if (!sbuf)
       goto error1;
-   
+
    sbuf->b.b = *template;
    sbuf->b.vtbl = &svga_buffer_vtbl;
    pipe_reference_init(&sbuf->b.b.reference, 1);
@@ -378,7 +378,7 @@ svga_buffer_create(struct pipe_screen *screen,
       }
    }
 
-   if(svga_buffer_needs_hw_storage(template->bind)) {
+   if (svga_buffer_needs_hw_storage(template->bind)) {
 
       /* If the buffer will be used for vertex/index/stream data, set all
        * the flags so that the buffer will be accepted for all those uses.
@@ -396,22 +396,22 @@ svga_buffer_create(struct pipe_screen *screen,
             sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT;
       }
 
-      if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
+      if (svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
          goto error2;
    }
    else {
       sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64);
-      if(!sbuf->swbuf)
+      if (!sbuf->swbuf)
          goto error2;
    }
-      
+
    debug_reference(&sbuf->b.b.reference,
                    (debug_reference_descriptor)debug_describe_resource, 0);
 
    sbuf->size = util_resource_size(&sbuf->b.b);
    ss->total_resource_bytes += sbuf->size;
 
-   return &sbuf->b.b; 
+   return &sbuf->b.b;
 
 error2:
    FREE(sbuf);
@@ -419,6 +419,7 @@ error1:
    return NULL;
 }
 
+
 struct pipe_resource *
 svga_user_buffer_create(struct pipe_screen *screen,
                         void *ptr,
@@ -426,11 +427,11 @@ svga_user_buffer_create(struct pipe_screen *screen,
                        unsigned bind)
 {
    struct svga_buffer *sbuf;
-   
+
    sbuf = CALLOC_STRUCT(svga_buffer);
-   if(!sbuf)
+   if (!sbuf)
       goto no_sbuf;
-      
+
    pipe_reference_init(&sbuf->b.b.reference, 1);
    sbuf->b.vtbl = &svga_buffer_vtbl;
    sbuf->b.b.screen = screen;
@@ -448,8 +449,8 @@ svga_user_buffer_create(struct pipe_screen *screen,
 
    debug_reference(&sbuf->b.b.reference,
                    (debug_reference_descriptor)debug_describe_resource, 0);
-   
-   return &sbuf->b.b; 
+
+   return &sbuf->b.b;
 
 no_sbuf:
    return NULL;
index ffa5bce80c0bff3bec55ced699ebd5bcdc5363f9..9c33a79db0f039387fb6c2fd31d137fa3ef1b18c 100644 (file)
@@ -48,9 +48,11 @@ svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv)
 {
    char res[128];
    debug_describe_resource(res, sv->texture);
-   util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod);
+   util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>",
+                res, sv->min_lod, sv->max_lod);
 }
 
+
 struct svga_sampler_view *
 svga_get_tex_sampler_view(struct pipe_context *pipe,
                          struct pipe_resource *pt,
@@ -58,10 +60,11 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_screen *ss = svga_screen(pipe->screen);
-   struct svga_texture *tex = svga_texture(pt); 
+   struct svga_texture *tex = svga_texture(pt);
    struct svga_sampler_view *sv = NULL;
    SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE;
-   SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, PIPE_BIND_SAMPLER_VIEW);
+   SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format,
+                                                      PIPE_BIND_SAMPLER_VIEW);
    boolean view = TRUE;
 
    assert(pt);
@@ -155,7 +158,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
       sv->key.cachable = 0;
       sv->handle = tex->handle;
       debug_reference(&sv->reference,
-                      (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
+                      (debug_reference_descriptor)
+                      svga_debug_describe_sampler_view, 0);
       return sv;
    }
 
@@ -164,13 +168,16 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
    pipe_mutex_unlock(ss->tex_mutex);
 
    debug_reference(&sv->reference,
-                   (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
+                   (debug_reference_descriptor)
+                   svga_debug_describe_sampler_view, 0);
 
    return sv;
 }
 
+
 void
-svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v)
+svga_validate_sampler_view(struct svga_context *svga,
+                           struct svga_sampler_view *v)
 {
    struct svga_texture *tex = svga_texture(v->texture);
    unsigned numFaces;
@@ -186,7 +193,7 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
 
    age = tex->age;
 
-   if(tex->b.b.target == PIPE_TEXTURE_CUBE)
+   if (tex->b.b.target == PIPE_TEXTURE_CUBE)
       numFaces = 6;
    else
       numFaces = 1;
@@ -207,12 +214,13 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
    v->age = age;
 }
 
+
 void
 svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
 {
    struct svga_texture *tex = svga_texture(v->texture);
 
-   if(v->handle != tex->handle) {
+   if (v->handle != tex->handle) {
       struct svga_screen *ss = svga_screen(v->texture->screen);
       SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
       svga_screen_surface_destroy(ss, &v->key, &v->handle);
index 44b6f4a42607103244c05d296a3005aeec02c5bb..e0a28788238b578dbcb7e1dce7634d3369e0a066 100644 (file)
@@ -380,6 +380,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
    case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return 0;
    }
 
index b6d6de0dca329fd486244d4cb3c1dcd42872c9e5..75592d3bf8bdcf0f7c98447f4f06d96d6478bb8a 100644 (file)
@@ -704,6 +704,24 @@ emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
          assert(size == 0);
       }
 
+      if (size % 16 != 0) {
+         /* GL's buffer range sizes can be any number of bytes but the
+          * SVGA3D device requires a multiple of 16 bytes.
+          */
+         const unsigned total_size = buffer->b.b.width0;
+
+         if (offset + align(size, 16) <= total_size) {
+            /* round up size to multiple of 16 */
+            size = align(size, 16);
+         }
+         else {
+            /* round down to multiple of 16 (this may cause rendering problems
+             * but should avoid a device error).  Clear the low four bits.
+             */
+            size &= ~15;
+         }
+      }
+
       assert(size % 16 == 0);
       ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                   index,
index 87d781d088d8eecd91146dc9607309b158667895..d490fad5167042c3724c10145edd82f705e4bf2d 100644 (file)
@@ -188,6 +188,9 @@ vc4_context_destroy(struct pipe_context *pctx)
         pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
         pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
 
+        pipe_surface_reference(&vc4->color_write, NULL);
+        pipe_surface_reference(&vc4->color_read, NULL);
+
         vc4_program_fini(pctx);
 
         ralloc_free(vc4);
index 7502293180a8f8f2f81c95e628f0465c1d659671..c7698422951864b1e6deb45fb5f8e7a20219d1b9 100644 (file)
@@ -103,7 +103,6 @@ struct vc4_uncompiled_shader {
         /** How many variants of this program were compiled, for shader-db. */
         uint32_t compiled_variant_count;
         struct pipe_shader_state base;
-        const struct tgsi_token *twoside_tokens;
 };
 
 struct vc4_ubo_range {
index 01ea75449847a7e9edfbe13a6950bc0980d5a0d9..31c7e28ff57e0efa583f655f9012b3fa87ef15e6 100644 (file)
@@ -1738,27 +1738,6 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         }
 
         const struct tgsi_token *tokens = key->shader_state->base.tokens;
-        if (c->fs_key && c->fs_key->light_twoside) {
-                if (!key->shader_state->twoside_tokens) {
-                        const struct tgsi_lowering_config lowering_config = {
-                                .color_two_side = true,
-                        };
-                        struct tgsi_shader_info info;
-                        key->shader_state->twoside_tokens =
-                                tgsi_transform_lowering(&lowering_config,
-                                                        key->shader_state->base.tokens,
-                                                        &info);
-
-                        /* If no transformation occurred, then NULL is
-                         * returned and we just use our original tokens.
-                         */
-                        if (!key->shader_state->twoside_tokens) {
-                                key->shader_state->twoside_tokens =
-                                        key->shader_state->base.tokens;
-                        }
-                }
-                tokens = key->shader_state->twoside_tokens;
-        }
 
         if (vc4_debug & VC4_DEBUG_TGSI) {
                 fprintf(stderr, "%s prog %d/%d TGSI:\n",
@@ -1772,6 +1751,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         nir_convert_to_ssa(c->s);
         if (stage == QSTAGE_FRAG)
                 vc4_nir_lower_blend(c);
+        if (c->fs_key && c->fs_key->light_twoside)
+                nir_lower_two_sided_color(c->s);
         vc4_nir_lower_io(c);
         nir_lower_idiv(c->s);
         nir_lower_load_const_to_scalar(c->s);
@@ -2190,8 +2171,6 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
         hash_table_foreach(vc4->vs_cache, entry)
                 delete_from_cache_if_matches(vc4->vs_cache, entry, so);
 
-        if (so->twoside_tokens != so->base.tokens)
-                free((void *)so->twoside_tokens);
         free((void *)so->base.tokens);
         free(so);
 }
index c4b52e1e61d750e775ff15a20fd5295092f04839..739ac86193a2be049ed3131bb03e8e8515ad883f 100644 (file)
@@ -181,6 +181,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
        case PIPE_CAP_DEPTH_BOUNDS_TEST:
        case PIPE_CAP_TGSI_TXQS:
+       case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
                 return 0;
 
                 /* Stream output. */
index 7cfd236349d3fc6b09a1bf7d4af69ab89e49b3d4..76980ca32af8c8fc1e7dc54bc3299152e9319c1e 100644 (file)
@@ -106,10 +106,15 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
 {
         for (int i = 0; i < exec->bo_count; i++) {
                 struct drm_gem_cma_object *obj = exec->bo[i];
-                struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
+                struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
+                struct vc4_bo *bo = drm_bo->bo;
 
                 memcpy(bo->map, obj->vaddr, bo->size);
 
+                if (drm_bo->validated_shader) {
+                        free(drm_bo->validated_shader->texture_samples);
+                        free(drm_bo->validated_shader);
+                }
                 free(obj);
         }
 
index 47fa82a1e2014dc88f4030031fd86b053b308f93..a4947154f17e1c283ff9697bef0d3738cc7be278 100644 (file)
@@ -631,6 +631,7 @@ enum pipe_cap
    PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
    PIPE_CAP_DEPTH_BOUNDS_TEST,
    PIPE_CAP_TGSI_TXQS,
+   PIPE_CAP_FORCE_PERSAMPLE_INTERP,
 };
 
 #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
index 266ebbafe36b0d4cfd114af1b65681aee97aa0fd..4bf8d46c68691946096f30ee501bb89c8d059296 100644 (file)
@@ -103,6 +103,7 @@ struct pipe_rasterizer_state
    unsigned point_tri_clip:1; /** large points clipped as tris or points */
    unsigned point_size_per_vertex:1; /**< size computed in vertex shader */
    unsigned multisample:1;         /* XXX maybe more ms state in future */
+   unsigned force_persample_interp:1;
    unsigned line_smooth:1;
    unsigned line_stipple_enable:1;
    unsigned line_last_pixel:1;
index 91b443147d64a4f65209a97eec579abbf7570e46..019414b56fec2ca21bc0de9c867104d8a9530cc1 100644 (file)
@@ -188,10 +188,10 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
        * may occur as the stvis->color_format.
        */
       switch(format) {
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
+      case PIPE_FORMAT_BGRA8888_UNORM:
         depth = 32;
         break;
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
+      case PIPE_FORMAT_BGRX8888_UNORM:
         depth = 24;
         break;
       case PIPE_FORMAT_B5G6R5_UNORM:
@@ -261,13 +261,13 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable,
       case PIPE_FORMAT_B5G6R5_UNORM:
          image_format = __DRI_IMAGE_FORMAT_RGB565;
          break;
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
+      case PIPE_FORMAT_BGRX8888_UNORM:
          image_format = __DRI_IMAGE_FORMAT_XRGB8888;
          break;
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
+      case PIPE_FORMAT_BGRA8888_UNORM:
          image_format = __DRI_IMAGE_FORMAT_ARGB8888;
          break;
-      case PIPE_FORMAT_R8G8B8A8_UNORM:
+      case PIPE_FORMAT_RGBA8888_UNORM:
          image_format = __DRI_IMAGE_FORMAT_ABGR8888;
          break;
       default:
@@ -314,10 +314,10 @@ dri2_allocate_buffer(__DRIscreen *sPriv,
 
    switch (format) {
       case 32:
-         pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+         pf = PIPE_FORMAT_BGRA8888_UNORM;
          break;
       case 24:
-         pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+         pf = PIPE_FORMAT_BGRX8888_UNORM;
          break;
       case 16:
          pf = PIPE_FORMAT_Z16_UNORM;
@@ -724,13 +724,13 @@ dri2_create_image_from_winsys(__DRIscreen *_screen,
       pf = PIPE_FORMAT_B5G6R5_UNORM;
       break;
    case __DRI_IMAGE_FORMAT_XRGB8888:
-      pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+      pf = PIPE_FORMAT_BGRX8888_UNORM;
       break;
    case __DRI_IMAGE_FORMAT_ARGB8888:
-      pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+      pf = PIPE_FORMAT_BGRA8888_UNORM;
       break;
    case __DRI_IMAGE_FORMAT_ABGR8888:
-      pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+      pf = PIPE_FORMAT_RGBA8888_UNORM;
       break;
    default:
       pf = PIPE_FORMAT_NONE;
@@ -845,13 +845,13 @@ dri2_create_image(__DRIscreen *_screen,
       pf = PIPE_FORMAT_B5G6R5_UNORM;
       break;
    case __DRI_IMAGE_FORMAT_XRGB8888:
-      pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+      pf = PIPE_FORMAT_BGRX8888_UNORM;
       break;
    case __DRI_IMAGE_FORMAT_ARGB8888:
-      pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+      pf = PIPE_FORMAT_BGRA8888_UNORM;
       break;
    case __DRI_IMAGE_FORMAT_ABGR8888:
-      pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+      pf = PIPE_FORMAT_RGBA8888_UNORM;
       break;
    default:
       pf = PIPE_FORMAT_NONE;
@@ -1293,6 +1293,7 @@ dri2_load_opencl_interop(struct dri_screen *screen)
 }
 
 struct dri2_fence {
+   struct dri_screen *driscreen;
    struct pipe_fence_handle *pipe_fence;
    void *cl_event;
 };
@@ -1313,6 +1314,7 @@ dri2_create_fence(__DRIcontext *_ctx)
       return NULL;
    }
 
+   fence->driscreen = dri_screen(_ctx->driScreenPriv);
    return fence;
 }
 
@@ -1336,6 +1338,7 @@ dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event)
       return NULL;
    }
 
+   fence->driscreen = driscreen;
    return fence;
 }
 
@@ -1360,9 +1363,9 @@ static GLboolean
 dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
                       uint64_t timeout)
 {
-   struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv);
-   struct pipe_screen *screen = driscreen->base.screen;
    struct dri2_fence *fence = (struct dri2_fence*)_fence;
+   struct dri_screen *driscreen = fence->driscreen;
+   struct pipe_screen *screen = driscreen->base.screen;
 
    /* No need to flush. The context was flushed when the fence was created. */
 
index 0d2929aaaa1300b64546853e12e5864f08581c69..f0cc4a2a3ef042f2cff67f664978bf4c8e13b1dc 100644 (file)
@@ -231,11 +231,11 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
       if (format == __DRI_TEXTURE_FORMAT_RGB)  {
          /* only need to cover the formats recognized by dri_fill_st_visual */
          switch (internal_format) {
-         case PIPE_FORMAT_B8G8R8A8_UNORM:
-            internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
+         case PIPE_FORMAT_BGRA8888_UNORM:
+            internal_format = PIPE_FORMAT_BGRX8888_UNORM;
             break;
-         case PIPE_FORMAT_A8R8G8B8_UNORM:
-            internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
+         case PIPE_FORMAT_ARGB8888_UNORM:
+            internal_format = PIPE_FORMAT_XRGB8888_UNORM;
             break;
          default:
             break;
index 84fc40b923d5c3665a85ce6ce1e205daab865526..48f76cfe8af9693aa0439246d86cc1c3f231a864 100644 (file)
@@ -370,7 +370,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
 
 #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
 
-int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
+int amdgpu_lookup_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
 {
    unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
    int i = cs->buffer_indices_hashlist[hash];
@@ -379,15 +379,15 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
    if (i == -1 || cs->buffers[i].bo == bo)
       return i;
 
-   /* Hash collision, look for the BO in the list of relocs linearly. */
+   /* Hash collision, look for the BO in the list of buffers linearly. */
    for (i = cs->num_buffers - 1; i >= 0; i--) {
       if (cs->buffers[i].bo == bo) {
-         /* Put this reloc in the hash list.
+         /* Put this buffer in the hash list.
           * This will prevent additional hash collisions if there are
-          * several consecutive get_reloc calls for the same buffer.
+          * several consecutive lookup_buffer calls for the same buffer.
           *
           * Example: Assuming buffers A,B,C collide in the hash list,
-          * the following sequence of relocs:
+          * the following sequence of buffers:
           *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
           * will collide here: ^ and here:   ^,
           * meaning that we should get very few collisions in the end. */
@@ -398,32 +398,33 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
    return -1;
 }
 
-static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
+static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
                                  struct amdgpu_winsys_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
 {
-   struct amdgpu_cs_buffer *reloc;
+   struct amdgpu_cs_buffer *buffer;
    unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
    int i = -1;
 
-   priority = MIN2(priority, 15);
+   assert(priority < 64);
    *added_domains = 0;
 
-   i = amdgpu_get_reloc(cs, bo);
+   i = amdgpu_lookup_buffer(cs, bo);
 
    if (i >= 0) {
-      reloc = &cs->buffers[i];
-      reloc->usage |= usage;
-      *added_domains = domains & ~reloc->domains;
-      reloc->domains |= domains;
-      cs->flags[i] = MAX2(cs->flags[i], priority);
+      buffer = &cs->buffers[i];
+      buffer->priority_usage |= 1llu << priority;
+      buffer->usage |= usage;
+      *added_domains = domains & ~buffer->domains;
+      buffer->domains |= domains;
+      cs->flags[i] = MAX2(cs->flags[i], priority / 4);
       return i;
    }
 
-   /* New relocation, check if the backing array is large enough. */
+   /* New buffer, check if the backing array is large enough. */
    if (cs->num_buffers >= cs->max_num_buffers) {
       uint32_t size;
       cs->max_num_buffers += 10;
@@ -437,16 +438,17 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
       cs->flags = realloc(cs->flags, cs->max_num_buffers);
    }
 
-   /* Initialize the new relocation. */
+   /* Initialize the new buffer. */
    cs->buffers[cs->num_buffers].bo = NULL;
    amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
    cs->handles[cs->num_buffers] = bo->bo;
-   cs->flags[cs->num_buffers] = priority;
+   cs->flags[cs->num_buffers] = priority / 4;
    p_atomic_inc(&bo->num_cs_references);
-   reloc = &cs->buffers[cs->num_buffers];
-   reloc->bo = bo;
-   reloc->usage = usage;
-   reloc->domains = domains;
+   buffer = &cs->buffers[cs->num_buffers];
+   buffer->bo = bo;
+   buffer->priority_usage = 1llu << priority;
+   buffer->usage = usage;
+   buffer->domains = domains;
 
    cs->buffer_indices_hashlist[hash] = cs->num_buffers;
 
@@ -454,7 +456,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
    return cs->num_buffers++;
 }
 
-static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
+static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                     struct radeon_winsys_cs_handle *buf,
                                     enum radeon_bo_usage usage,
                                     enum radeon_bo_domain domains,
@@ -466,7 +468,7 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
    struct amdgpu_cs *cs = amdgpu_cs(rcs);
    struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
    enum radeon_bo_domain added_domains;
-   unsigned index = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain,
+   unsigned index = amdgpu_add_buffer(cs, bo, usage, bo->initial_domain,
                                      priority, &added_domains);
 
    if (added_domains & RADEON_DOMAIN_GTT)
@@ -477,12 +479,12 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
    return index;
 }
 
-static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
+static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                struct radeon_winsys_cs_handle *buf)
 {
    struct amdgpu_cs *cs = amdgpu_cs(rcs);
 
-   return amdgpu_get_reloc(cs, (struct amdgpu_winsys_bo*)buf);
+   return amdgpu_lookup_buffer(cs, (struct amdgpu_winsys_bo*)buf);
 }
 
 static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
@@ -500,6 +502,22 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64
    return status;
 }
 
+static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
+                                          struct radeon_bo_list_item *list)
+{
+    struct amdgpu_cs *cs = amdgpu_cs(rcs);
+    unsigned i; /* unsigned to match cs->num_buffers */
+
+    if (list) { /* a NULL list only queries the buffer count */
+        for (i = 0; i < cs->num_buffers; i++) {
+            pb_reference(&list[i].buf, &cs->buffers[i].bo->base);
+            list[i].vm_address = cs->buffers[i].bo->va;
+            list[i].priority_usage = cs->buffers[i].priority_usage;
+        }
+    }
+    return cs->num_buffers; /* number of buffers currently in the CS */
+}
+
 static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
                                     struct pipe_fence_handle **out_fence)
 {
@@ -621,8 +639,8 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
       fprintf(stderr, "amdgpu: command stream overflowed\n");
    }
 
-   amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
-                      RADEON_USAGE_READ, 0, RADEON_PRIO_MIN);
+   amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer,
+                      RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
 
    /* If the CS is not empty or overflowed.... */
    if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
@@ -682,10 +700,11 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
    ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
    ws->base.cs_create = amdgpu_cs_create;
    ws->base.cs_destroy = amdgpu_cs_destroy;
-   ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
-   ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
+   ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
+   ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
    ws->base.cs_validate = amdgpu_cs_validate;
    ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+   ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
    ws->base.cs_flush = amdgpu_cs_flush;
    ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
    ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
index 12c6b624b03b1ca9f305ad5acb28683deccc8ce1..bae5d73bb3cf0d9883a5590cb94f7ac2e154066f 100644 (file)
@@ -45,6 +45,7 @@ struct amdgpu_ctx {
 
 struct amdgpu_cs_buffer {
    struct amdgpu_winsys_bo *bo;
+   uint64_t priority_usage; /* bit N is set once the BO was added with priority N */
    enum radeon_bo_usage usage;
    enum radeon_bo_domain domains;
 };
@@ -68,7 +69,7 @@ struct amdgpu_cs {
    struct amdgpu_cs_request    request;
    struct amdgpu_cs_ib_info    ib;
 
-   /* Relocs. */
+   /* Buffers. */
    unsigned                    max_num_buffers;
    unsigned                    num_buffers;
    amdgpu_bo_handle            *handles;
@@ -115,7 +116,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
    *rdst = rsrc;
 }
 
-int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
+int amdgpu_lookup_buffer(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
 
 static inline struct amdgpu_cs *
 amdgpu_cs(struct radeon_winsys_cs *base)
@@ -129,7 +130,7 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
 {
    int num_refs = bo->num_cs_references;
    return num_refs == bo->rws->num_cs ||
-         (num_refs && amdgpu_get_reloc(cs, bo) != -1);
+         (num_refs && amdgpu_lookup_buffer(cs, bo) != -1);
 }
 
 static inline boolean
@@ -142,7 +143,7 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
    if (!bo->num_cs_references)
       return FALSE;
 
-   index = amdgpu_get_reloc(cs, bo);
+   index = amdgpu_lookup_buffer(cs, bo);
    if (index == -1)
       return FALSE;
 
index 2c4f990944c8cfc2746a928bb25e4d992a24c8ee..32b56f989cd2b5b8284a8141f1405243c6146b8d 100644 (file)
 /*
     This file replaces libdrm's radeon_cs_gem with our own implemention.
     It's optimized specifically for Radeon DRM.
-    Reloc writes and space checking are faster and simpler than their
+    Adding buffers and space checking are faster and simpler than their
     counterparts in libdrm (the time complexity of all the functions
     is O(1) in nearly all scenarios, thanks to hashing).
 
     It works like this:
 
-    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
+    cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
     also adds the size of 'buf' to the used_gart and used_vram winsys variables
     based on the domains, which are simply or'd for the accounting purposes.
     The adding is skipped if the reloc is already present in the list, but it
@@ -58,8 +58,8 @@
     (done in the pipe driver)
 
     cs_write_reloc(cs, buf) just writes a reloc that has been added using
-    cs_add_reloc. The read_domain and write_domain parameters have been removed,
-    because we already specify them in cs_add_reloc.
+    cs_add_buffer. The read_domain and write_domain parameters have been removed,
+    because we already specify them in cs_add_buffer.
 */
 
 #include "radeon_drm_cs.h"
@@ -99,8 +99,8 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
 
     csc->fd = ws->fd;
     csc->nrelocs = 512;
-    csc->relocs_bo = (struct radeon_bo**)
-                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
+    csc->relocs_bo = (struct radeon_bo_item*)
+                     CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
     if (!csc->relocs_bo) {
         return FALSE;
     }
@@ -139,8 +139,8 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
     unsigned i;
 
     for (i = 0; i < csc->crelocs; i++) {
-        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
-        radeon_bo_reference(&csc->relocs_bo[i], NULL);
+        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
+        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
     }
 
     csc->crelocs = 0;
@@ -221,21 +221,21 @@ static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
     reloc->flags = MAX2(reloc->flags, priority);
 }
 
-int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
+int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
 {
     unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
     int i = csc->reloc_indices_hashlist[hash];
 
     /* not found or found */
-    if (i == -1 || csc->relocs_bo[i] == bo)
+    if (i == -1 || csc->relocs_bo[i].bo == bo)
         return i;
 
     /* Hash collision, look for the BO in the list of relocs linearly. */
     for (i = csc->crelocs - 1; i >= 0; i--) {
-        if (csc->relocs_bo[i] == bo) {
+        if (csc->relocs_bo[i].bo == bo) {
             /* Put this reloc in the hash list.
              * This will prevent additional hash collisions if there are
-             * several consecutive get_reloc calls for the same buffer.
+             * several consecutive lookup_buffer calls for the same buffer.
              *
              * Example: Assuming buffers A,B,C collide in the hash list,
              * the following sequence of relocs:
@@ -249,7 +249,7 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
     return -1;
 }
 
-static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
+static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
@@ -263,16 +263,17 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
     enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
     int i = -1;
 
-    priority = MIN2(priority, 15);
+    assert(priority < 64);
     *added_domains = 0;
 
-    i = radeon_get_reloc(csc, bo);
+    i = radeon_lookup_buffer(csc, bo);
 
     if (i >= 0) {
         reloc = &csc->relocs[i];
-        update_reloc(reloc, rd, wd, priority, added_domains);
+        update_reloc(reloc, rd, wd, priority / 4, added_domains);
+        csc->relocs_bo[i].priority_usage |= 1llu << priority;
 
-        /* For async DMA, every add_reloc call must add a buffer to the list
+        /* For async DMA, every add_buffer call must add a buffer to the list
          * no matter how many duplicates there are. This is due to the fact
          * the DMA CS checker doesn't use NOP packets for offset patching,
          * but always uses the i-th buffer from the list to patch the i-th
@@ -292,7 +293,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
         uint32_t size;
         csc->nrelocs += 10;
 
-        size = csc->nrelocs * sizeof(struct radeon_bo*);
+        size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
         csc->relocs_bo = realloc(csc->relocs_bo, size);
 
         size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
@@ -302,14 +303,15 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
     }
 
     /* Initialize the new relocation. */
-    csc->relocs_bo[csc->crelocs] = NULL;
-    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
+    csc->relocs_bo[csc->crelocs].bo = NULL;
+    csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
+    radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
     p_atomic_inc(&bo->num_cs_references);
     reloc = &csc->relocs[csc->crelocs];
     reloc->handle = bo->handle;
     reloc->read_domains = rd;
     reloc->write_domain = wd;
-    reloc->flags = priority;
+    reloc->flags = priority / 4;
 
     csc->reloc_indices_hashlist[hash] = csc->crelocs;
 
@@ -319,7 +321,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
     return csc->crelocs++;
 }
 
-static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
+static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct radeon_winsys_cs_handle *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
@@ -328,7 +330,8 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct radeon_bo *bo = (struct radeon_bo*)buf;
     enum radeon_bo_domain added_domains;
-    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);
+    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
+                                       &added_domains);
 
     if (added_domains & RADEON_DOMAIN_GTT)
         cs->csc->used_gart += bo->base.size;
@@ -338,12 +341,12 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
     return index;
 }
 
-static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
+static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                    struct radeon_winsys_cs_handle *buf)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
 
-    return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
+    return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
 }
 
 static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
@@ -356,14 +359,14 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
     if (status) {
         cs->csc->validated_crelocs = cs->csc->crelocs;
     } else {
-        /* Remove lately-added relocations. The validation failed with them
+        /* Remove lately-added buffers. The validation failed with them
          * and the CS is about to be flushed because of that. Keep only
-         * the already-validated relocations. */
+         * the already-validated buffers. */
         unsigned i;
 
         for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
-            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
-            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
+            p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
+            radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
         }
         cs->csc->crelocs = cs->csc->validated_crelocs;
 
@@ -397,6 +400,22 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui
     return gtt < cs->ws->info.gart_size * 0.7;
 }
 
+static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
+                                              struct radeon_bo_list_item *list)
+{
+    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+    unsigned i; /* unsigned to match csc->crelocs */
+
+    if (list) { /* a NULL list only queries the buffer count */
+        for (i = 0; i < cs->csc->crelocs; i++) {
+            pb_reference(&list[i].buf, &cs->csc->relocs_bo[i].bo->base);
+            list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
+            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
+        }
+    }
+    return cs->csc->crelocs; /* number of buffers currently in the CS */
+}
+
 void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
 {
     unsigned i;
@@ -425,7 +444,7 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs
     }
 
     for (i = 0; i < csc->crelocs; i++)
-        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
+        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
 
     radeon_cs_context_cleanup(csc);
 }
@@ -513,7 +532,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
 
         for (i = 0; i < crelocs; i++) {
             /* Update the number of active asynchronous CS ioctls for the buffer. */
-            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
+            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
         }
 
         switch (cs->base.ring_type) {
@@ -607,7 +626,7 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
     if (!bo->num_cs_references)
         return FALSE;
 
-    index = radeon_get_reloc(cs->csc, bo);
+    index = radeon_lookup_buffer(cs->csc, bo);
     if (index == -1)
         return FALSE;
 
@@ -631,9 +650,9 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
     fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                        RADEON_DOMAIN_GTT, 0);
     /* Add the fence as a dummy relocation. */
-    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
+    cs->ws->base.cs_add_buffer(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                               RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
-                              RADEON_PRIO_MIN);
+                              RADEON_PRIO_FENCE);
     return (struct pipe_fence_handle*)fence;
 }
 
@@ -657,10 +676,11 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.ctx_destroy = radeon_drm_ctx_destroy;
     ws->base.cs_create = radeon_drm_cs_create;
     ws->base.cs_destroy = radeon_drm_cs_destroy;
-    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
-    ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
+    ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
+    ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
     ws->base.cs_validate = radeon_drm_cs_validate;
     ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
+    ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
     ws->base.cs_flush = radeon_drm_cs_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
     ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
index 6ceb8e98ee71959de92378b87724f7fe653bea3a..81f66f56d99dab07dde74e84e9a4d7a19c1e2621 100644 (file)
 
 #include "radeon_drm_bo.h"
 
+struct radeon_bo_item {
+    struct radeon_bo    *bo;
+    uint64_t            priority_usage; /* bit N is set once the BO was added with priority N */
+};
+
 struct radeon_cs_context {
     uint32_t                    buf[16 * 1024];
 
@@ -40,12 +45,13 @@ struct radeon_cs_context {
 
     uint32_t                    cs_trace_id;
 
-    /* Relocs. */
+    /* Buffers. */
     unsigned                    nrelocs;
     unsigned                    crelocs;
     unsigned                    validated_crelocs;
-    struct radeon_bo            **relocs_bo;
+    struct radeon_bo_item       *relocs_bo;
     struct drm_radeon_cs_reloc  *relocs;
+    uint64_t                    *priority_usage;
 
     int                         reloc_indices_hashlist[512];
 
@@ -77,7 +83,7 @@ struct radeon_drm_cs {
     struct radeon_bo                    *trace_buf;
 };
 
-int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
+int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo);
 
 static inline struct radeon_drm_cs *
 radeon_drm_cs(struct radeon_winsys_cs *base)
@@ -91,7 +97,7 @@ radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
 {
     int num_refs = bo->num_cs_references;
     return num_refs == bo->rws->num_cs ||
-           (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
+           (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1);
 }
 
 static inline boolean
@@ -103,7 +109,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
     if (!bo->num_cs_references)
         return FALSE;
 
-    index = radeon_get_reloc(cs->csc, bo);
+    index = radeon_lookup_buffer(cs->csc, bo);
     if (index == -1)
         return FALSE;
 
index 09665df3048391e9bdde17ad84855f68590d9542..99585956a49f63744237646b5d303a4a760d7bb1 100644 (file)
@@ -54,7 +54,7 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context
     }
 
     memset(&args, 0, sizeof(args));
-    args.handle = csc->relocs_bo[0]->handle;
+    args.handle = csc->relocs_bo[0].bo->handle;
     for (i = 0; i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT; i++) {
         usleep(1);
         lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args));
@@ -94,15 +94,15 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context
     fprintf(dump, "\n");
 
     for (i = 0; i < csc->crelocs; i++) {
-        unsigned j, ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
+        unsigned j, ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
 
-        ptr = radeon_bo_do_map(csc->relocs_bo[i]);
+        ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
         if (ptr) {
             fprintf(dump, "static uint32_t bo_%04d_data[%d] = {\n   ", i, ndw);
             for (j = 0; j < ndw; j++) {
                 if (j && !(j % 8)) {
                     uint32_t offset = (j - 8) << 2;
-                    fprintf(dump, "  /* [0x%08x] va[0x%016"PRIx64"] */\n   ", offset, offset + csc->relocs_bo[i]->va);
+                    fprintf(dump, "  /* [0x%08x] va[0x%016"PRIx64"] */\n   ", offset, offset + csc->relocs_bo[i].bo->va);
                 }
                 fprintf(dump, " 0x%08x,", ptr[j]);
             }
@@ -139,16 +139,16 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context
     fprintf(dump, "\n");
 
     for (i = 0; i < csc->crelocs; i++) {
-        unsigned ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
+        unsigned ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
         uint32_t *ptr;
 
-        ptr = radeon_bo_do_map(csc->relocs_bo[i]);
+        ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
         if (ptr) {
             fprintf(dump, "    bo[%d] = bo_new(&ctx, %d, bo_%04d_data, 0x%016"PRIx64", 0x%08x);\n",
-                    i, ndw, i, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
+                    i, ndw, i, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
         } else {
             fprintf(dump, "    bo[%d] = bo_new(&ctx, %d, NULL, 0x%016"PRIx64", 0x%08x);\n",
-                    i, ndw, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
+                    i, ndw, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
         }
     }
     fprintf(dump, "\n");
index 0cd3d2847564bab16d7d86975c139d898d83ccc6..65a26268c2e6ccf57c797c4bd49605e1fcf2482c 100644 (file)
@@ -187,7 +187,6 @@ LIBGLSL_FILES = \
        opt_constant_variable.cpp \
        opt_copy_propagation.cpp \
        opt_copy_propagation_elements.cpp \
-       opt_cse.cpp \
        opt_dead_builtin_variables.cpp \
        opt_dead_builtin_varyings.cpp \
        opt_dead_code.cpp \
index 849a8ea29fd126395ca2f7a08bd90f360e05e9f9..f38ca84d129989824841e30bc13d0079723604ab 100644 (file)
@@ -6358,7 +6358,7 @@ ast_interface_block::hir(exec_list *instructions,
          const glsl_type *block_array_type =
             process_array_type(&loc, block_type, this->array_specifier, state);
 
-          /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
+         /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
           *
           *     * Arrays of arrays of blocks are not allowed
           */
index f0abeb03215c3ffd02ebd1cdc767e759a0e78333..c1bcccc34f42c60562e8062f913257bb4b8a2e0b 100644 (file)
@@ -2786,6 +2786,17 @@ layout_defaults:
       if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) {
          YYERROR;
       }
+
+      /* From the GLSL 4.50 spec, section 4.4.5:
+       *
+       *     "It is a compile-time error to specify the binding identifier for
+       *     the global scope or for block member declarations."
+       */
+      if (state->default_shader_storage_qualifier->flags.q.explicit_binding) {
+         _mesa_glsl_error(& @1, state,
+                          "binding qualifier cannot be set for default layout");
+      }
+
       $$ = NULL;
    }
 
index c02d28c0866dd72d959c9b5f88d685ca4ab5d2b3..692b1228ee9895f44d9478cb800f5da2c51fb150 100644 (file)
@@ -1901,7 +1901,6 @@ do_common_optimization(exec_list *ir, bool linked,
       progress = do_constant_variable_unlinked(ir) || progress;
    progress = do_constant_folding(ir) || progress;
    progress = do_minmax_prune(ir) || progress;
-   progress = do_cse(ir) || progress;
    progress = do_rebalance_tree(ir) || progress;
    progress = do_algebraic(ir, native_integers, options) || progress;
    progress = do_lower_jumps(ir) || progress;
index 112e5ffdf7ceb1b29d561040bf6659a6f69a7d60..9ef2fbf2525c38b2b735e4a41dbbeb8075112fe7 100644 (file)
@@ -1661,8 +1661,8 @@ glsl_type::std430_size(bool row_major) const
       unsigned int array_len;
 
       if (this->is_array()) {
-         element_type = this->fields.array;
-         array_len = this->length;
+         element_type = this->without_array();
+         array_len = this->arrays_of_arrays_size();
       } else {
          element_type = this;
          array_len = 1;
@@ -1685,10 +1685,12 @@ glsl_type::std430_size(bool row_major) const
    }
 
    if (this->is_array()) {
-      if (this->fields.array->is_record())
-         return this->length * this->fields.array->std430_size(row_major);
+      if (this->without_array()->is_record())
+         return this->arrays_of_arrays_size() *
+            this->without_array()->std430_size(row_major);
       else
-         return this->length * this->fields.array->std430_base_alignment(row_major);
+         return this->arrays_of_arrays_size() *
+            this->without_array()->std430_base_alignment(row_major);
    }
 
    if (this->is_record() || this->is_interface()) {
index 265b2234cb638ccde2bd4c4e83f646c236bd75b3..ce5c4929bfb0000458cb0679094c1e81b7f89e8a 100644 (file)
@@ -87,7 +87,6 @@ bool do_constant_variable_unlinked(exec_list *instructions);
 bool do_copy_propagation(exec_list *instructions);
 bool do_copy_propagation_elements(exec_list *instructions);
 bool do_constant_propagation(exec_list *instructions);
-bool do_cse(exec_list *instructions);
 void do_dead_builtin_varyings(struct gl_context *ctx,
                               gl_shader *producer, gl_shader *consumer,
                               unsigned num_tfeedback_decls,
index 858a7da6bb955e894df5eef5bfef93a1d4c64054..50fe76b7ea28fbd6fee39653a6722b1bf61fbe6c 100644 (file)
@@ -110,11 +110,7 @@ struct gl_uniform_storage {
     */
    bool initialized;
 
-   struct gl_opaque_uniform_index sampler[MESA_SHADER_STAGES];
-
-   struct gl_opaque_uniform_index image[MESA_SHADER_STAGES];
-
-   struct gl_opaque_uniform_index subroutine[MESA_SHADER_STAGES];
+   struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES];
 
    /**
     * Storage used by the driver for the uniform
index b0a4ec3f2fbfa93060a3a6d13828af6b47483251..e9e108a276551abf29533f460cd4c50d05772db5 100644 (file)
@@ -135,16 +135,16 @@ set_opaque_binding(gl_shader_program *prog, const char *name, int binding)
 
       if (shader) {
          if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
-             storage->sampler[sh].active) {
+             storage->opaque[sh].active) {
             for (unsigned i = 0; i < elements; i++) {
-               const unsigned index = storage->sampler[sh].index + i;
+               const unsigned index = storage->opaque[sh].index + i;
                shader->SamplerUnits[index] = storage->storage[i].i;
             }
 
          } else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
-                    storage->image[sh].active) {
+                    storage->opaque[sh].active) {
             for (unsigned i = 0; i < elements; i++) {
-               const unsigned index = storage->image[sh].index + i;
+               const unsigned index = storage->opaque[sh].index + i;
                shader->ImageUnits[index] = storage->storage[i].i;
             }
          }
@@ -244,8 +244,8 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
          for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
             gl_shader *shader = prog->_LinkedShaders[sh];
 
-            if (shader && storage->sampler[sh].active) {
-               unsigned index = storage->sampler[sh].index;
+            if (shader && storage->opaque[sh].active) {
+               unsigned index = storage->opaque[sh].index;
 
                shader->SamplerUnits[index] = storage->storage[0].i;
             }
index 740b0a46aeec7aaeb55e80fd1ee39e83ab5878c4..0ccd9c8c8652b948fbe83ae2c35e663794c91cbd 100644 (file)
@@ -566,7 +566,7 @@ private:
                         struct gl_uniform_storage *uniform, const char *name)
    {
       if (base_type->is_sampler()) {
-         uniform->sampler[shader_type].active = true;
+         uniform->opaque[shader_type].active = true;
 
          /* Handle multiple samplers inside struct arrays */
          if (this->record_array_count > 1) {
@@ -586,8 +586,8 @@ private:
                /* In this case, we've already seen this uniform so we just use
                 * the next sampler index recorded the last time we visited.
                 */
-               uniform->sampler[shader_type].index = index;
-               index = inner_array_size + uniform->sampler[shader_type].index;
+               uniform->opaque[shader_type].index = index;
+               index = inner_array_size + uniform->opaque[shader_type].index;
                this->record_next_sampler->put(index, name_copy);
 
                ralloc_free(name_copy);
@@ -605,13 +605,13 @@ private:
                 * structs. This allows the offset to be easily calculated for
                 * indirect indexing.
                 */
-               uniform->sampler[shader_type].index = this->next_sampler;
+               uniform->opaque[shader_type].index = this->next_sampler;
                this->next_sampler +=
                   inner_array_size * this->record_array_count;
 
                /* Store the next index for future passes over the struct array
                 */
-               index = uniform->sampler[shader_type].index + inner_array_size;
+               index = uniform->opaque[shader_type].index + inner_array_size;
                this->record_next_sampler->put(index, name_copy);
                ralloc_free(name_copy);
             }
@@ -619,22 +619,19 @@ private:
             /* Increment the sampler by 1 for non-arrays and by the number of
              * array elements for arrays.
              */
-            uniform->sampler[shader_type].index = this->next_sampler;
+            uniform->opaque[shader_type].index = this->next_sampler;
             this->next_sampler += MAX2(1, uniform->array_elements);
          }
 
          const gl_texture_index target = base_type->sampler_index();
          const unsigned shadow = base_type->sampler_shadow;
-         for (unsigned i = uniform->sampler[shader_type].index;
+         for (unsigned i = uniform->opaque[shader_type].index;
               i < MIN2(this->next_sampler, MAX_SAMPLERS);
               i++) {
             this->targets[i] = target;
             this->shader_samplers_used |= 1U << i;
             this->shader_shadow_samplers |= shadow << i;
          }
-      } else {
-         uniform->sampler[shader_type].index = ~0;
-         uniform->sampler[shader_type].active = false;
       }
    }
 
@@ -642,17 +639,14 @@ private:
                       struct gl_uniform_storage *uniform)
    {
       if (base_type->is_image()) {
-         uniform->image[shader_type].index = this->next_image;
-         uniform->image[shader_type].active = true;
+         uniform->opaque[shader_type].index = this->next_image;
+         uniform->opaque[shader_type].active = true;
 
          /* Increment the image index by 1 for non-arrays and by the
           * number of array elements for arrays.
           */
          this->next_image += MAX2(1, uniform->array_elements);
 
-      } else {
-         uniform->image[shader_type].index = ~0;
-         uniform->image[shader_type].active = false;
       }
    }
 
@@ -660,17 +654,14 @@ private:
                            struct gl_uniform_storage *uniform)
    {
       if (base_type->is_subroutine()) {
-         uniform->subroutine[shader_type].index = this->next_subroutine;
-         uniform->subroutine[shader_type].active = true;
+         uniform->opaque[shader_type].index = this->next_subroutine;
+         uniform->opaque[shader_type].active = true;
 
          /* Increment the subroutine index by 1 for non-arrays and by the
           * number of array elements for arrays.
           */
          this->next_subroutine += MAX2(1, uniform->array_elements);
 
-      } else {
-         uniform->subroutine[shader_type].index = ~0;
-         uniform->subroutine[shader_type].active = false;
       }
    }
 
@@ -738,13 +729,17 @@ private:
         base_type = type;
       }
 
+      /* Initialise opaque data */
+      this->uniforms[id].opaque[shader_type].index = ~0;
+      this->uniforms[id].opaque[shader_type].active = false;
+
       /* This assigns uniform indices to sampler and image uniforms. */
       handle_samplers(base_type, &this->uniforms[id], name);
       handle_images(base_type, &this->uniforms[id]);
       handle_subroutines(base_type, &this->uniforms[id]);
 
       /* For array of arrays or struct arrays the base location may have
-       * already been set so dont set it again.
+       * already been set so don't set it again.
        */
       if (ubo_block_index == -1 && current_var->data.location == -1) {
          current_var->data.location = id;
@@ -769,7 +764,7 @@ private:
                this->explicit_location + field_counter;
             field_counter += entries;
          } else {
-         this->uniforms[id].remap_location = this->explicit_location;
+            this->uniforms[id].remap_location = this->explicit_location;
          }
       } else {
          /* Initialize to indicate that no location is set */
@@ -820,12 +815,13 @@ private:
         if (type->without_array()->is_matrix()) {
             const glsl_type *matrix = type->without_array();
             const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4;
-            const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
+            const unsigned items =
+               row_major ? matrix->matrix_columns : matrix->vector_elements;
 
             assert(items <= 4);
             if (packing == GLSL_INTERFACE_PACKING_STD430)
                this->uniforms[id].matrix_stride = items < 3 ? items * N :
-                                                          glsl_align(items * N, 16);
+                                                    glsl_align(items * N, 16);
             else
                this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
            this->uniforms[id].row_major = row_major;
@@ -1029,7 +1025,7 @@ link_set_image_access_qualifiers(struct gl_shader_program *prog)
             assert(found);
             (void) found;
             const gl_uniform_storage *storage = &prog->UniformStorage[id];
-            const unsigned index = storage->image[i].index;
+            const unsigned index = storage->opaque[i].index;
             const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
                                    var->data.image_write_only ? GL_WRITE_ONLY :
                                    GL_READ_WRITE);
@@ -1159,7 +1155,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
       foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
         ir_variable *const var = node->as_variable();
 
-        if ((var == NULL) || (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage))
+         if ((var == NULL) || (var->data.mode != ir_var_uniform &&
+                               var->data.mode != ir_var_shader_storage))
            continue;
 
         parcel.set_and_process(prog, var);
@@ -1168,7 +1165,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
       prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
       prog->_LinkedShaders[i]->shadow_samplers = parcel.shader_shadow_samplers;
 
-      STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == sizeof(parcel.targets));
+      STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) ==
+                    sizeof(parcel.targets));
       memcpy(prog->_LinkedShaders[i]->SamplerTargets, parcel.targets,
              sizeof(prog->_LinkedShaders[i]->SamplerTargets));
    }
@@ -1238,7 +1236,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
          if (!sh)
             continue;
 
-         if (!uniforms[i].subroutine[j].active)
+         if (!uniforms[i].opaque[j].active)
             continue;
 
          /* How many new entries for this uniform? */
@@ -1268,7 +1266,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
          if (!sh)
             continue;
 
-         if (!uniforms[i].subroutine[j].active)
+         if (!uniforms[i].opaque[j].active)
             continue;
 
          sh->SubroutineUniformRemapTable =
index dbf300ac6915f32a86592d4257ad6104ba00ecd0..a97b4ef0a32d11e43268fe94a2b058bdd1eb8c8d 100644 (file)
@@ -2132,7 +2132,7 @@ link_intrastage_shaders(void *mem_ctx,
 
 
    if (!ok) {
-      ctx->Driver.DeleteShader(ctx, linked);
+      _mesa_delete_shader(ctx, linked);
       return NULL;
    }
 
@@ -3421,10 +3421,13 @@ build_program_resource_list(struct gl_shader_program *shProg)
    if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
       return;
 
-   if (!add_packed_varyings(shProg, input_stage))
-      return;
-   if (!add_packed_varyings(shProg, output_stage))
-      return;
+   /* Program interface needs to expose varyings in case of SSO. */
+   if (shProg->SeparateShader) {
+      if (!add_packed_varyings(shProg, input_stage))
+         return;
+      if (!add_packed_varyings(shProg, output_stage))
+         return;
+   }
 
    /* Add inputs and outputs to the resource list. */
    if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
@@ -3497,7 +3500,7 @@ build_program_resource_list(struct gl_shader_program *shProg)
          continue;
 
       for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) {
-         if (!shProg->UniformStorage[i].subroutine[j].active)
+         if (!shProg->UniformStorage[i].opaque[j].active)
             continue;
 
          type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j);
@@ -3732,7 +3735,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
    for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
       if (prog->_LinkedShaders[i] != NULL)
-        ctx->Driver.DeleteShader(ctx, prog->_LinkedShaders[i]);
+        _mesa_delete_shader(ctx, prog->_LinkedShaders[i]);
 
       prog->_LinkedShaders[i] = NULL;
    }
@@ -3747,7 +3750,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
          if (!prog->LinkStatus) {
             if (sh)
-               ctx->Driver.DeleteShader(ctx, sh);
+               _mesa_delete_shader(ctx, sh);
             goto done;
          }
 
@@ -3770,7 +3773,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
          }
          if (!prog->LinkStatus) {
             if (sh)
-               ctx->Driver.DeleteShader(ctx, sh);
+               _mesa_delete_shader(ctx, sh);
             goto done;
          }
 
index 6cd23340e3f32d3e85199ead689e25cff7136ed3..6bedb4eb8e60edeb5b96cf1e17d27d848c2f7257 100644 (file)
@@ -166,6 +166,8 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
    shader->info.separate_shader = shader_prog->SeparateShader;
    shader->info.gs.vertices_out = sh->Geom.VerticesOut;
    shader->info.gs.invocations = sh->Geom.Invocations;
+   shader->info.has_transform_feedback_varyings =
+      shader_prog->TransformFeedback.NumVarying > 0;
 
    return shader;
 }
index 88ba363ae196c216acb59463b5c103a7f5f96dbd..dbd73f15b2208a9e985a97dde5666e792bd1c426 100644 (file)
@@ -1513,6 +1513,9 @@ typedef struct nir_shader_info {
    /* Whether or not separate shader objects were used */
    bool separate_shader;
 
+   /** Was this shader linked with any transform feedback varyings? */
+   bool has_transform_feedback_varyings;
+
    struct {
       /** The maximum number of vertices the geometry shader might write. */
       unsigned vertices_out;
index 44eff3b4da54acf3c5fdbdd56c445513f95599e6..1fd13cfc51082a4e676401a93d39f83a079ae630 100644 (file)
@@ -229,6 +229,7 @@ SYSTEM_VALUE(num_work_groups, 3, 0)
 LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
index 30fad855e6f1395c35a92c8bf5db467f45d6411f..688b48f4675e086ca80d52b2d2fed00fc5b1607a 100644 (file)
@@ -63,31 +63,46 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
    *size = location;
 }
 
+/**
+ * Returns true if we're processing a stage whose inputs are arrays indexed
+ * by a vertex number (such as geometry shader inputs).
+ */
 static bool
-deref_has_indirect(nir_deref_var *deref)
+stage_uses_per_vertex_inputs(struct lower_io_state *state)
 {
-   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
-      if (tail->deref_type == nir_deref_type_array) {
-         nir_deref_array *arr = nir_deref_as_array(tail);
-         if (arr->deref_array_type == nir_deref_array_type_indirect)
-            return true;
-      }
-   }
-
-   return false;
+   gl_shader_stage stage = state->builder.shader->stage;
+   return stage == MESA_SHADER_GEOMETRY;
 }
 
 static unsigned
-get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
+get_io_offset(nir_deref_var *deref, nir_instr *instr,
+              nir_ssa_def **vertex_index,
+              nir_ssa_def **out_indirect,
               struct lower_io_state *state)
 {
-   bool found_indirect = false;
+   nir_ssa_def *indirect = NULL;
    unsigned base_offset = 0;
 
    nir_builder *b = &state->builder;
    b->cursor = nir_before_instr(instr);
 
    nir_deref *tail = &deref->deref;
+
+   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
+    * outermost array index separate.  Process the rest normally.
+    */
+   if (vertex_index != NULL) {
+      tail = tail->child;
+      assert(tail->deref_type == nir_deref_type_array);
+      nir_deref_array *deref_array = nir_deref_as_array(tail);
+
+      nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
+      }
+      *vertex_index = vtx;
+   }
+
    while (tail->child != NULL) {
       const struct glsl_type *parent_type = tail->type;
       tail = tail->child;
@@ -103,14 +118,7 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
                nir_imul(b, nir_imm_int(b, size),
                         nir_ssa_for_src(b, deref_array->indirect, 1));
 
-            if (found_indirect) {
-               indirect->ssa =
-                  nir_iadd(b, nir_ssa_for_src(b, *indirect, 1), mul);
-            } else {
-               indirect->ssa = mul;
-            }
-            indirect->is_ssa = true;
-            found_indirect = true;
+            indirect = indirect ? nir_iadd(b, indirect, mul) : mul;
          }
       } else if (tail->deref_type == nir_deref_type_struct) {
          nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
@@ -122,17 +130,24 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
       }
    }
 
+   *out_indirect = indirect;
    return base_offset;
 }
 
 static nir_intrinsic_op
-load_op(nir_variable_mode mode, bool has_indirect)
+load_op(struct lower_io_state *state,
+        nir_variable_mode mode, bool per_vertex, bool has_indirect)
 {
    nir_intrinsic_op op;
    switch (mode) {
    case nir_var_shader_in:
-      op = has_indirect ? nir_intrinsic_load_input_indirect :
-                          nir_intrinsic_load_input;
+      if (per_vertex) {
+         op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect :
+                             nir_intrinsic_load_per_vertex_input;
+      } else {
+         op = has_indirect ? nir_intrinsic_load_input_indirect :
+                             nir_intrinsic_load_input;
+      }
       break;
    case nir_var_uniform:
       op = has_indirect ? nir_intrinsic_load_uniform_indirect :
@@ -169,17 +184,22 @@ nir_lower_io_block(nir_block *block, void *void_state)
          if (mode != nir_var_shader_in && mode != nir_var_uniform)
             continue;
 
-         bool has_indirect = deref_has_indirect(intrin->variables[0]);
+         bool per_vertex = stage_uses_per_vertex_inputs(state) &&
+                           mode == nir_var_shader_in;
+
+         nir_ssa_def *indirect;
+         nir_ssa_def *vertex_index;
+
+         unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
+                                         per_vertex ? &vertex_index : NULL,
+                                         &indirect, state);
 
          nir_intrinsic_instr *load =
             nir_intrinsic_instr_create(state->mem_ctx,
-                                       load_op(mode, has_indirect));
+                                       load_op(state, mode, per_vertex,
+                                               indirect));
          load->num_components = intrin->num_components;
 
-         nir_src indirect;
-         unsigned offset = get_io_offset(intrin->variables[0],
-                                         &intrin->instr, &indirect, state);
-
          unsigned location = intrin->variables[0]->var->data.driver_location;
          if (mode == nir_var_uniform) {
             load->const_index[0] = location;
@@ -188,8 +208,11 @@ nir_lower_io_block(nir_block *block, void *void_state)
             load->const_index[0] = location + offset;
          }
 
-         if (has_indirect)
-            load->src[0] = indirect;
+         if (per_vertex)
+            load->src[0] = nir_src_for_ssa(vertex_index);
+
+         if (indirect)
+            load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect);
 
          if (intrin->dest.is_ssa) {
             nir_ssa_dest_init(&load->instr, &load->dest,
@@ -209,10 +232,14 @@ nir_lower_io_block(nir_block *block, void *void_state)
          if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
             continue;
 
-         bool has_indirect = deref_has_indirect(intrin->variables[0]);
+         nir_ssa_def *indirect;
+
+         unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
+                                         NULL, &indirect, state);
+         offset += intrin->variables[0]->var->data.driver_location;
 
          nir_intrinsic_op store_op;
-         if (has_indirect) {
+         if (indirect) {
             store_op = nir_intrinsic_store_output_indirect;
          } else {
             store_op = nir_intrinsic_store_output;
@@ -221,18 +248,12 @@ nir_lower_io_block(nir_block *block, void *void_state)
          nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
                                                                  store_op);
          store->num_components = intrin->num_components;
-
-         nir_src indirect;
-         unsigned offset = get_io_offset(intrin->variables[0],
-                                         &intrin->instr, &indirect, state);
-         offset += intrin->variables[0]->var->data.driver_location;
-
          store->const_index[0] = offset;
 
          nir_src_copy(&store->src[0], &intrin->src[0], store);
 
-         if (has_indirect)
-            store->src[1] = indirect;
+         if (indirect)
+            store->src[1] = nir_src_for_ssa(indirect);
 
          nir_instr_insert_before(&intrin->instr, &store->instr);
          nir_instr_remove(&intrin->instr);
index 33cd9c8b0cfc5b62ac66ce7fe21fc8a60da9bde1..aaeff2c223ef5ab33000eea614929b7821f9462e 100644 (file)
@@ -155,13 +155,13 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr
    }
 
    if (location > shader_program->NumUniformStorage - 1 ||
-       !shader_program->UniformStorage[location].sampler[stage].active) {
+       !shader_program->UniformStorage[location].opaque[stage].active) {
       assert(!"cannot return a sampler");
       return;
    }
 
    instr->sampler_index +=
-      shader_program->UniformStorage[location].sampler[stage].index;
+      shader_program->UniformStorage[location].opaque[stage].index;
 
    instr->sampler = NULL;
 }
index 3936bae078b07c525137e7a3beb746058038cc5b..09663996869784a323ece8a93530e3b1a1720e20 100644 (file)
@@ -443,6 +443,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       break;
    case nir_intrinsic_load_input:
    case nir_intrinsic_load_input_indirect:
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_input_indirect:
       var_list = &state->shader->inputs;
       break;
    case nir_intrinsic_store_output:
index d3549756a639e04267465681fd7ba2cc3f17e175..b6ce43b5224167a4344e6426f61f272f87069bd5 100644 (file)
@@ -154,6 +154,8 @@ nir_sweep(nir_shader *nir)
    /* First, move ownership of all the memory to a temporary context; assume dead. */
    ralloc_adopt(rubbish, nir);
 
+   ralloc_steal(nir, (char *)nir->info.name);
+
    /* Variables and registers are not dead.  Steal them back. */
    steal_list(nir, nir_variable, &nir->uniforms);
    steal_list(nir, nir_variable, &nir->inputs);
diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp
deleted file mode 100644 (file)
index 4b8e9a0..0000000
+++ /dev/null
@@ -1,472 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_cse.cpp
- *
- * constant subexpression elimination at the GLSL IR level.
- *
- * Compare to brw_fs_cse.cpp for a more complete CSE implementation.  This one
- * is generic and handles texture operations, but it's rather simple currently
- * and doesn't support modification of variables in the available expressions
- * list, so it can't do variables other than uniforms or shader inputs.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_basic_block.h"
-#include "ir_optimization.h"
-#include "ir_builder.h"
-#include "glsl_types.h"
-
-using namespace ir_builder;
-
-static bool debug = false;
-
-namespace {
-
-/**
- * This is the record of an available expression for common subexpression
- * elimination.
- */
-class ae_entry : public exec_node
-{
-public:
-   ae_entry(ir_instruction *base_ir, ir_rvalue **val)
-      : val(val), base_ir(base_ir)
-   {
-      assert(val);
-      assert(*val);
-      assert(base_ir);
-
-      var = NULL;
-   }
-
-   void init(ir_instruction *base_ir, ir_rvalue **val)
-   {
-      this->val = val;
-      this->base_ir = base_ir;
-      this->var = NULL;
-
-      assert(val);
-      assert(*val);
-      assert(base_ir);
-   }
-
-   /**
-    * The pointer to the expression that we might be able to reuse
-    *
-    * Note the double pointer -- this is the place in the base_ir expression
-    * tree that we would rewrite to move the expression out to a new variable
-    * assignment.
-    */
-   ir_rvalue **val;
-
-   /**
-    * Root instruction in the basic block where the expression appeared.
-    *
-    * This is used so that we can insert the new variable declaration into the
-    * instruction stream (since *val is just somewhere in base_ir's expression
-    * tree).
-    */
-   ir_instruction *base_ir;
-
-   /**
-    * The variable that the expression has been stored in, if it's been CSEd
-    * once already.
-    */
-   ir_variable *var;
-};
-
-class cse_visitor : public ir_rvalue_visitor {
-public:
-   cse_visitor(exec_list *validate_instructions)
-      : validate_instructions(validate_instructions)
-   {
-      progress = false;
-      mem_ctx = ralloc_context(NULL);
-      this->ae = new(mem_ctx) exec_list;
-   }
-   ~cse_visitor()
-   {
-      ralloc_free(mem_ctx);
-   }
-
-   virtual ir_visitor_status visit_enter(ir_function_signature *ir);
-   virtual ir_visitor_status visit_enter(ir_loop *ir);
-   virtual ir_visitor_status visit_enter(ir_if *ir);
-   virtual ir_visitor_status visit_enter(ir_call *ir);
-   virtual void handle_rvalue(ir_rvalue **rvalue);
-
-   bool progress;
-
-private:
-   void *mem_ctx;
-
-   ir_rvalue *try_cse(ir_rvalue *rvalue);
-   void add_to_ae(ir_rvalue **rvalue);
-
-   /**
-    * Move all nodes from the ae list to the free list
-    */
-   void empty_ae_list();
-
-   /**
-    * Get and initialize a new ae_entry
-    *
-    * This will either come from the free list or be freshly allocated.
-    */
-   ae_entry *get_ae_entry(ir_rvalue **rvalue);
-
-   /** List of ae_entry: The available expressions to reuse */
-   exec_list *ae;
-
-   /**
-    * The whole shader, so that we can validate_ir_tree in debug mode.
-    *
-    * This proved quite useful when trying to get the tree manipulation
-    * right.
-    */
-   exec_list *validate_instructions;
-
-   /**
-    * List of available-for-use ae_entry objects.
-    */
-   exec_list free_ae_entries;
-};
-
-/**
- * Visitor to walk an expression tree to check that all variables referenced
- * are constants.
- */
-class is_cse_candidate_visitor : public ir_hierarchical_visitor
-{
-public:
-
-   is_cse_candidate_visitor()
-      : ok(true)
-   {
-   }
-
-   virtual ir_visitor_status visit(ir_dereference_variable *ir);
-
-   bool ok;
-};
-
-
-class contains_rvalue_visitor : public ir_rvalue_visitor
-{
-public:
-
-   contains_rvalue_visitor(ir_rvalue *val)
-      : val(val)
-   {
-      found = false;
-   }
-
-   virtual void handle_rvalue(ir_rvalue **rvalue);
-
-   bool found;
-
-private:
-   ir_rvalue *val;
-};
-
-} /* unnamed namespace */
-
-static void
-dump_ae(exec_list *ae)
-{
-   int i = 0;
-
-   printf("CSE: AE contents:\n");
-   foreach_in_list(ae_entry, entry, ae) {
-      printf("CSE:   AE %2d (%p): ", i, entry);
-      (*entry->val)->print();
-      printf("\n");
-
-      if (entry->var)
-         printf("CSE:     in var %p:\n", entry->var);
-
-      i++;
-   }
-}
-
-ir_visitor_status
-is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
-{
-   /* Currently, since we don't handle kills of the ae based on variables
-    * getting assigned, we can only handle constant variables.
-    */
-   if (ir->var->data.read_only) {
-      return visit_continue;
-   } else {
-      if (debug)
-         printf("CSE: non-candidate: var %s is not read only\n", ir->var->name);
-      ok = false;
-      return visit_stop;
-   }
-}
-
-void
-contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
-   if (*rvalue == val)
-      found = true;
-}
-
-static bool
-contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
-{
-   contains_rvalue_visitor v(needle);
-   haystack->accept(&v);
-   return v.found;
-}
-
-static bool
-is_cse_candidate(ir_rvalue *ir)
-{
-   /* Our temporary variable assignment generation isn't ready to handle
-    * anything bigger than a vector.
-    */
-   if (!ir->type->is_vector() && !ir->type->is_scalar()) {
-      if (debug)
-         printf("CSE: non-candidate: not a vector/scalar\n");
-      return false;
-   }
-
-   /* Only handle expressions and textures currently.  We may want to extend
-    * to variable-index array dereferences at some point.
-    */
-   switch (ir->ir_type) {
-   case ir_type_expression:
-   case ir_type_texture:
-      break;
-   default:
-      if (debug)
-         printf("CSE: non-candidate: not an expression/texture\n");
-      return false;
-   }
-
-   is_cse_candidate_visitor v;
-
-   ir->accept(&v);
-
-   return v.ok;
-}
-
-/**
- * Tries to find and return a reference to a previous computation of a given
- * expression.
- *
- * Walk the list of available expressions checking if any of them match the
- * rvalue, and if so, move the previous copy of the expression to a temporary
- * and return a reference of the temporary.
- */
-ir_rvalue *
-cse_visitor::try_cse(ir_rvalue *rvalue)
-{
-   foreach_in_list(ae_entry, entry, ae) {
-      if (debug) {
-         printf("Comparing to AE %p: ", entry);
-         (*entry->val)->print();
-         printf("\n");
-      }
-
-      if (!rvalue->equals(*entry->val))
-         continue;
-
-      if (debug) {
-         printf("CSE: Replacing: ");
-         (*entry->val)->print();
-         printf("\n");
-         printf("CSE:      with: ");
-         rvalue->print();
-         printf("\n");
-      }
-
-      if (!entry->var) {
-         ir_instruction *base_ir = entry->base_ir;
-
-         ir_variable *var = new(rvalue) ir_variable(rvalue->type,
-                                                    "cse",
-                                                    ir_var_temporary);
-
-         /* Write the previous expression result into a new variable. */
-         base_ir->insert_before(var);
-         ir_assignment *assignment = assign(var, *entry->val);
-         base_ir->insert_before(assignment);
-
-         /* Replace the expression in the original tree with a deref of the
-          * variable, but keep tracking the expression for further reuse.
-          */
-         *entry->val = new(rvalue) ir_dereference_variable(var);
-         entry->val = &assignment->rhs;
-
-         entry->var = var;
-
-         /* Update the base_irs in the AE list.  We have to be sure that
-          * they're correct -- expressions from our base_ir that weren't moved
-          * need to stay in this base_ir (so that later consumption of them
-          * puts new variables between our new variable and our base_ir), but
-          * expressions from our base_ir that we *did* move need base_ir
-          * updated so that any further elimination from inside gets its new
-          * assignments put before our new assignment.
-          */
-         foreach_in_list(ae_entry, fixup_entry, ae) {
-            if (contains_rvalue(assignment->rhs, *fixup_entry->val))
-               fixup_entry->base_ir = assignment;
-         }
-
-         if (debug)
-            dump_ae(ae);
-      }
-
-      /* Replace the expression in our current tree with the variable. */
-      return new(rvalue) ir_dereference_variable(entry->var);
-   }
-
-   return NULL;
-}
-
-void
-cse_visitor::empty_ae_list()
-{
-   free_ae_entries.append_list(ae);
-}
-
-ae_entry *
-cse_visitor::get_ae_entry(ir_rvalue **rvalue)
-{
-   ae_entry *entry = (ae_entry *) free_ae_entries.pop_head();
-   if (entry) {
-      entry->init(base_ir, rvalue);
-   } else {
-      entry = new(mem_ctx) ae_entry(base_ir, rvalue);
-   }
-
-   return entry;
-}
-
-/** Add the rvalue to the list of available expressions for CSE. */
-void
-cse_visitor::add_to_ae(ir_rvalue **rvalue)
-{
-   if (debug) {
-      printf("CSE: Add to AE: ");
-      (*rvalue)->print();
-      printf("\n");
-   }
-
-   ae->push_tail(get_ae_entry(rvalue));
-
-   if (debug)
-      dump_ae(ae);
-}
-
-void
-cse_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
-   if (!*rvalue)
-      return;
-
-   if (debug) {
-      printf("CSE: handle_rvalue ");
-      (*rvalue)->print();
-      printf("\n");
-   }
-
-   if (!is_cse_candidate(*rvalue))
-      return;
-
-   ir_rvalue *new_rvalue = try_cse(*rvalue);
-   if (new_rvalue) {
-      *rvalue = new_rvalue;
-      progress = true;
-
-      if (debug)
-         validate_ir_tree(validate_instructions);
-   } else {
-      add_to_ae(rvalue);
-   }
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_if *ir)
-{
-   handle_rvalue(&ir->condition);
-
-   empty_ae_list();
-   visit_list_elements(this, &ir->then_instructions);
-
-   empty_ae_list();
-   visit_list_elements(this, &ir->else_instructions);
-
-   empty_ae_list();
-   return visit_continue_with_parent;
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_function_signature *ir)
-{
-   empty_ae_list();
-   visit_list_elements(this, &ir->body);
-
-   empty_ae_list();
-   return visit_continue_with_parent;
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_loop *ir)
-{
-   empty_ae_list();
-   visit_list_elements(this, &ir->body_instructions);
-
-   empty_ae_list();
-   return visit_continue_with_parent;
-}
-
-ir_visitor_status
-cse_visitor::visit_enter(ir_call *)
-{
-   /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
-    * pointer to the (ir_rvalue *) on the stack.  Since we save those pointers
-    * in the AE list, we can't let handle_rvalue get called.
-    */
-   return visit_continue_with_parent;
-}
-
-/**
- * Does a (uniform-value) constant subexpression elimination pass on the code
- * present in the instruction stream.
- */
-bool
-do_cse(exec_list *instructions)
-{
-   cse_visitor v(instructions);
-
-   visit_list_elements(&v, instructions);
-
-   return v.progress;
-}
index 1af50d6b68de6d0648e1e98788b34fed13b0bcc4..05140192893a3b107b2fcfb1cda87fa8294268e8 100644 (file)
@@ -91,6 +91,14 @@ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
    return shader;
 }
 
+void
+_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
+{
+   free((void *)sh->Source);
+   free(sh->Label);
+   ralloc_free(sh);
+}
+
 void
 _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
 {
index dc6fb640f15b1ef762f0768e4ba9f0ac8d962664..a9ca5e4e3d30c50fe04e4cd2890a4b01ab8c9835 100644 (file)
@@ -44,6 +44,9 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
 extern "C" struct gl_shader *
 _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
 
+extern "C" void
+_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh);
+
 extern "C" void
 _mesa_clear_shader_program_data(struct gl_shader_program *);
 
index 91227d9487af5fc6957cad3317d9579476778809..0b1f66cb34286fa66a195b8fcd2745d5be46740d 100644 (file)
@@ -117,8 +117,8 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage,
    prog->UniformStorage[index_to_set].array_elements = array_size;
    prog->UniformStorage[index_to_set].initialized = false;
    for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
-      prog->UniformStorage[index_to_set].sampler[sh].index = ~0;
-      prog->UniformStorage[index_to_set].sampler[sh].active = false;
+      prog->UniformStorage[index_to_set].opaque[sh].index = ~0;
+      prog->UniformStorage[index_to_set].opaque[sh].active = false;
    }
    prog->UniformStorage[index_to_set].num_driver_storage = 0;
    prog->UniformStorage[index_to_set].driver_storage = NULL;
@@ -138,8 +138,8 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage,
       prog->UniformStorage[i].array_elements = 0;
       prog->UniformStorage[i].initialized = false;
       for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
-         prog->UniformStorage[i].sampler[sh].index = ~0;
-         prog->UniformStorage[i].sampler[sh].active = false;
+         prog->UniformStorage[i].opaque[sh].index = ~0;
+         prog->UniformStorage[i].opaque[sh].active = false;
       }
       prog->UniformStorage[i].num_driver_storage = 0;
       prog->UniformStorage[i].driver_storage = NULL;
index 4266d5cc67b8360908ef31788c79e6ef98606d57..f8f38ca2ec55d60ff0ce6ca06456985c225a5c44 100644 (file)
 #define __GLX_PUT_CHAR(offset,a)                \
    *((INT8 *) (pc + offset)) = a
 
-#ifndef _CRAY
 #define __GLX_PUT_SHORT(offset,a)               \
    *((INT16 *) (pc + offset)) = a
 
 #define __GLX_PUT_FLOAT(offset,a)               \
    *((FLOAT32 *) (pc + offset)) = a
 
-#else
-#define __GLX_PUT_SHORT(offset,a)               \
-   { GLubyte *cp = (pc+offset);                 \
-      int shift = (64-16) - ((int)(cp) >> (64-6));                      \
-      *(int *)cp = (*(int *)cp & ~(0xffff << shift)) | ((a & 0xffff) << shift); }
-
-#define __GLX_PUT_LONG(offset,a)                \
-   { GLubyte *cp = (pc+offset);                 \
-      int shift = (64-32) - ((int)(cp) >> (64-6));                      \
-      *(int *)cp = (*(int *)cp & ~(0xffffffff << shift)) | ((a & 0xffffffff) << shift); }
-
-#define __GLX_PUT_FLOAT(offset,a)               \
-   gl_put_float((pc + offset),a)
-
-#define __GLX_PUT_DOUBLE(offset,a)              \
-   gl_put_double(pc + offset, a)
-
-extern void gl_put_float( /*GLubyte *, struct cray_single */ );
-extern void gl_put_double( /*GLubyte *, struct cray_double */ );
-#endif
-
-#ifndef _CRAY
-
 #ifdef __GLX_ALIGN64
 /*
 ** This can certainly be done better for a particular machine
@@ -202,12 +178,9 @@ extern void gl_put_double( /*GLubyte *, struct cray_double */ );
    *((FLOAT64 *) (pc + offset)) = a
 #endif
 
-#endif
-
 #define __GLX_PUT_CHAR_ARRAY(offset,a,alen)                 \
    __GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_INT8)
 
-#ifndef _CRAY
 #define __GLX_PUT_SHORT_ARRAY(offset,a,alen)                \
    __GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_INT16)
 
@@ -220,24 +193,5 @@ extern void gl_put_double( /*GLubyte *, struct cray_double */ );
 #define __GLX_PUT_DOUBLE_ARRAY(offset,a,alen)                  \
    __GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_FLOAT64)
 
-#else
-#define __GLX_PUT_SHORT_ARRAY(offset,a,alen)                            \
-   gl_put_short_array((GLubyte *)(pc + offset), a, alen * __GLX_SIZE_INT16)
-
-#define __GLX_PUT_LONG_ARRAY(offset,a,alen)                             \
-   gl_put_long_array((GLubyte *)(pc + offset), (long *)a, alen * __GLX_SIZE_INT32)
-
-#define __GLX_PUT_FLOAT_ARRAY(offset,a,alen)                            \
-   gl_put_float_array((GLubyte *)(pc + offset), (float *)a, alen * __GLX_SIZE_FLOAT32)
-
-#define __GLX_PUT_DOUBLE_ARRAY(offset,a,alen)                           \
-   gl_put_double_array((GLubyte *)(pc + offset), (double *)a, alen * __GLX_SIZE_FLOAT64)
-
-extern gl_put_short_array(GLubyte *, short *, int);
-extern gl_put_long_array(GLubyte *, long *, int);
-extern gl_put_float_array(GLubyte *, float *, int);
-extern gl_put_double_array(GLubyte *, double *, int);
-
-#endif /* _CRAY */
 
 #endif /* !__GLX_packrender_h__ */
index 037265a7671f856c8166f6fbbe8e2597fe898946..fddcbf157f80064b1847a2a84883e0d897a05e85 100644 (file)
@@ -83,7 +83,6 @@
 #define __GLX_SINGLE_PUT_CHAR(offset,a)         \
    *((INT8 *) (pc + offset)) = a
 
-#ifndef CRAY
 #define __GLX_SINGLE_PUT_SHORT(offset,a)        \
    *((INT16 *) (pc + offset)) = a
 
 #define __GLX_SINGLE_PUT_FLOAT(offset,a)        \
    *((FLOAT32 *) (pc + offset)) = a
 
-#else
-#define __GLX_SINGLE_PUT_SHORT(offset,a)        \
-   { GLubyte *cp = (pc+offset);                    \
-      int shift = (64-16) - ((int)(cp) >> (64-6));                      \
-      *(int *)cp = (*(int *)cp & ~(0xffff << shift)) | ((a & 0xffff) << shift); }
-
-#define __GLX_SINGLE_PUT_LONG(offset,a)         \
-   { GLubyte *cp = (pc+offset);                    \
-      int shift = (64-32) - ((int)(cp) >> (64-6));                      \
-      *(int *)cp = (*(int *)cp & ~(0xffffffff << shift)) | ((a & 0xffffffff) << shift); }
-
-#define __GLX_SINGLE_PUT_FLOAT(offset,a)        \
-   gl_put_float(pc + offset, a)
-#endif
-
 /* Read support macros */
 #define __GLX_SINGLE_READ_XREPLY()                    \
    (void) _XReply(dpy, (xReply*) &reply, 0, False)
 #define __GLX_SINGLE_GET_SIZE(a)                \
    a = (GLint) reply.size
 
-#ifndef _CRAY
 #define __GLX_SINGLE_GET_CHAR(p)                \
    *p = *(GLbyte *)&reply.pad3;
 
 #define __GLX_SINGLE_GET_FLOAT(p)               \
    *p = *(GLfloat *)&reply.pad3;
 
-#else
-#define __GLX_SINGLE_GET_CHAR(p)                \
-   *p = reply.pad3 >> 24;
-
-#define __GLX_SINGLE_GET_SHORT(p)               \
-   {int t = reply.pad3 >> 16;                            \
-      *p = (t & 0x8000) ? (t | ~0xffff) : (t & 0xffff);}
-
-#define __GLX_SINGLE_GET_LONG(p)                \
-   {int t = reply.pad3;                                              \
-      *p = (t & 0x80000000) ? (t | ~0xffffffff) : (t & 0xffffffff);}
-
-#define PAD3OFFSET 16
-#define __GLX_SINGLE_GET_FLOAT(p)                        \
-   *p = gl_ntoh_float((GLubyte *)&reply + PAD3OFFSET);
-
-#define __GLX_SINGLE_GET_DOUBLE(p)                       \
-   *p = gl_ntoh_double((GLubyte *)&reply + PAD3OFFSET);
-
-extern float gl_ntoh_float(GLubyte *);
-extern float gl_ntoh_double(GLubyte *);
-#endif
-
-#ifndef _CRAY
-
 #ifdef __GLX_ALIGN64
 #define __GLX_SINGLE_GET_DOUBLE(p)              \
    __GLX_MEM_COPY(p, &reply.pad3, 8)
@@ -164,8 +122,6 @@ extern float gl_ntoh_double(GLubyte *);
    *p = *(GLdouble *)&reply.pad3
 #endif
 
-#endif
-
 /* Get an array of typed data */
 #define __GLX_SINGLE_GET_VOID_ARRAY(a,alen)     \
    {                                            \
@@ -192,22 +148,10 @@ extern float gl_ntoh_double(GLubyte *);
 #define __GLX_SINGLE_GET_LONG_ARRAY(a,alen)        \
    _XRead(dpy,(char *)a,alen*__GLX_SIZE_INT32);
 
-#ifndef _CRAY
 #define __GLX_SINGLE_GET_FLOAT_ARRAY(a,alen)       \
    _XRead(dpy,(char *)a,alen*__GLX_SIZE_FLOAT32);
 
 #define __GLX_SINGLE_GET_DOUBLE_ARRAY(a,alen)      \
    _XRead(dpy,(char *)a,alen*__GLX_SIZE_FLOAT64);
 
-#else
-#define __GLX_SINGLE_GET_FLOAT_ARRAY(a,alen)    \
-   gl_get_float_array(dpy,a,alen);
-
-#define __GLX_SINGLE_GET_DOUBLE_ARRAY(a,alen)   \
-   gl_get_double_array(dpy, a, alen);
-
-extern void gl_get_float_array(Display * dpy, float *a, int alen);
-extern void gl_get_double_array(Display * dpy, double *a, int alen);
-#endif
-
 #endif /* !__GLX_packsingle_h__ */
index 77956d61e385c45a40117518ec43b7db456920e1..e4a5a5b01e6720b17c9b55e241eab2105c7e0dab 100644 (file)
     <param name="label" type="GLchar *"/>
   </function>
 
+  <!-- ES extension has different suffixes -->
+  <function name="DebugMessageControlKHR" alias="DebugMessageControl" es1="1.0" es2="2.0">
+    <param name="source" type="GLenum"/>
+    <param name="type" type="GLenum"/>
+    <param name="severity" type="GLenum"/>
+    <param name="count" type="GLsizei" counter="true"/>
+    <param name="ids" type="const GLuint *" count="count"/>
+    <param name="enabled" type="GLboolean"/>
+  </function>
+
+  <function name="DebugMessageInsertKHR" alias="DebugMessageInsert" es1="1.0" es2="2.0">
+    <param name="source" type="GLenum"/>
+    <param name="type" type="GLenum"/>
+    <param name="id" type="GLuint"/>
+    <param name="severity" type="GLenum"/>
+    <param name="length" type="GLsizei"/>
+    <param name="buf" type="const GLchar *"/>
+  </function>
+
+  <function name="DebugMessageCallbackKHR" alias="DebugMessageCallback" es1="1.0" es2="2.0">
+    <param name="callback" type="GLDEBUGPROC"/>
+    <param name="userParam" type="const GLvoid *"/>
+  </function>
+
+  <function name="GetDebugMessageLogKHR" alias="GetDebugMessageLog" es1="1.0" es2="2.0">
+    <return type="GLuint"/>
+    <param name="count" type="GLuint"/>
+    <param name="bufsize" type="GLsizei"/>
+    <param name="sources" type="GLenum *" output="true"/>
+    <param name="types" type="GLenum *" output="true"/>
+    <param name="ids" type="GLuint *" output="true"/>
+    <param name="severities" type="GLenum *" output="true"/>
+    <param name="lengths" type="GLsizei *" output="true"/>
+    <param name="messageLog" type="GLchar *" output="true"/>
+  </function>
+
+  <function name="PushDebugGroupKHR" alias="PushDebugGroup" es1="1.0" es2="2.0">
+    <param name="source" type="GLenum"/>
+    <param name="id" type="GLuint"/>
+    <param name="length" type="GLsizei"/>
+    <param name="message" type="const GLchar *"/>
+  </function>
+
+  <function name="PopDebugGroupKHR" alias="PopDebugGroup" es1="1.0" es2="2.0"/>
+
+  <function name="ObjectLabelKHR" alias="ObjectLabel" es1="1.0" es2="2.0">
+    <param name="identifier" type="GLenum"/>
+    <param name="name" type="GLuint"/>
+    <param name="length" type="GLsizei"/>
+    <param name="label" type="const GLchar *"/>
+  </function>
+
+  <function name="GetObjectLabelKHR" alias="GetObjectLabel" es1="1.0" es2="2.0">
+    <param name="identifier" type="GLenum"/>
+    <param name="name" type="GLuint"/>
+    <param name="bufSize" type="GLsizei"/>
+    <param name="length" type="GLsizei *"/>
+    <param name="label" type="GLchar *"/>
+  </function>
+
+  <function name="ObjectPtrLabelKHR" alias="ObjectPtrLabel" es1="1.0" es2="2.0">
+    <param name="ptr" type="const GLvoid *"/>
+    <param name="length" type="GLsizei"/>
+    <param name="label" type="const GLchar *"/>
+  </function>
+
+  <function name="GetObjectPtrLabelKHR" alias="GetObjectPtrLabel" es1="1.0" es2="2.0">
+    <param name="ptr" type="const GLvoid *"/>
+    <param name="bufSize" type="GLsizei"/>
+    <param name="length" type="GLsizei *"/>
+    <param name="label" type="GLchar *"/>
+  </function>
+
 </category>
 
 </OpenGLAPI>
index 6fe42b1775c281a3737cf3d46bd8ae799978ab32..3d1fccb3ab4da005090792bca4b95ffb507dc927 100644 (file)
@@ -75,14 +75,12 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
 
    driver->GetString = NULL;  /* REQUIRED! */
    driver->UpdateState = NULL;  /* REQUIRED! */
-   driver->ResizeBuffers = _mesa_resize_framebuffer;
 
    driver->Finish = NULL;
    driver->Flush = NULL;
 
    /* framebuffer/image functions */
    driver->Clear = _swrast_Clear;
-   driver->Accum = _mesa_accum;
    driver->RasterPos = _tnl_RasterPos;
    driver->DrawPixels = _swrast_DrawPixels;
    driver->ReadPixels = _mesa_readpixels;
@@ -135,7 +133,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
    driver->DepthRange = NULL;
    driver->Enable = NULL;
    driver->Fogfv = NULL;
-   driver->Hint = NULL;
    driver->Lightfv = NULL;
    driver->LightModelfv = NULL;
    driver->LineStipple = NULL;
@@ -179,16 +176,8 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
    driver->DiscardFramebuffer = NULL;
 
    _mesa_init_texture_barrier_functions(driver);
-
-   /* APPLE_vertex_array_object */
-   driver->NewArrayObject = _mesa_new_vao;
-   driver->DeleteArrayObject = _mesa_delete_vao;
-   driver->BindArrayObject = NULL;
-
    _mesa_init_shader_object_functions(driver);
-
    _mesa_init_transform_feedback_functions(driver);
-
    _mesa_init_sampler_object_functions(driver);
 
    /* T&L stuff */
@@ -198,17 +187,8 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
    driver->SaveNeedFlush = 0;
 
    driver->ProgramStringNotify = _tnl_program_string;
-   driver->FlushVertices = NULL;
-   driver->SaveFlushVertices = NULL;
-   driver->NotifySaveBegin = NULL;
    driver->LightingSpaceChange = NULL;
 
-   /* display list */
-   driver->NewList = NULL;
-   driver->EndList = NULL;
-   driver->BeginCallList = NULL;
-   driver->EndCallList = NULL;
-
    /* GL_ARB_texture_storage */
    driver->AllocTextureStorage = _mesa_AllocTextureStorage_sw;
 
@@ -242,23 +222,11 @@ _mesa_init_driver_state(struct gl_context *ctx)
                                  ctx->Color.Blend[0].SrcA,
                                  ctx->Color.Blend[0].DstA);
 
-   if (ctx->Driver.ColorMaskIndexed) {
-      GLuint i;
-      for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
-         ctx->Driver.ColorMaskIndexed(ctx, i,
-                                      ctx->Color.ColorMask[i][RCOMP],
-                                      ctx->Color.ColorMask[i][GCOMP],
-                                      ctx->Color.ColorMask[i][BCOMP],
-                                      ctx->Color.ColorMask[i][ACOMP]);
-      }
-   }
-   else {
-      ctx->Driver.ColorMask(ctx,
-                            ctx->Color.ColorMask[0][RCOMP],
-                            ctx->Color.ColorMask[0][GCOMP],
-                            ctx->Color.ColorMask[0][BCOMP],
-                            ctx->Color.ColorMask[0][ACOMP]);
-   }
+   ctx->Driver.ColorMask(ctx,
+                         ctx->Color.ColorMask[0][RCOMP],
+                         ctx->Color.ColorMask[0][GCOMP],
+                         ctx->Color.ColorMask[0][BCOMP],
+                         ctx->Color.ColorMask[0][ACOMP]);
 
    ctx->Driver.CullFace(ctx, ctx->Polygon.CullFaceMode);
    ctx->Driver.DepthFunc(ctx, ctx->Depth.Func);
index 5dc40a2aa33aef93ce054f1ab6aad9d161748654..4800278a467a6d452c8231d042fd32232fa7e704 100644 (file)
@@ -150,8 +150,7 @@ prepare_mipmap_level(struct gl_context *ctx,
 
 /**
  * Called via ctx->Driver.GenerateMipmap()
- * Note: We don't yet support 3D textures, 1D/2D array textures or texture
- * borders.
+ * Note: We don't yet support 3D textures, or texture borders.
  */
 void
 _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
index d35ac263a454005981f535820cc02236dd052418..5cfa2f8ca4f1a7e78c9459ac19ccf3df9c5c5113 100644 (file)
@@ -44,6 +44,7 @@
 #include "utils.h"
 #include "xmlpool.h"
 #include "main/mtypes.h"
+#include "main/framebuffer.h"
 #include "main/version.h"
 #include "main/errors.h"
 #include "main/macros.h"
@@ -793,7 +794,7 @@ driUpdateFramebufferSize(struct gl_context *ctx, const __DRIdrawable *dPriv)
 {
    struct gl_framebuffer *fb = (struct gl_framebuffer *) dPriv->driverPrivate;
    if (fb && (dPriv->w != fb->Width || dPriv->h != fb->Height)) {
-      ctx->Driver.ResizeBuffers(ctx, fb, dPriv->w, dPriv->h);
+      _mesa_resize_framebuffer(ctx, fb, dPriv->w, dPriv->h);
       /* if the driver needs the hw lock for ResizeBuffers, the drawable
          might have changed again by now */
       assert(fb->Width == dPriv->w);
index 1246bec6e02a4018be6152e85170bde1790d7de6..ae8fcabc14e47491013770e2d56a0ac25606a557 100644 (file)
 #include "utils.h"
 #include "dri_util.h"
 
-
-uint64_t
-driParseDebugString(const char *debug,
-                    const struct dri_debug_control *control)
-{
-   uint64_t flag = 0;
-
-   if (debug != NULL) {
-      for (; control->string != NULL; control++) {
-         if (!strcmp(debug, "all")) {
-            flag |= control->flag;
-
-         } else {
-            const char *s = debug;
-            unsigned n;
-
-            for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
-               if (strlen(control->string) == n &&
-                   !strncmp(control->string, s, n))
-                  flag |= control->flag;
-            }
-         }
-      }
-   }
-
-   return flag;
-}
-
-
-
 /**
  * Create the \c GL_RENDERER string for DRI drivers.
  * 
index 3760c38fcaf3aa8a15d1e12521d390dd1b816f51..f6b8d7c3a213d214228673c925b511a05399d079 100644 (file)
 #include <GL/internal/dri_interface.h>
 #include "main/context.h"
 
-struct dri_debug_control {
-    const char * string;
-    uint64_t     flag;
-};
-
-extern uint64_t driParseDebugString( const char * debug,
-    const struct dri_debug_control * control );
-
 extern unsigned driGetRendererString( char * buffer,
     const char * hardware_name, GLuint agp_mode );
 
index 906e942b020b9fba2d5c60b543fc646a125d1e28..7ce5ef7e41a0fb9cf6a1c2d448bd410c68cd100c 100644 (file)
@@ -601,6 +601,8 @@ i830CullFaceFrontFace(struct gl_context * ctx, GLenum unused)
    else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) {
       mode = CULLMODE_CW;
 
+      if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
+         mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
       if (ctx->Polygon.CullFaceMode == GL_FRONT)
          mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
       if (ctx->Polygon.FrontFace != GL_CCW)
index 80bd249fa7b85a99c5e86822d91d44050d885f81..c41cd37bcc23564642b8d0d00f5e46515f7b21c8 100644 (file)
@@ -31,7 +31,6 @@
 #include "main/mtypes.h"
 #include "main/imports.h"
 #include "main/macros.h"
-#include "main/colormac.h"
 #include "main/renderbuffer.h"
 #include "main/framebuffer.h"
 
index 51eaea43a509160434b8582906dd59a6d77b3d03..386e032443a8101715baa5c22161076882606bbd 100644 (file)
@@ -55,20 +55,14 @@ intel_check_front_buffer_rendering(struct intel_context *intel)
 static void
 intelDrawBuffer(struct gl_context * ctx, GLenum mode)
 {
-   if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
       struct intel_context *const intel = intel_context(ctx);
-      const bool was_front_buffer_rendering =
-       intel->is_front_buffer_rendering;
 
-      intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT)
-       || (mode == GL_FRONT) || (mode == GL_FRONT_AND_BACK);
-
-      /* If we weren't front-buffer rendering before but we are now,
-       * invalidate our DRI drawable so we'll ask for new buffers
+      /* If we might be front-buffer rendering on this buffer for the first
+       * time, invalidate our DRI drawable so we'll ask for new buffers
        * (including the fake front) before we start rendering again.
        */
-      if (!was_front_buffer_rendering && intel->is_front_buffer_rendering)
-        dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
+      dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
    }
 
    intel_draw_buffer(ctx);
@@ -78,20 +72,14 @@ intelDrawBuffer(struct gl_context * ctx, GLenum mode)
 static void
 intelReadBuffer(struct gl_context * ctx, GLenum mode)
 {
-   if (ctx->ReadBuffer && _mesa_is_winsys_fbo(ctx->ReadBuffer)) {
+   if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) {
       struct intel_context *const intel = intel_context(ctx);
-      const bool was_front_buffer_reading =
-       intel->is_front_buffer_reading;
-
-      intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
-       || (mode == GL_FRONT);
 
-      /* If we weren't front-buffer reading before but we are now,
-       * invalidate our DRI drawable so we'll ask for new buffers
+      /* If we might be front-buffer reading on this buffer for the first
+       * time, invalidate our DRI drawable so we'll ask for new buffers
        * (including the fake front) before we start reading again.
        */
-      if (!was_front_buffer_reading && intel->is_front_buffer_reading)
-        dri2InvalidateDrawable(intel->driContext->driReadablePriv);
+      dri2InvalidateDrawable(intel->driContext->driReadablePriv);
    }
 }
 
index 42d84bcfcb757cf2da44dcfde8f4079bf976dc90..6e22f5125b956f8384b6e978b2c15ed7a2fe9580 100644 (file)
@@ -34,7 +34,6 @@
 #include "intel_context.h"
 
 struct intel_context;
-struct intel_framebuffer;
 
 extern void intel_check_front_buffer_rendering(struct intel_context *intel);
 
index c780103228f5d3d8a31a593a480e1ea2c5aaa58c..3f429f25d105cb43b7c883ee4a1cb03479228fab 100644 (file)
@@ -56,6 +56,7 @@
 #include "intel_mipmap_tree.h"
 
 #include "utils.h"
+#include "util/debug.h"
 #include "util/ralloc.h"
 
 int INTEL_DEBUG = (0);
@@ -243,7 +244,7 @@ intel_prepare_render(struct intel_context *intel)
     * that will happen next will probably dirty the front buffer.  So
     * mark it as dirty here.
     */
-   if (intel->is_front_buffer_rendering)
+   if (_mesa_is_front_buffer_drawing(intel->ctx.DrawBuffer))
       intel->front_buffer_dirty = true;
 
    /* Wait for the swapbuffers before the one we just emitted, so we
@@ -290,7 +291,7 @@ intel_viewport(struct gl_context *ctx)
     intelCalcViewport(ctx);
 }
 
-static const struct dri_debug_control debug_control[] = {
+static const struct debug_control debug_control[] = {
    { "tex",   DEBUG_TEXTURE},
    { "state", DEBUG_STATE},
    { "blit",  DEBUG_BLIT},
@@ -356,7 +357,7 @@ intel_glFlush(struct gl_context *ctx)
 
    intel_flush(ctx);
    intel_flush_front(ctx);
-   if (intel->is_front_buffer_rendering)
+   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
       intel->need_throttle = true;
 }
 
@@ -474,8 +475,8 @@ intelInitContext(struct intel_context *intel,
 
    ctx->Const.MinLineWidth = 1.0;
    ctx->Const.MinLineWidthAA = 1.0;
-   ctx->Const.MaxLineWidth = 5.0;
-   ctx->Const.MaxLineWidthAA = 5.0;
+   ctx->Const.MaxLineWidth = 7.0;
+   ctx->Const.MaxLineWidthAA = 7.0;
    ctx->Const.LineWidthGranularity = 0.5;
 
    ctx->Const.MinPointSize = 1.0;
@@ -512,7 +513,7 @@ intelInitContext(struct intel_context *intel,
 
    intelInitExtensions(ctx);
 
-   INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
+   INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
    if (INTEL_DEBUG & DEBUG_BUFMGR)
       dri_bufmgr_set_debug(intel->bufmgr, true);
    if (INTEL_DEBUG & DEBUG_PERF)
@@ -700,8 +701,8 @@ intel_query_dri2_buffers(struct intel_context *intel,
    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
 
    memset(attachments, 0, sizeof(attachments));
-   if ((intel->is_front_buffer_rendering ||
-       intel->is_front_buffer_reading ||
+   if ((_mesa_is_front_buffer_drawing(fb) ||
+        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
       /* If a fake front buffer is in use, then querying for
        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
@@ -866,8 +867,10 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable)
    else
       return;
 
-   if ((intel->is_front_buffer_rendering || intel->is_front_buffer_reading || !back_rb) && front_rb)
+   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
+                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
+   }
 
    if (back_rb)
       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
index 4ec4015d453976e3efa205921eb4ae680b4628bb..aecd7c23f451ed8e291286a513b2c8c3c76c9d7c 100644 (file)
@@ -255,22 +255,6 @@ struct intel_context
     */
    bool front_buffer_dirty;
 
-   /**
-    * Track whether front-buffer rendering is currently enabled
-    *
-    * A separate flag is used to track this in order to support MRT more
-    * easily.
-    */
-   bool is_front_buffer_rendering;
-   /**
-    * Track whether front-buffer is the current read target.
-    *
-    * This is closely associated with is_front_buffer_rendering, but may
-    * be set separately.  The DRI2 fake front buffer must be referenced
-    * either way.
-    */
-   bool is_front_buffer_reading;
-
    bool use_early_z;
 
    __DRIcontext *driContext;
index 12cc7e3a71b42d4dec07e0b50931c04ea606139d..8750c601b849de69102e18dd7677f92bb96fef3f 100644 (file)
@@ -64,19 +64,6 @@ intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex)
       return NULL;
 }
 
-/**
- * Create a new framebuffer object.
- */
-static struct gl_framebuffer *
-intel_new_framebuffer(struct gl_context * ctx, GLuint name)
-{
-   /* Only drawable state in intel_framebuffer at this time, just use Mesa's
-    * class
-    */
-   return _mesa_new_framebuffer(ctx, name);
-}
-
-
 /** Called by gl_renderbuffer::Delete() */
 static void
 intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
@@ -770,7 +757,6 @@ intel_blit_framebuffer(struct gl_context *ctx,
 void
 intel_fbo_init(struct intel_context *intel)
 {
-   intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer;
    intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer;
    intel->ctx.Driver.MapRenderbuffer = intel_map_renderbuffer;
    intel->ctx.Driver.UnmapRenderbuffer = intel_unmap_renderbuffer;
index 6f139e19e11fb6e2087f27e9f96c7acf8c5967c3..feb1a3f97e8a778edba94d38efb53115f992c656 100644 (file)
@@ -126,7 +126,6 @@ intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one)
 void
 intelInitPixelFuncs(struct dd_function_table *functions)
 {
-   functions->Accum = _mesa_accum;
    functions->Bitmap = intelBitmap;
    functions->CopyPixels = intelCopyPixels;
    functions->DrawPixels = intelDrawPixels;
index 6c2ad6c6c95457b2a1e5a706fd021efcd8e17308..c1603565cc2a6157c0bdc6d6b1c6357336cc456e 100644 (file)
@@ -54,9 +54,7 @@
  * dma buffers.  Use strip/fan hardware primitives where possible.
  * Try to simulate missing primitives with indexed vertices.
  */
-#define HAVE_POINTS      0      /* Has it, but can't use because subpixel has to
-                                 * be adjusted for points on the INTEL/I845G
-                                 */
+#define HAVE_POINTS      1
 #define HAVE_LINES       1
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_ELTS        0
 
 static const uint32_t hw_prim[GL_POLYGON + 1] = {
-   0,
-   PRIM3D_LINELIST,
-   PRIM3D_LINESTRIP,
-   PRIM3D_LINESTRIP,
-   PRIM3D_TRILIST,
-   PRIM3D_TRISTRIP,
-   PRIM3D_TRIFAN,
-   0,
-   0,
-   PRIM3D_POLY
+   [GL_POINTS] = PRIM3D_POINTLIST,
+   [GL_LINES ] = PRIM3D_LINELIST,
+   [GL_LINE_LOOP] = PRIM3D_LINESTRIP,
+   [GL_LINE_STRIP] = PRIM3D_LINESTRIP,
+   [GL_TRIANGLES] = PRIM3D_TRILIST,
+   [GL_TRIANGLE_STRIP] = PRIM3D_TRISTRIP,
+   [GL_TRIANGLE_FAN] = PRIM3D_TRIFAN,
+   [GL_QUADS] = 0,
+   [GL_QUAD_STRIP] = 0,
+   [GL_POLYGON] = PRIM3D_POLY,
 };
 
 static const GLenum reduced_prim[GL_POLYGON + 1] = {
-   GL_POINTS,
-   GL_LINES,
-   GL_LINES,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES
+   [GL_POINTS] = GL_POINTS,
+   [GL_LINES] = GL_LINES,
+   [GL_LINE_LOOP] = GL_LINES,
+   [GL_LINE_STRIP] = GL_LINES,
+   [GL_TRIANGLES] = GL_TRIANGLES,
+   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+   [GL_TRIANGLE_FAN] = GL_TRIANGLES,
+   [GL_QUADS] = GL_TRIANGLES,
+   [GL_QUAD_STRIP] = GL_TRIANGLES,
+   [GL_POLYGON] = GL_TRIANGLES,
 };
 
 static const int scale_prim[GL_POLYGON + 1] = {
-   0,                           /* fallback case */
-   1,
-   2,
-   2,
-   1,
-   3,
-   3,
-   0,                           /* fallback case */
-   0,                           /* fallback case */
-   3
+   [GL_POINTS] = 1,
+   [GL_LINES] = 1,
+   [GL_LINE_LOOP] = 2,
+   [GL_LINE_STRIP] = 2,
+   [GL_TRIANGLES] = 1,
+   [GL_TRIANGLE_STRIP] = 3,
+   [GL_TRIANGLE_FAN] = 3,
+   [GL_QUADS] = 0,              /* fallback case */
+   [GL_QUAD_STRIP] = 0,         /* fallback case */
+   [GL_POLYGON] = 3,
 };
 
 
index c951ff731b85d5941b49ffe947147eddc44793f3..3de9d50a4d72f3bb53dbdd4728f4e4627b43fbcc 100644 (file)
@@ -30,7 +30,6 @@
 #include "main/context.h"
 #include "main/macros.h"
 #include "main/enums.h"
-#include "main/colormac.h"
 #include "main/dd.h"
 
 #include "intel_screen.h"
index ae62a800fb7b89bfb97d1cb0fc35bec63284d7b4..a093a0a7bd28d5fa3eb77348898f614d99c7c1b9 100644 (file)
@@ -426,11 +426,7 @@ intel_draw_point(struct intel_context *intel, intelVertexPtr v0)
    GLuint *vb = intel_get_prim_space(intel, 1);
    int j;
 
-   /* Adjust for sub pixel position -- still required for conform. */
-   *(float *) &vb[0] = v0->v.x;
-   *(float *) &vb[1] = v0->v.y;
-   for (j = 2; j < vertsize; j++)
-      vb[j] = v0->ui[j];
+   COPY_DWORDS(j, vb, vertsize, v0);
 }
 
 
@@ -670,16 +666,16 @@ do {                                                      \
  ***********************************************************************/
 
 static const GLuint hw_prim[GL_POLYGON + 1] = {
-   PRIM3D_POINTLIST,
-   PRIM3D_LINELIST,
-   PRIM3D_LINELIST,
-   PRIM3D_LINELIST,
-   PRIM3D_TRILIST,
-   PRIM3D_TRILIST,
-   PRIM3D_TRILIST,
-   PRIM3D_TRILIST,
-   PRIM3D_TRILIST,
-   PRIM3D_TRILIST
+   [GL_POINTS] = PRIM3D_POINTLIST,
+   [GL_LINES] = PRIM3D_LINELIST,
+   [GL_LINE_LOOP] = PRIM3D_LINELIST,
+   [GL_LINE_STRIP] = PRIM3D_LINELIST,
+   [GL_TRIANGLES] = PRIM3D_TRILIST,
+   [GL_TRIANGLE_STRIP] = PRIM3D_TRILIST,
+   [GL_TRIANGLE_FAN] = PRIM3D_TRILIST,
+   [GL_QUADS] = PRIM3D_TRILIST,
+   [GL_QUAD_STRIP] = PRIM3D_TRILIST,
+   [GL_POLYGON] = PRIM3D_TRILIST,
 };
 
 #define RASTERIZE(x) intelRasterPrimitive( ctx, x, hw_prim[x] )
@@ -891,18 +887,11 @@ intelRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n)
 {
    struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
-   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
    GLuint prim = intel->render_primitive;
 
    /* Render the new vertices as an unclipped polygon.
     */
-   {
-      GLuint *tmp = VB->Elts;
-      VB->Elts = (GLuint *) elts;
-      tnl->Driver.Render.PrimTabElts[GL_POLYGON] (ctx, 0, n,
-                                                  PRIM_BEGIN | PRIM_END);
-      VB->Elts = tmp;
-   }
+   _tnl_RenderClippedPolygon(ctx, elts, n);
 
    /* Restore the render primitive
     */
@@ -910,14 +899,6 @@ intelRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n)
       tnl->Driver.Render.PrimitiveNotify(ctx, prim);
 }
 
-static void
-intelRenderClippedLine(struct gl_context * ctx, GLuint ii, GLuint jj)
-{
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-
-   tnl->Driver.Render.Line(ctx, ii, jj);
-}
-
 static void
 intelFastRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n)
 {
@@ -928,10 +909,18 @@ intelFastRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint
    const GLuint *start = (const GLuint *) V(elts[0]);
    int i, j;
 
-   for (i = 2; i < n; i++) {
-      COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
-      COPY_DWORDS(j, vb, vertsize, V(elts[i]));
-      COPY_DWORDS(j, vb, vertsize, start);
+   if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+      for (i = 2; i < n; i++) {
+         COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
+         COPY_DWORDS(j, vb, vertsize, V(elts[i]));
+         COPY_DWORDS(j, vb, vertsize, start);
+      }
+   } else {
+      for (i = 2; i < n; i++) {
+         COPY_DWORDS(j, vb, vertsize, start);
+         COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
+         COPY_DWORDS(j, vb, vertsize, V(elts[i]));
+      }
    }
 }
 
@@ -1036,23 +1025,23 @@ intelChooseRenderState(struct gl_context * ctx)
       else {
          tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
          tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
-         tnl->Driver.Render.ClippedLine = intelRenderClippedLine;
+         tnl->Driver.Render.ClippedLine = _tnl_RenderClippedLine;
          tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly;
       }
    }
 }
 
 static const GLenum reduced_prim[GL_POLYGON + 1] = {
-   GL_POINTS,
-   GL_LINES,
-   GL_LINES,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES
+   [GL_POINTS] = GL_POINTS,
+   [GL_LINES] = GL_LINES,
+   [GL_LINE_LOOP] = GL_LINES,
+   [GL_LINE_STRIP] = GL_LINES,
+   [GL_TRIANGLES] = GL_TRIANGLES,
+   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+   [GL_TRIANGLE_FAN] = GL_TRIANGLES,
+   [GL_QUADS] = GL_TRIANGLES,
+   [GL_QUAD_STRIP] = GL_TRIANGLES,
+   [GL_POLYGON] = GL_TRIANGLES
 };
 
 
index 3c8197226fe5d527685036fc3dfc3d69fccd9f23..2e241511049e02d2fc98efdcb261891434f4afe0 100644 (file)
@@ -43,28 +43,16 @@ AM_CXXFLAGS = $(AM_CFLAGS)
 
 noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la
 libi965_dri_la_SOURCES = $(i965_FILES)
-libi965_dri_la_LIBADD = $(INTEL_LIBS)
+libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS)
+
+libi965_compiler_la_SOURCES = $(i965_compiler_FILES)
 
 TEST_LIBS = \
-       libi965_dri.la \
-       ../common/libdricommon.la \
-       ../common/libxmlconfig.la \
-       ../common/libmegadriver_stub.la \
+       libi965_compiler.la \
         ../../../libmesa.la \
-       $(DRI_LIB_DEPS) \
-        $(CLOCK_LIB) \
+       -lpthread -ldl \
        ../common/libdri_test_stubs.la
 
-libi965_compiler_la_SOURCES = $(i965_FILES)
-libi965_compiler_la_LIBADD = $(INTEL_LIBS) \
-       ../common/libdricommon.la \
-       ../common/libxmlconfig.la \
-       ../common/libmegadriver_stub.la \
-        ../../../libmesa.la \
-       $(DRI_LIB_DEPS) \
-        $(CLOCK_LIB) \
-       ../common/libdri_test_stubs.la -lm
-
 TESTS = \
        test_fs_cmod_propagation \
        test_fs_saturate_propagation \
index eb8196d4845f6ed55dd9181e7026acd30c3db00b..81ef6283fa1b11af4828e0ca1c2163d60ccfe35c 100644 (file)
@@ -1,47 +1,18 @@
-i965_FILES = \
-       brw_binding_tables.c \
-       brw_blorp_blit.cpp \
-       brw_blorp_blit_eu.cpp \
-       brw_blorp_blit_eu.h \
-       brw_blorp.cpp \
-       brw_blorp.h \
-       brw_cc.c \
+i965_compiler_FILES = \
        brw_cfg.cpp \
        brw_cfg.h \
-       brw_clear.c \
-       brw_clip.c \
-       brw_clip.h \
-       brw_clip_line.c \
-       brw_clip_point.c \
-       brw_clip_state.c \
-       brw_clip_tri.c \
-       brw_clip_unfilled.c \
-       brw_clip_util.c \
-       brw_compute.c \
-       brw_conditional_render.c \
-       brw_context.c \
-       brw_context.h \
-       brw_cs.c \
-       brw_cs.h \
        brw_cubemap_normalize.cpp \
-       brw_curbe.c \
        brw_dead_control_flow.cpp \
        brw_dead_control_flow.h \
        brw_defines.h \
        brw_device_info.c \
        brw_device_info.h \
        brw_disasm.c \
-       brw_draw.c \
-       brw_draw.h \
-       brw_draw_upload.c \
        brw_eu.c \
        brw_eu_compact.c \
        brw_eu_emit.c \
        brw_eu.h \
        brw_eu_util.c \
-       brw_ff_gs.c \
-       brw_ff_gs_emit.c \
-       brw_ff_gs.h \
        brw_fs_builder.h \
        brw_fs_channel_expressions.cpp \
        brw_fs_cmod_propagation.cpp \
@@ -55,7 +26,6 @@ i965_FILES = \
        brw_fs_live_variables.cpp \
        brw_fs_live_variables.h \
        brw_fs_nir.cpp \
-       brw_fs_peephole_predicated_break.cpp \
        brw_fs_reg_allocate.cpp \
        brw_fs_register_coalesce.cpp \
        brw_fs_saturate_propagation.cpp \
@@ -65,10 +35,6 @@ i965_FILES = \
        brw_fs_validate.cpp \
        brw_fs_vector_splitting.cpp \
        brw_fs_visitor.cpp \
-       brw_gs.c \
-       brw_gs.h \
-       brw_gs_state.c \
-       brw_gs_surface_state.c \
        brw_inst.h \
        brw_interpolation_map.c \
        brw_ir_allocator.h \
@@ -76,45 +42,17 @@ i965_FILES = \
        brw_ir_vec4.h \
        brw_lower_texture_gradients.cpp \
        brw_lower_unnormalized_offset.cpp \
-       brw_meta_fast_clear.c \
-       brw_meta_stencil_blit.c \
-       brw_meta_updownsample.c \
-       brw_meta_util.c \
-       brw_meta_util.h \
-       brw_misc_state.c \
-       brw_multisample_state.h \
        brw_nir.h \
        brw_nir.c \
        brw_nir_analyze_boolean_resolves.c \
        brw_nir_uniforms.cpp \
-       brw_object_purgeable.c \
        brw_packed_float.c \
-       brw_performance_monitor.c \
-       brw_pipe_control.c \
-       brw_primitive_restart.c \
-       brw_program.c \
-       brw_program.h \
-       brw_queryobj.c \
+       brw_predicated_break.cpp \
        brw_reg.h \
-       brw_reset.c \
-       brw_sampler_state.c \
        brw_schedule_instructions.cpp \
-       brw_sf.c \
-       brw_sf_emit.c \
-       brw_sf.h \
-       brw_sf_state.c \
        brw_shader.cpp \
        brw_shader.h \
-       brw_state_batch.c \
-       brw_state_cache.c \
-       brw_state_dump.c \
-       brw_state.h \
-       brw_state_upload.c \
-       brw_structs.h \
        brw_surface_formats.c \
-       brw_tex.c \
-       brw_tex_layout.c \
-       brw_urb.c \
        brw_util.c \
        brw_util.h \
        brw_vec4_builder.h \
@@ -135,14 +73,88 @@ i965_FILES = \
        brw_vec4_surface_builder.h \
        brw_vec4_visitor.cpp \
        brw_vec4_vs_visitor.cpp \
+       brw_vue_map.c \
+       brw_wm_iz.cpp \
+       gen6_gs_visitor.cpp \
+       gen6_gs_visitor.h \
+       intel_asm_annotation.c \
+       intel_asm_annotation.h \
+       intel_debug.c \
+       intel_debug.h \
+       intel_reg.h \
+       intel_resolve_map.c \
+       intel_resolve_map.h
+
+i965_FILES = \
+       brw_binding_tables.c \
+       brw_blorp_blit.cpp \
+       brw_blorp_blit_eu.cpp \
+       brw_blorp_blit_eu.h \
+       brw_blorp.cpp \
+       brw_blorp.h \
+       brw_cc.c \
+       brw_clear.c \
+       brw_clip.c \
+       brw_clip.h \
+       brw_clip_line.c \
+       brw_clip_point.c \
+       brw_clip_state.c \
+       brw_clip_tri.c \
+       brw_clip_unfilled.c \
+       brw_clip_util.c \
+       brw_compute.c \
+       brw_conditional_render.c \
+       brw_context.c \
+       brw_context.h \
+       brw_cs.c \
+       brw_cs.h \
+       brw_curbe.c \
+       brw_draw.c \
+       brw_draw.h \
+       brw_draw_upload.c \
+       brw_ff_gs.c \
+       brw_ff_gs_emit.c \
+       brw_ff_gs.h \
+       brw_gs.c \
+       brw_gs.h \
+       brw_gs_state.c \
+       brw_gs_surface_state.c \
+       brw_link.cpp \
+       brw_meta_fast_clear.c \
+       brw_meta_stencil_blit.c \
+       brw_meta_updownsample.c \
+       brw_meta_util.c \
+       brw_meta_util.h \
+       brw_misc_state.c \
+       brw_multisample_state.h \
+       brw_object_purgeable.c \
+       brw_performance_monitor.c \
+       brw_pipe_control.c \
+       brw_program.c \
+       brw_program.h \
+       brw_primitive_restart.c \
+       brw_queryobj.c \
+       brw_reset.c \
+       brw_sampler_state.c \
+       brw_sf.c \
+       brw_sf_emit.c \
+       brw_sf.h \
+       brw_sf_state.c \
+       brw_state_batch.c \
+       brw_state_cache.c \
+       brw_state_dump.c \
+       brw_state.h \
+       brw_state_upload.c \
+       brw_structs.h \
+       brw_tex.c \
+       brw_tex_layout.c \
+       brw_urb.c \
        brw_vs.c \
        brw_vs.h \
        brw_vs_state.c \
        brw_vs_surface_state.c \
-       brw_vue_map.c \
        brw_wm.c \
        brw_wm.h \
-       brw_wm_iz.cpp \
        brw_wm_state.c \
        brw_wm_surface_state.c \
        gen6_blorp.cpp \
@@ -152,8 +164,6 @@ i965_FILES = \
        gen6_depth_state.c \
        gen6_depthstencil.c \
        gen6_gs_state.c \
-       gen6_gs_visitor.cpp \
-       gen6_gs_visitor.h \
        gen6_multisample_state.c \
        gen6_queryobj.c \
        gen6_sampler_state.c \
@@ -192,8 +202,6 @@ i965_FILES = \
        gen8_viewport_state.c \
        gen8_vs_state.c \
        gen8_wm_depth_stencil.c \
-       intel_asm_annotation.c \
-       intel_asm_annotation.h \
        intel_batchbuffer.c \
        intel_batchbuffer.h \
        intel_blit.c \
@@ -203,8 +211,6 @@ i965_FILES = \
        intel_buffers.c \
        intel_buffers.h \
        intel_copy_image.c \
-       intel_debug.c \
-       intel_debug.h \
        intel_extensions.c \
        intel_fbo.c \
        intel_fbo.h \
@@ -217,9 +223,6 @@ i965_FILES = \
        intel_pixel_draw.c \
        intel_pixel.h \
        intel_pixel_read.c \
-       intel_reg.h \
-       intel_resolve_map.c \
-       intel_resolve_map.h \
        intel_screen.c \
        intel_screen.h \
        intel_state.c \
index 3a73c64a88b1e3f5ae956d80e348358984a092fc..2d5abc70cb9cfae6d59b8a9d8e443cb34ae1602d 100644 (file)
@@ -223,6 +223,7 @@ brw_upload_clip_prog(struct brw_context *brw)
               /* _NEW_POLYGON, _NEW_BUFFERS */
               key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2;
               key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
+              key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD;
            }
 
            if (!ctx->Polygon._FrontBit) {
index 4e38f2f2ed6a9b5fe811a5756d375ee1064da960..54c76822e224772a1bd148219e77686ce0d202ca 100644 (file)
@@ -62,6 +62,7 @@ struct brw_clip_prog_key {
 
    GLfloat offset_factor;
    GLfloat offset_units;
+   GLfloat offset_clamp;
 };
 
 
index 6baf620a1a7782f481430b89b4907ea99fdd04d3..9a4d2a9d6f950805b0b00762e026ab0a00e95d4b 100644 (file)
@@ -188,6 +188,12 @@ static void copy_bfc( struct brw_clip_compile *c )
   GLfloat bc   = dir.y * iz;
   offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
   offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+  if (ctx->Polygon.OffsetClamp && isfinite(ctx->Polygon.OffsetClamp)) {
+    if (ctx->Polygon.OffsetClamp < 0)
+      offset = MAX2( offset, ctx->Polygon.OffsetClamp );
+    else
+      offset = MIN2( offset, ctx->Polygon.OffsetClamp );
+  }
   offset *= MRD;
 */
 static void compute_offset( struct brw_clip_compile *c )
@@ -211,6 +217,14 @@ static void compute_offset( struct brw_clip_compile *c )
 
    brw_MUL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_factor));
    brw_ADD(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_units));
+   if (c->key.offset_clamp && isfinite(c->key.offset_clamp)) {
+      brw_CMP(p,
+              vec1(brw_null_reg()),
+              c->key.offset_clamp < 0 ? BRW_CONDITIONAL_GE : BRW_CONDITIONAL_L,
+              vec1(off),
+              brw_imm_f(c->key.offset_clamp));
+      brw_SEL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_clamp));
+   }
 }
 
 
index 4d499295730288eb9e407379d45626ea93dc1eb0..6b2bbd217039ba11a9c18b5391314dcfbdb80003 100644 (file)
@@ -41,6 +41,7 @@
 #include "main/version.h"
 #include "main/vtxfmt.h"
 #include "main/texobj.h"
+#include "main/framebuffer.h"
 
 #include "vbo/vbo_context.h"
 
@@ -326,7 +327,7 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.MaxUniformBlockSize = 65536;
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_program_constants *prog = &ctx->Const.Program[i];
-      prog->MaxUniformBlocks = 12;
+      prog->MaxUniformBlocks = BRW_MAX_UBO;
       prog->MaxCombinedUniformComponents =
          prog->MaxUniformComponents +
          ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
@@ -346,7 +347,7 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
    if (_mesa_extension_override_enables.ARB_compute_shader) {
       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
-      ctx->Const.MaxUniformBufferBindings += 12;
+      ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO;
    } else {
       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
    }
@@ -582,17 +583,17 @@ brw_initialize_context_constants(struct brw_context *brw)
    /* FIXME: Tessellation stages are not yet supported in i965, so
     * MaxCombinedShaderStorageBlocks doesn't take them into account.
     */
-   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12;
-   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO;
+   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO;
    ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
    ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
-   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12;
-   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12;
-   ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
-   ctx->Const.MaxShaderStorageBufferBindings = 36;
+   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO;
+   ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3;
+   ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3;
 
    if (_mesa_extension_override_enables.ARB_compute_shader)
-      ctx->Const.MaxShaderStorageBufferBindings += 12;
+      ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO;
 
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
@@ -1298,7 +1299,7 @@ intel_prepare_render(struct brw_context *brw)
     * that will happen next will probably dirty the front buffer.  So
     * mark it as dirty here.
     */
-   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
+   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
       brw->front_buffer_dirty = true;
 }
 
@@ -1337,8 +1338,8 @@ intel_query_dri2_buffers(struct brw_context *brw,
    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
 
    memset(attachments, 0, sizeof(attachments));
-   if ((brw_is_front_buffer_drawing(fb) ||
-        brw_is_front_buffer_reading(fb) ||
+   if ((_mesa_is_front_buffer_drawing(fb) ||
+        _mesa_is_front_buffer_reading(fb) ||
         !back_rb) && front_rb) {
       /* If a fake front buffer is in use, then querying for
        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
@@ -1452,7 +1453,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
                                             drawable->w, drawable->h,
                                             buffer->pitch);
 
-   if (brw_is_front_buffer_drawing(fb) &&
+   if (_mesa_is_front_buffer_drawing(fb) &&
        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
        rb->Base.Base.NumSamples > 1) {
@@ -1510,7 +1511,7 @@ intel_update_image_buffer(struct brw_context *intel,
                                             buffer->width, buffer->height,
                                             buffer->pitch);
 
-   if (brw_is_front_buffer_drawing(fb) &&
+   if (_mesa_is_front_buffer_drawing(fb) &&
        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
        rb->Base.Base.NumSamples > 1) {
       intel_renderbuffer_upsample(intel, rb);
@@ -1538,8 +1539,8 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
    else
       return;
 
-   if (front_rb && (brw_is_front_buffer_drawing(fb) ||
-                    brw_is_front_buffer_reading(fb) || !back_rb)) {
+   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
+                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
    }
 
index d921a9bb7dd5cad55cc4b352c84ff70c4efeb5cb..a66c61e58f351e00a649cada2ca32ab30c7a381b 100644 (file)
@@ -490,6 +490,7 @@ struct brw_cs_prog_data {
    unsigned simd_size;
    bool uses_barrier;
    bool uses_num_work_groups;
+   unsigned local_invocation_id_regs;
 
    struct {
       /** @{
@@ -714,6 +715,15 @@ struct brw_vs_prog_data {
 /** Max number of render targets in a shader */
 #define BRW_MAX_DRAW_BUFFERS 8
 
+/** Max number of UBOs in a shader */
+#define BRW_MAX_UBO 12
+
+/** Max number of SSBOs in a shader */
+#define BRW_MAX_SSBO 12
+
+/** Max number of combined UBOs and SSBOs in a shader */
+#define BRW_MAX_COMBINED_UBO_SSBO (BRW_MAX_UBO + BRW_MAX_SSBO)
+
 /** Max number of atomic counter buffer objects in a shader */
 #define BRW_MAX_ABO 16
 
@@ -750,7 +760,8 @@ struct brw_vs_prog_data {
 
 #define BRW_MAX_SURFACES   (BRW_MAX_DRAW_BUFFERS +                      \
                             BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \
-                            12 + /* ubo */                              \
+                            BRW_MAX_UBO +                               \
+                            BRW_MAX_SSBO +                              \
                             BRW_MAX_ABO +                               \
                             BRW_MAX_IMAGES +                            \
                             2 + /* shader time, pull constants */       \
@@ -1453,6 +1464,8 @@ struct brw_context
        */
       drm_intel_bo *multisampled_null_render_target_bo;
       uint32_t fast_clear_op;
+
+      float offset_clamp;
    } wm;
 
    struct {
@@ -1716,7 +1729,12 @@ void brw_validate_textures( struct brw_context *brw );
  */
 void brwInitFragProgFuncs( struct dd_function_table *functions );
 
-int brw_get_scratch_size(int size);
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+static inline int
+brw_get_scratch_size(int size)
+{
+   return util_next_power_of_two(size | 1023);
+}
 void brw_get_scratch_bo(struct brw_context *brw,
                        drm_intel_bo **scratch_bo, int size);
 void brw_init_shader_time(struct brw_context *brw);
index 6b64030a8689e7a00ffa369b036549176be53565..45fb816c160b5942316ffac04f409afa1db1c568 100644 (file)
@@ -98,8 +98,15 @@ brw_codegen_cs_prog(struct brw_context *brw,
       start_time = get_time();
    }
 
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      brw_dump_ir("compute", prog, &cs->base, &cp->program.Base);
+
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, &cp->program.Base, ST_CS);
+
    program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
-                         &cp->program, prog, &program_size);
+                         &cp->program, prog, st_index, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
index 0c0ed2bc909897a264bd8b643cfcdc3760bd8af6..17c2ff9871a85db3c185a5006b82ef658fabc36d 100644 (file)
@@ -46,10 +46,12 @@ brw_cs_emit(struct brw_context *brw,
             struct brw_cs_prog_data *prog_data,
             struct gl_compute_program *cp,
             struct gl_shader_program *prog,
+            int shader_time_index,
             unsigned *final_assembly_size);
 
-unsigned
-brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width);
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+                             void *buffer, uint32_t threads, uint32_t stride);
 
 #ifdef __cplusplus
 }
index 6a75e067915753f104ac4ec38e672bf8c1e680c8..39a26b0520116bc1bcf0b1ebc897bf950672beec 100644 (file)
@@ -33,6 +33,7 @@
 #include "main/enums.h"
 #include "main/macros.h"
 #include "main/transformfeedback.h"
+#include "main/framebuffer.h"
 #include "tnl/tnl.h"
 #include "vbo/vbo_context.h"
 #include "swrast/swrast.h"
 #define FILE_DEBUG_FLAG DEBUG_PRIMS
 
 static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
-   _3DPRIM_POINTLIST,
-   _3DPRIM_LINELIST,
-   _3DPRIM_LINELOOP,
-   _3DPRIM_LINESTRIP,
-   _3DPRIM_TRILIST,
-   _3DPRIM_TRISTRIP,
-   _3DPRIM_TRIFAN,
-   _3DPRIM_QUADLIST,
-   _3DPRIM_QUADSTRIP,
-   _3DPRIM_POLYGON,
-   _3DPRIM_LINELIST_ADJ,
-   _3DPRIM_LINESTRIP_ADJ,
-   _3DPRIM_TRILIST_ADJ,
-   _3DPRIM_TRISTRIP_ADJ,
+   [GL_POINTS] =_3DPRIM_POINTLIST,
+   [GL_LINES] = _3DPRIM_LINELIST,
+   [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
+   [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
+   [GL_TRIANGLES] = _3DPRIM_TRILIST,
+   [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+   [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+   [GL_QUADS] = _3DPRIM_QUADLIST,
+   [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
+   [GL_POLYGON] = _3DPRIM_POLYGON,
+   [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+   [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+   [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+   [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
 };
 
 
 static const GLenum reduced_prim[GL_POLYGON+1] = {
-   GL_POINTS,
-   GL_LINES,
-   GL_LINES,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES
+   [GL_POINTS] = GL_POINTS,
+   [GL_LINES] = GL_LINES,
+   [GL_LINE_LOOP] = GL_LINES,
+   [GL_LINE_STRIP] = GL_LINES,
+   [GL_TRIANGLES] = GL_TRIANGLES,
+   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+   [GL_TRIANGLE_FAN] = GL_TRIANGLES,
+   [GL_QUADS] = GL_TRIANGLES,
+   [GL_QUAD_STRIP] = GL_TRIANGLES,
+   [GL_POLYGON] = GL_TRIANGLES
 };
 
 uint32_t
@@ -364,7 +365,7 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
    struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
    struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
 
-   if (brw_is_front_buffer_drawing(fb))
+   if (_mesa_is_front_buffer_drawing(fb))
       front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
 
    if (front_irb)
index 0a6b23284d970fa3a897d161432286a676e42761..5049851c61725a6a144d6096686af9211e007e87 100644 (file)
@@ -210,7 +210,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
    inst->regs_written = regs_written;
 
    if (devinfo->gen < 7) {
-      inst->base_mrf = 13;
+      inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
       inst->header_size = 1;
       if (devinfo->gen == 4)
          inst->mlen = 3;
@@ -2698,7 +2698,7 @@ fs_visitor::emit_repclear_shader()
 bool
 fs_visitor::remove_duplicate_mrf_writes()
 {
-   fs_inst *last_mrf_move[16];
+   fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->gen)];
    bool progress = false;
 
    /* Need to update the MRF tracking for compressed instructions. */
@@ -3019,7 +3019,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
           * else does except for register spill/unspill, which generates and
           * uses its MRF within a single IR instruction.
           */
-         inst->base_mrf = 14;
+         inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
          inst->mlen = 1;
       }
    }
@@ -4738,20 +4738,43 @@ fs_visitor::setup_vs_payload()
    payload.num_regs = 2;
 }
 
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to be coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ *    no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ *    easily use 16-bit words rather than 32-bit dwords in the push constant
+ *    data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ *    conveying the data, and thereby reduce push constant usage.
+ *
+ */
 void
 fs_visitor::setup_cs_payload()
 {
    assert(devinfo->gen >= 7);
+   brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
 
    payload.num_regs = 1;
 
    if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
-      const unsigned local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(dispatch_width);
-      assert((local_id_dwords & 0x7) == 0);
-      const unsigned local_id_regs = local_id_dwords / 8;
+      prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
       payload.local_invocation_id_reg = payload.num_regs;
-      payload.num_regs += local_id_regs;
+      payload.num_regs += prog_data->local_invocation_id_regs;
    }
 }
 
@@ -4843,7 +4866,7 @@ fs_visitor::optimize()
       OPT(opt_algebraic);
       OPT(opt_cse);
       OPT(opt_copy_propagate);
-      OPT(opt_peephole_predicated_break);
+      OPT(opt_predicated_break, this);
       OPT(opt_cmod_propagation);
       OPT(dead_code_eliminate);
       OPT(opt_peephole_sel);
@@ -5118,25 +5141,13 @@ brw_wm_fs_emit(struct brw_context *brw,
                struct brw_wm_prog_data *prog_data,
                struct gl_fragment_program *fp,
                struct gl_shader_program *prog,
+               int shader_time_index8, int shader_time_index16,
                unsigned *final_assembly_size)
 {
-   struct brw_shader *shader = NULL;
-   if (prog)
-      shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
-   if (unlikely(INTEL_DEBUG & DEBUG_WM) && shader->base.ir)
-      brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
-
-   int st_index8 = -1, st_index16 = -1;
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
-      st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
-      st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
-   }
-
    /* Now the main event: Visit the shader IR and generate our FS IR for it.
     */
    fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
-                &prog_data->base, &fp->Base, fp->Base.nir, 8, st_index8);
+                &prog_data->base, &fp->Base, fp->Base.nir, 8, shader_time_index8);
    if (!v.run_fs(false /* do_rep_send */)) {
       if (prog) {
          prog->LinkStatus = false;
@@ -5151,7 +5162,7 @@ brw_wm_fs_emit(struct brw_context *brw,
 
    cfg_t *simd16_cfg = NULL;
    fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
-                 &prog_data->base, &fp->Base, fp->Base.nir, 16, st_index16);
+                 &prog_data->base, &fp->Base, fp->Base.nir, 16, shader_time_index16);
    if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
       if (!v.simd16_unsupported) {
          /* Try a SIMD16 compile */
@@ -5198,6 +5209,42 @@ brw_wm_fs_emit(struct brw_context *brw,
    return g.get_assembly(final_assembly_size);
 }
 
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+                             void *buffer, uint32_t threads, uint32_t stride)
+{
+   if (prog_data->local_invocation_id_regs == 0)
+      return;
+
+   /* 'stride' should be an integer number of registers, that is, a multiple
+    * of 32 bytes.
+    */
+   assert(stride % 32 == 0);
+
+   unsigned x = 0, y = 0, z = 0;
+   for (unsigned t = 0; t < threads; t++) {
+      uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+      for (unsigned i = 0; i < prog_data->simd_size; i++) {
+         param[0 * prog_data->simd_size + i] = x;
+         param[1 * prog_data->simd_size + i] = y;
+         param[2 * prog_data->simd_size + i] = z;
+
+         x++;
+         if (x == prog_data->local_size[0]) {
+            x = 0;
+            y++;
+            if (y == prog_data->local_size[1]) {
+               y = 0;
+               z++;
+               if (z == prog_data->local_size[2])
+                  z = 0;
+            }
+         }
+      }
+   }
+}
+
 fs_reg *
 fs_visitor::emit_cs_local_invocation_id_setup()
 {
@@ -5242,43 +5289,35 @@ brw_cs_emit(struct brw_context *brw,
             struct brw_cs_prog_data *prog_data,
             struct gl_compute_program *cp,
             struct gl_shader_program *prog,
+            int shader_time_index,
             unsigned *final_assembly_size)
 {
-   struct brw_shader *shader =
-      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
-   if (unlikely(INTEL_DEBUG & DEBUG_CS))
-      brw_dump_ir("compute", prog, &shader->base, &cp->Base);
-
    prog_data->local_size[0] = cp->LocalSize[0];
    prog_data->local_size[1] = cp->LocalSize[1];
    prog_data->local_size[2] = cp->LocalSize[2];
    unsigned local_workgroup_size =
       cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
+   unsigned max_cs_threads = brw->intelScreen->compiler->devinfo->max_cs_threads;
 
    cfg_t *cfg = NULL;
    const char *fail_msg = NULL;
 
-   int st_index = -1;
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
-
    /* Now the main event: Visit the shader IR and generate our CS IR for it.
     */
    fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
-                 &prog_data->base, &cp->Base, cp->Base.nir, 8, st_index);
+                 &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index);
    if (!v8.run_cs()) {
       fail_msg = v8.fail_msg;
-   } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+   } else if (local_workgroup_size <= 8 * max_cs_threads) {
       cfg = v8.cfg;
       prog_data->simd_size = 8;
    }
 
    fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
-                  &prog_data->base, &cp->Base, cp->Base.nir, 16, st_index);
+                  &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index);
    if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
        !fail_msg && !v8.simd16_unsupported &&
-       local_workgroup_size <= 16 * brw->max_cs_threads) {
+       local_workgroup_size <= 16 * max_cs_threads) {
       /* Try a SIMD16 compile */
       v16.import_uniforms(&v8);
       if (!v16.run_cs()) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
deleted file mode 100644 (file)
index b75f40b..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_fs.h"
-#include "brw_cfg.h"
-
-using namespace brw;
-
-/** @file brw_fs_peephole_predicated_break.cpp
- *
- * Loops are often structured as
- *
- * loop:
- *    CMP.f0
- *    (+f0) IF
- *    BREAK
- *    ENDIF
- *    ...
- *    WHILE loop
- *
- * This peephole pass removes the IF and ENDIF instructions and predicates the
- * BREAK, dropping two instructions from the loop body.
- *
- * If the loop was a DO { ... } WHILE loop, it looks like
- *
- * loop:
- *    ...
- *    CMP.f0
- *    (+f0) IF
- *    BREAK
- *    ENDIF
- *    WHILE loop
- *
- * and we can remove the BREAK instruction and predicate the WHILE.
- */
-
-bool
-fs_visitor::opt_peephole_predicated_break()
-{
-   bool progress = false;
-
-   foreach_block (block, cfg) {
-      if (block->start_ip != block->end_ip)
-         continue;
-
-      /* BREAK and CONTINUE instructions, by definition, can only be found at
-       * the ends of basic blocks.
-       */
-      fs_inst *jump_inst = (fs_inst *)block->end();
-      if (jump_inst->opcode != BRW_OPCODE_BREAK &&
-          jump_inst->opcode != BRW_OPCODE_CONTINUE)
-         continue;
-
-      fs_inst *if_inst = (fs_inst *)block->prev()->end();
-      if (if_inst->opcode != BRW_OPCODE_IF)
-         continue;
-
-      fs_inst *endif_inst = (fs_inst *)block->next()->start();
-      if (endif_inst->opcode != BRW_OPCODE_ENDIF)
-         continue;
-
-      bblock_t *jump_block = block;
-      bblock_t *if_block = jump_block->prev();
-      bblock_t *endif_block = jump_block->next();
-
-      /* For Sandybridge with IF with embedded comparison we need to emit an
-       * instruction to set the flag register.
-       */
-      if (devinfo->gen == 6 && if_inst->conditional_mod) {
-         const fs_builder ibld(this, if_block, if_inst);
-         ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
-                  if_inst->conditional_mod);
-         jump_inst->predicate = BRW_PREDICATE_NORMAL;
-      } else {
-         jump_inst->predicate = if_inst->predicate;
-         jump_inst->predicate_inverse = if_inst->predicate_inverse;
-      }
-
-      bblock_t *earlier_block = if_block;
-      if (if_block->start_ip == if_block->end_ip) {
-         earlier_block = if_block->prev();
-      }
-
-      if_inst->remove(if_block);
-
-      bblock_t *later_block = endif_block;
-      if (endif_block->start_ip == endif_block->end_ip) {
-         later_block = endif_block->next();
-      }
-      endif_inst->remove(endif_block);
-
-      if (!earlier_block->ends_with_control_flow()) {
-         earlier_block->children.make_empty();
-         earlier_block->add_successor(cfg->mem_ctx, jump_block);
-      }
-
-      if (!later_block->starts_with_control_flow()) {
-         later_block->parents.make_empty();
-      }
-      jump_block->add_successor(cfg->mem_ctx, later_block);
-
-      if (earlier_block->can_combine_with(jump_block)) {
-         earlier_block->combine_with(jump_block);
-
-         block = earlier_block;
-      }
-
-      /* Now look at the first instruction of the block following the BREAK. If
-       * it's a WHILE, we can delete the break, predicate the WHILE, and join
-       * the two basic blocks.
-       */
-      bblock_t *while_block = earlier_block->next();
-      fs_inst *while_inst = (fs_inst *)while_block->start();
-
-      if (jump_inst->opcode == BRW_OPCODE_BREAK &&
-          while_inst->opcode == BRW_OPCODE_WHILE &&
-          while_inst->predicate == BRW_PREDICATE_NONE) {
-         jump_inst->remove(earlier_block);
-         while_inst->predicate = jump_inst->predicate;
-         while_inst->predicate_inverse = !jump_inst->predicate_inverse;
-
-         earlier_block->children.make_empty();
-         earlier_block->add_successor(cfg->mem_ctx, while_block);
-
-         assert(earlier_block->can_combine_with(while_block));
-         earlier_block->combine_with(while_block);
-
-         earlier_block->next()->parents.make_empty();
-         earlier_block->add_successor(cfg->mem_ctx, earlier_block->next());
-      }
-
-      progress = true;
-   }
-
-   if (progress)
-      invalidate_live_intervals();
-
-   return progress;
-}
index 6900cee86f44f687d4d70b532ac4757feb8ec6ca..c3a037be4b17c73bffabf5d2f477a121e4daa42f 100644 (file)
@@ -30,8 +30,6 @@
 #include "glsl/glsl_types.h"
 #include "glsl/ir_optimization.h"
 
-#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
-
 using namespace brw;
 
 static void
index d190d8eb6b43cb09403e94105e48f4e01142dee8..8613725f6b9b233e1dac3f835289fda838027ba7 100644 (file)
@@ -155,18 +155,6 @@ fs_visitor::opt_peephole_sel()
       if (movs == 0)
          continue;
 
-      enum brw_predicate predicate;
-      bool predicate_inverse;
-      if (devinfo->gen == 6 && if_inst->conditional_mod) {
-         /* For Sandybridge with IF with embedded comparison */
-         predicate = BRW_PREDICATE_NORMAL;
-         predicate_inverse = false;
-      } else {
-         /* Separate CMP and IF instructions */
-         predicate = if_inst->predicate;
-         predicate_inverse = if_inst->predicate_inverse;
-      }
-
       /* Generate SEL instructions for pairs of MOVs to a common destination. */
       for (int i = 0; i < movs; i++) {
          if (!then_mov[i] || !else_mov[i])
@@ -195,13 +183,6 @@ fs_visitor::opt_peephole_sel()
       if (movs == 0)
          continue;
 
-      /* Emit a CMP if our IF used the embedded comparison */
-      if (devinfo->gen == 6 && if_inst->conditional_mod) {
-         const fs_builder ibld(this, block, if_inst);
-         ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
-                  if_inst->conditional_mod);
-      }
-
       for (int i = 0; i < movs; i++) {
          const fs_builder ibld = fs_builder(this, then_block, then_mov[i])
                                  .at(block, if_inst);
@@ -220,7 +201,7 @@ fs_visitor::opt_peephole_sel()
                ibld.MOV(src0, then_mov[i]->src[0]);
             }
 
-            set_predicate_inv(predicate, predicate_inverse,
+            set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse,
                               ibld.SEL(then_mov[i]->dst, src0,
                                        else_mov[i]->src[0]));
          }
index 4d0b125ffe4024b651bd06ebfb086f16fe2b42d2..e0165fb4a23f6f8e5dbb3e40c394233b2822200e 100644 (file)
@@ -52,22 +52,17 @@ assign_gs_binding_table_offsets(const struct brw_device_info *devinfo,
 }
 
 bool
-brw_compile_gs_prog(struct brw_context *brw,
+brw_codegen_gs_prog(struct brw_context *brw,
                     struct gl_shader_program *prog,
                     struct brw_geometry_program *gp,
-                    struct brw_gs_prog_key *key,
-                    struct brw_gs_compile_output *output)
+                    struct brw_gs_prog_key *key)
 {
+   struct brw_stage_state *stage_state = &brw->gs.base;
    struct brw_gs_compile c;
    memset(&c, 0, sizeof(c));
    c.key = *key;
    c.gp = gp;
 
-   /* We get the bind map as input in the output struct...*/
-   c.prog_data.base.base.map_entries = output->prog_data.base.base.map_entries;
-   memcpy(c.prog_data.base.base.bind_map, output->prog_data.base.base.bind_map,
-          sizeof(c.prog_data.base.base.bind_map));
-
    c.prog_data.include_primitive_id =
       (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
 
@@ -296,48 +291,35 @@ brw_compile_gs_prog(struct brw_context *brw,
     */
    c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
 
+   if (unlikely(INTEL_DEBUG & DEBUG_GS))
+      brw_dump_ir("geometry", prog, gs, NULL);
+
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS);
+
    void *mem_ctx = ralloc_context(NULL);
    unsigned program_size;
    const unsigned *program =
-      brw_gs_emit(brw, prog, &c, mem_ctx, &program_size);
+      brw_gs_emit(brw, prog, &c, mem_ctx, st_index, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
    }
 
-   output->mem_ctx = mem_ctx;
-   output->program = program;
-   output->program_size = program_size;
-   memcpy(&output->prog_data, &c.prog_data,
-          sizeof(output->prog_data));
-
-   return true;
-}
-
-bool
-brw_codegen_gs_prog(struct brw_context *brw,
-                    struct gl_shader_program *prog,
-                    struct brw_geometry_program *gp,
-                    struct brw_gs_prog_key *key)
-{
-   struct brw_gs_compile_output output;
-   struct brw_stage_state *stage_state = &brw->gs.base;
-
-   if (brw_compile_gs_prog(brw, prog, gp, key, &output))
-      return false;
-
-   if (output.prog_data.base.base.total_scratch) {
+   /* Scratch space is used for register spilling */
+   if (c.prog_data.base.base.total_scratch) {
       brw_get_scratch_bo(brw, &stage_state->scratch_bo,
-                        output.prog_data.base.base.total_scratch *
+                        c.prog_data.base.base.total_scratch *
                          brw->max_gs_threads);
    }
 
    brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
-                    key, sizeof(*key),
-                    output.program, output.program_size,
-                    &output.prog_data, sizeof(output.prog_data),
+                    &c.key, sizeof(c.key),
+                    program, program_size,
+                    &c.prog_data, sizeof(c.prog_data),
                     &stage_state->prog_offset, &brw->gs.prog_data);
-   ralloc_free(output.mem_ctx);
+   ralloc_free(mem_ctx);
 
    return true;
 }
index 573bbdb16f8e34605f374e52099713afde105058..9879f3d6464d28e7c65b48eb72be060803cbf994 100644 (file)
@@ -37,24 +37,6 @@ struct gl_context;
 struct gl_shader_program;
 struct gl_program;
 
-struct brw_gs_compile_output {
-   void *mem_ctx;
-   const void *program;
-   uint32_t program_size;
-   struct brw_gs_prog_data prog_data;
-};
-
-struct brw_gs_prog_key;
-
-bool
-brw_compile_gs_prog(struct brw_context *brw,
-                    struct gl_shader_program *prog,
-                    struct brw_geometry_program *gp,
-                    struct brw_gs_prog_key *key,
-                    struct brw_gs_compile_output *output);
-
-bool brw_gs_prog_data_compare(const void *a, const void *b);
-
 void
 brw_upload_gs_prog(struct brw_context *brw);
 
index c5132ba15ed2e274d6232b5c58049a085bd075cb..ab37b709d653baf2ed7413d975105d92d607adfe 100644 (file)
@@ -42,6 +42,12 @@ extern "C" {
 /** Maximum SEND message length */
 #define BRW_MAX_MSG_LENGTH 15
 
+/** First MRF register used by spills */
+#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
+
+/** First MRF register used by pull loads */
+#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
+
 /* brw_context.h has a forward declaration of brw_inst, so name the struct. */
 typedef struct brw_inst {
    uint64_t data[2];
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
new file mode 100644 (file)
index 0000000..fc9bee4
--- /dev/null
@@ -0,0 +1,280 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "main/macros.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_gs.h"
+#include "brw_fs.h"
+#include "brw_cfg.h"
+#include "brw_nir.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/glsl_parser_extras.h"
+#include "main/shaderapi.h"
+
+/**
+ * Performs a compile of the shader stages even when we don't know
+ * what non-orthogonal state will be set, in the hope that it reflects
+ * the eventual NOS used, and thus allows us to produce link failures.
+ */
+static bool
+brw_shader_precompile(struct gl_context *ctx,
+                      struct gl_shader_program *sh_prog)
+{
+   struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+   struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+   struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+   if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
+      return false;
+
+   if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
+      return false;
+
+   if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
+      return false;
+
+   if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program))
+      return false;
+
+   return true;
+}
+
+static void
+brw_lower_packing_builtins(struct brw_context *brw,
+                           gl_shader_stage shader_type,
+                           exec_list *ir)
+{
+   int ops = LOWER_PACK_SNORM_2x16
+           | LOWER_UNPACK_SNORM_2x16
+           | LOWER_PACK_UNORM_2x16
+           | LOWER_UNPACK_UNORM_2x16;
+
+   if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+      ops |= LOWER_UNPACK_UNORM_4x8
+           | LOWER_UNPACK_SNORM_4x8
+           | LOWER_PACK_UNORM_4x8
+           | LOWER_PACK_SNORM_4x8;
+   }
+
+   if (brw->gen >= 7) {
+      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
+       * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
+       * lowering is needed. For SOA code, the Half2x16 ops must be
+       * scalarized.
+       */
+      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
+             |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
+      }
+   } else {
+      ops |= LOWER_PACK_HALF_2x16
+          |  LOWER_UNPACK_HALF_2x16;
+   }
+
+   lower_packing_builtins(ir, ops);
+}
+
+static void
+process_glsl_ir(gl_shader_stage stage,
+                struct brw_context *brw,
+                struct gl_shader_program *shader_prog,
+                struct gl_shader *shader)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct gl_shader_compiler_options *options =
+      &ctx->Const.ShaderCompilerOptions[shader->Stage];
+
+   /* Temporary memory context for any new IR. */
+   void *mem_ctx = ralloc_context(NULL);
+
+   ralloc_adopt(mem_ctx, shader->ir);
+
+   /* lower_packing_builtins() inserts arithmetic instructions, so it
+    * must precede lower_instructions().
+    */
+   brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
+   do_mat_op_to_vec(shader->ir);
+   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
+   lower_instructions(shader->ir,
+                      MOD_TO_FLOOR |
+                      DIV_TO_MUL_RCP |
+                      SUB_TO_ADD_NEG |
+                      EXP_TO_EXP2 |
+                      LOG_TO_LOG2 |
+                      bitfield_insert |
+                      LDEXP_TO_ARITH |
+                      CARRY_TO_ARITH |
+                      BORROW_TO_ARITH);
+
+   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
+    * if-statements need to be flattened.
+    */
+   if (brw->gen < 6)
+      lower_if_to_cond_assign(shader->ir, 16);
+
+   do_lower_texture_projection(shader->ir);
+   brw_lower_texture_gradients(brw, shader->ir);
+   do_vec_index_to_cond_assign(shader->ir);
+   lower_vector_insert(shader->ir, true);
+   lower_offset_arrays(shader->ir);
+   brw_do_lower_unnormalized_offset(shader->ir);
+   lower_noise(shader->ir);
+   lower_quadop_vector(shader->ir, false);
+
+   bool lowered_variable_indexing =
+      lower_variable_index_to_cond_assign((gl_shader_stage)stage,
+                                          shader->ir,
+                                          options->EmitNoIndirectInput,
+                                          options->EmitNoIndirectOutput,
+                                          options->EmitNoIndirectTemp,
+                                          options->EmitNoIndirectUniform);
+
+   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
+      perf_debug("Unsupported form of variable indexing in %s; falling "
+                 "back to very inefficient code generation\n",
+                 _mesa_shader_stage_to_abbrev(shader->Stage));
+   }
+
+   lower_ubo_reference(shader, shader->ir);
+
+   bool progress;
+   do {
+      progress = false;
+
+      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
+         brw_do_channel_expressions(shader->ir);
+         brw_do_vector_splitting(shader->ir);
+      }
+
+      progress = do_lower_jumps(shader->ir, true, true,
+                                true, /* main return */
+                                false, /* continue */
+                                false /* loops */
+                                ) || progress;
+
+      progress = do_common_optimization(shader->ir, true, true,
+                                        options, ctx->Const.NativeIntegers) || progress;
+   } while (progress);
+
+   validate_ir_tree(shader->ir);
+
+   /* Now that we've finished altering the linked IR, reparent any live IR back
+    * to the permanent memory context, and free the temporary one (discarding any
+    * junk we optimized away).
+    */
+   reparent_ir(shader->ir, shader->ir);
+   ralloc_free(mem_ctx);
+
+   if (ctx->_Shader->Flags & GLSL_DUMP) {
+      fprintf(stderr, "\n");
+      fprintf(stderr, "GLSL IR for linked %s program %d:\n",
+              _mesa_shader_stage_to_string(shader->Stage),
+              shader_prog->Name);
+      _mesa_print_ir(stderr, shader->ir, NULL);
+      fprintf(stderr, "\n");
+   }
+}
+
+GLboolean
+brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
+{
+   struct brw_context *brw = brw_context(ctx);
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
+   unsigned int stage;
+
+   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
+      struct gl_shader *shader = shProg->_LinkedShaders[stage];
+      if (!shader)
+        continue;
+
+      struct gl_program *prog =
+        ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
+                                shader->Name);
+      if (!prog)
+       return false;
+      prog->Parameters = _mesa_new_parameter_list();
+
+      _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
+
+      process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
+
+      /* Make a pass over the IR to add state references for any built-in
+       * uniforms that are used.  This has to be done now (during linking).
+       * Code generation doesn't happen until the first time this shader is
+       * used for rendering.  Waiting until then to generate the parameters is
+       * too late.  At that point, the values for the built-in uniforms won't
+       * get sent to the shader.
+       */
+      foreach_in_list(ir_instruction, node, shader->ir) {
+        ir_variable *var = node->as_variable();
+
+        if ((var == NULL) || (var->data.mode != ir_var_uniform)
+            || (strncmp(var->name, "gl_", 3) != 0))
+           continue;
+
+        const ir_state_slot *const slots = var->get_state_slots();
+        assert(slots != NULL);
+
+        for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
+           _mesa_add_state_reference(prog->Parameters,
+                                     (gl_state_index *) slots[i].tokens);
+        }
+      }
+
+      do_set_program_inouts(shader->ir, prog, shader->Stage);
+
+      prog->SamplersUsed = shader->active_samplers;
+      prog->ShadowSamplers = shader->shadow_samplers;
+      _mesa_update_shader_textures_used(shProg, prog);
+
+      _mesa_reference_program(ctx, &shader->Program, prog);
+
+      brw_add_texrect_params(prog);
+
+      prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
+                                 is_scalar_shader_stage(compiler, stage));
+
+      _mesa_reference_program(ctx, &prog, NULL);
+   }
+
+   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
+      for (unsigned i = 0; i < shProg->NumShaders; i++) {
+         const struct gl_shader *sh = shProg->Shaders[i];
+         if (!sh)
+            continue;
+
+         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
+                 _mesa_shader_stage_to_string(sh->Stage),
+                 i, shProg->Name);
+         fprintf(stderr, "%s", sh->Source);
+         fprintf(stderr, "\n");
+      }
+   }
+
+   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
+      return false;
+
+   return true;
+}
index 7d17edb9023531a5c30f2828789e37a2dc83c1bd..cf6ba5b4aeb5aef4a57e562522f9810c677686bd 100644 (file)
@@ -887,14 +887,6 @@ brw_upload_invariant_state(struct brw_context *brw)
    brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
    brw->last_pipeline = BRW_RENDER_PIPELINE;
 
-   if (brw->gen < 6) {
-      /* Disable depth offset clamping. */
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
-      OUT_BATCH_F(0.0);
-      ADVANCE_BATCH();
-   }
-
    if (brw->gen >= 8) {
       BEGIN_BATCH(3);
       OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
index 7ab6afa845881c5e73ebd663dbdd75243022732e..19206600e6476fd4b8b4c8fbc72edef311cc8dec 100644 (file)
 static void
 brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
 {
-   nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
-                            is_scalar ? type_size_scalar : type_size_vec4);
+   switch (nir->stage) {
+   case MESA_SHADER_GEOMETRY:
+      foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+         var->data.driver_location = var->data.location;
+      }
+      break;
+   default:
+      nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+                               is_scalar ? type_size_scalar : type_size_vec4);
+      break;
+   }
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_predicated_break.cpp
new file mode 100644 (file)
index 0000000..607715d
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_cfg.h"
+
+using namespace brw;
+
+/** @file brw_predicated_break.cpp
+ *
+ * Loops are often structured as
+ *
+ * loop:
+ *    CMP.f0
+ *    (+f0) IF
+ *    BREAK
+ *    ENDIF
+ *    ...
+ *    WHILE loop
+ *
+ * This peephole pass removes the IF and ENDIF instructions and predicates the
+ * BREAK, dropping two instructions from the loop body.
+ *
+ * If the loop was a DO { ... } WHILE loop, it looks like
+ *
+ * loop:
+ *    ...
+ *    CMP.f0
+ *    (+f0) IF
+ *    BREAK
+ *    ENDIF
+ *    WHILE loop
+ *
+ * and we can remove the BREAK instruction and predicate the WHILE.
+ *
+ * The same IF/ENDIF removal is applied when the lone instruction between
+ * them is a CONTINUE; only the final BREAK-into-WHILE folding is restricted
+ * to BREAK.
+ *
+ * opt_predicated_break(s) walks every block of s->cfg and, for each match:
+ *
+ *  1. Requires the candidate block to have start_ip == end_ip (a single
+ *     instruction) that is a BREAK or CONTINUE, the previous block to end in
+ *     an IF and the next block to start with an ENDIF.
+ *  2. Copies the IF's predicate and predicate_inverse onto the jump, then
+ *     removes the IF and the ENDIF from their blocks.
+ *  3. Picks the neighbours to rewire: when the IF (or ENDIF) was the only
+ *     instruction of its block, the block before (or after) it is used
+ *     instead -- presumably because remove() drops a block that becomes
+ *     empty; confirm against backend_instruction::remove().
+ *  4. Re-links successors/parents around the jump block (skipping the
+ *     resets when the neighbour itself ends/starts with control flow) and
+ *     merges the jump block into the earlier block when can_combine_with()
+ *     allows it.
+ *  5. If the jump is a BREAK and the following block starts with an
+ *     unpredicated WHILE, removes the BREAK and gives the WHILE the BREAK's
+ *     predicate with predicate_inverse negated, i.e. the loop should repeat
+ *     when the BREAK condition is false.
+ *
+ * Returns true if at least one IF/ENDIF pair was removed; in that case
+ * s->invalidate_live_intervals() has been called before returning.
+ */
+
+bool
+opt_predicated_break(backend_shader *s)
+{
+   bool progress = false;
+
+   foreach_block (block, s->cfg) {
+      if (block->start_ip != block->end_ip)
+         continue;
+
+      /* BREAK and CONTINUE instructions, by definition, can only be found at
+       * the ends of basic blocks.
+       */
+      backend_instruction *jump_inst = block->end();
+      if (jump_inst->opcode != BRW_OPCODE_BREAK &&
+          jump_inst->opcode != BRW_OPCODE_CONTINUE)
+         continue;
+
+      backend_instruction *if_inst = block->prev()->end();
+      if (if_inst->opcode != BRW_OPCODE_IF)
+         continue;
+
+      backend_instruction *endif_inst = block->next()->start();
+      if (endif_inst->opcode != BRW_OPCODE_ENDIF)
+         continue;
+
+      bblock_t *jump_block = block;
+      bblock_t *if_block = jump_block->prev();
+      bblock_t *endif_block = jump_block->next();
+
+      jump_inst->predicate = if_inst->predicate;
+      jump_inst->predicate_inverse = if_inst->predicate_inverse;
+
+      bblock_t *earlier_block = if_block;
+      if (if_block->start_ip == if_block->end_ip) {
+         earlier_block = if_block->prev();
+      }
+
+      if_inst->remove(if_block);
+
+      bblock_t *later_block = endif_block;
+      if (endif_block->start_ip == endif_block->end_ip) {
+         later_block = endif_block->next();
+      }
+      endif_inst->remove(endif_block);
+
+      if (!earlier_block->ends_with_control_flow()) {
+         earlier_block->children.make_empty();
+         earlier_block->add_successor(s->cfg->mem_ctx, jump_block);
+      }
+
+      if (!later_block->starts_with_control_flow()) {
+         later_block->parents.make_empty();
+      }
+      jump_block->add_successor(s->cfg->mem_ctx, later_block);
+
+      if (earlier_block->can_combine_with(jump_block)) {
+         earlier_block->combine_with(jump_block);
+
+         block = earlier_block;
+      }
+
+      /* Now look at the first instruction of the block following the BREAK. If
+       * it's a WHILE, we can delete the break, predicate the WHILE, and join
+       * the two basic blocks.
+       *
+       * NOTE(review): jump_inst's predicate fields are read after
+       * jump_inst->remove(); this relies on remove() only unlinking the
+       * instruction rather than freeing it -- confirm.
+       */
+      bblock_t *while_block = earlier_block->next();
+      backend_instruction *while_inst = while_block->start();
+
+      if (jump_inst->opcode == BRW_OPCODE_BREAK &&
+          while_inst->opcode == BRW_OPCODE_WHILE &&
+          while_inst->predicate == BRW_PREDICATE_NONE) {
+         jump_inst->remove(earlier_block);
+         while_inst->predicate = jump_inst->predicate;
+         while_inst->predicate_inverse = !jump_inst->predicate_inverse;
+
+         earlier_block->children.make_empty();
+         earlier_block->add_successor(s->cfg->mem_ctx, while_block);
+
+         assert(earlier_block->can_combine_with(while_block));
+         earlier_block->combine_with(while_block);
+
+         earlier_block->next()->parents.make_empty();
+         earlier_block->add_successor(s->cfg->mem_ctx, earlier_block->next());
+      }
+
+      progress = true;
+   }
+
+   if (progress)
+      s->invalidate_live_intervals();
+
+   return progress;
+}
index 0a9a99edf2d5b6bccd2f4810b63946a187e9f7f1..dbd0e50228b9eb4f37be8a4171db2fc8c346db70 100644 (file)
@@ -242,18 +242,6 @@ brw_add_texrect_params(struct gl_program *prog)
    }
 }
 
-/* Per-thread scratch space is a power-of-two multiple of 1KB. */
-int
-brw_get_scratch_size(int size)
-{
-   int i;
-
-   for (i = 1024; i < size; i *= 2)
-      ;
-
-   return i;
-}
-
 void
 brw_get_scratch_bo(struct brw_context *brw,
                   drm_intel_bo **scratch_bo, int size)
@@ -530,16 +518,6 @@ brw_destroy_shader_time(struct brw_context *brw)
    brw->shader_time.bo = NULL;
 }
 
-void
-brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
-                      unsigned surf_index)
-{
-   assert(surf_index < BRW_MAX_SURFACES);
-
-   prog_data->binding_table.size_bytes =
-      MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
-}
-
 void
 brw_stage_prog_data_free(const void *p)
 {
index 8bc7d9d6aac6fa69154bfe3a6976109cba4cda71..3a58a58a00b0bd7d7e9f2ec87e4947578739161d 100644 (file)
@@ -72,7 +72,7 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
    va_end(args);
 }
 
-static bool
+bool
 is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
 {
    switch (stage) {
@@ -166,254 +166,16 @@ brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
    return &shader->base;
 }
 
-/**
- * Performs a compile of the shader stages even when we don't know
- * what non-orthogonal state will be set, in the hope that it reflects
- * the eventual NOS used, and thus allows us to produce link failures.
- */
-static bool
-brw_shader_precompile(struct gl_context *ctx,
-                      struct gl_shader_program *sh_prog)
-{
-   struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
-   struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
-   struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
-   if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
-      return false;
-
-   if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
-      return false;
-
-   if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
-      return false;
-
-   if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program))
-      return false;
-
-   return true;
-}
-
-static void
-brw_lower_packing_builtins(struct brw_context *brw,
-                           gl_shader_stage shader_type,
-                           exec_list *ir)
-{
-   int ops = LOWER_PACK_SNORM_2x16
-           | LOWER_UNPACK_SNORM_2x16
-           | LOWER_PACK_UNORM_2x16
-           | LOWER_UNPACK_UNORM_2x16;
-
-   if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
-      ops |= LOWER_UNPACK_UNORM_4x8
-           | LOWER_UNPACK_SNORM_4x8
-           | LOWER_PACK_UNORM_4x8
-           | LOWER_PACK_SNORM_4x8;
-   }
-
-   if (brw->gen >= 7) {
-      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
-       * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
-       * lowering is needed. For SOA code, the Half2x16 ops must be
-       * scalarized.
-       */
-      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
-         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
-             |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
-      }
-   } else {
-      ops |= LOWER_PACK_HALF_2x16
-          |  LOWER_UNPACK_HALF_2x16;
-   }
-
-   lower_packing_builtins(ir, ops);
-}
-
-static void
-process_glsl_ir(gl_shader_stage stage,
-                struct brw_context *brw,
-                struct gl_shader_program *shader_prog,
-                struct gl_shader *shader)
-{
-   struct gl_context *ctx = &brw->ctx;
-   const struct gl_shader_compiler_options *options =
-      &ctx->Const.ShaderCompilerOptions[shader->Stage];
-
-   /* Temporary memory context for any new IR. */
-   void *mem_ctx = ralloc_context(NULL);
-
-   ralloc_adopt(mem_ctx, shader->ir);
-
-   /* lower_packing_builtins() inserts arithmetic instructions, so it
-    * must precede lower_instructions().
-    */
-   brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
-   do_mat_op_to_vec(shader->ir);
-   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
-   lower_instructions(shader->ir,
-                      MOD_TO_FLOOR |
-                      DIV_TO_MUL_RCP |
-                      SUB_TO_ADD_NEG |
-                      EXP_TO_EXP2 |
-                      LOG_TO_LOG2 |
-                      bitfield_insert |
-                      LDEXP_TO_ARITH |
-                      CARRY_TO_ARITH |
-                      BORROW_TO_ARITH);
-
-   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
-    * if-statements need to be flattened.
-    */
-   if (brw->gen < 6)
-      lower_if_to_cond_assign(shader->ir, 16);
-
-   do_lower_texture_projection(shader->ir);
-   brw_lower_texture_gradients(brw, shader->ir);
-   do_vec_index_to_cond_assign(shader->ir);
-   lower_vector_insert(shader->ir, true);
-   lower_offset_arrays(shader->ir);
-   brw_do_lower_unnormalized_offset(shader->ir);
-   lower_noise(shader->ir);
-   lower_quadop_vector(shader->ir, false);
-
-   bool lowered_variable_indexing =
-      lower_variable_index_to_cond_assign((gl_shader_stage)stage,
-                                          shader->ir,
-                                          options->EmitNoIndirectInput,
-                                          options->EmitNoIndirectOutput,
-                                          options->EmitNoIndirectTemp,
-                                          options->EmitNoIndirectUniform);
-
-   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
-      perf_debug("Unsupported form of variable indexing in %s; falling "
-                 "back to very inefficient code generation\n",
-                 _mesa_shader_stage_to_abbrev(shader->Stage));
-   }
-
-   lower_ubo_reference(shader, shader->ir);
-
-   bool progress;
-   do {
-      progress = false;
-
-      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
-         brw_do_channel_expressions(shader->ir);
-         brw_do_vector_splitting(shader->ir);
-      }
-
-      progress = do_lower_jumps(shader->ir, true, true,
-                                true, /* main return */
-                                false, /* continue */
-                                false /* loops */
-                                ) || progress;
-
-      progress = do_common_optimization(shader->ir, true, true,
-                                        options, ctx->Const.NativeIntegers) || progress;
-   } while (progress);
-
-   validate_ir_tree(shader->ir);
-
-   /* Now that we've finished altering the linked IR, reparent any live IR back
-    * to the permanent memory context, and free the temporary one (discarding any
-    * junk we optimized away).
-    */
-   reparent_ir(shader->ir, shader->ir);
-   ralloc_free(mem_ctx);
-
-   if (ctx->_Shader->Flags & GLSL_DUMP) {
-      fprintf(stderr, "\n");
-      fprintf(stderr, "GLSL IR for linked %s program %d:\n",
-              _mesa_shader_stage_to_string(shader->Stage),
-              shader_prog->Name);
-      _mesa_print_ir(stderr, shader->ir, NULL);
-      fprintf(stderr, "\n");
-   }
-}
-
-GLboolean
-brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
+void
+brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
+                      unsigned surf_index)
 {
-   struct brw_context *brw = brw_context(ctx);
-   const struct brw_compiler *compiler = brw->intelScreen->compiler;
-   unsigned int stage;
-
-   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-      struct gl_shader *shader = shProg->_LinkedShaders[stage];
-      if (!shader)
-        continue;
-
-      struct gl_program *prog =
-        ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
-                                shader->Name);
-      if (!prog)
-       return false;
-      prog->Parameters = _mesa_new_parameter_list();
-
-      _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
-
-      process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
-
-      /* Make a pass over the IR to add state references for any built-in
-       * uniforms that are used.  This has to be done now (during linking).
-       * Code generation doesn't happen until the first time this shader is
-       * used for rendering.  Waiting until then to generate the parameters is
-       * too late.  At that point, the values for the built-in uniforms won't
-       * get sent to the shader.
-       */
-      foreach_in_list(ir_instruction, node, shader->ir) {
-        ir_variable *var = node->as_variable();
-
-        if ((var == NULL) || (var->data.mode != ir_var_uniform)
-            || (strncmp(var->name, "gl_", 3) != 0))
-           continue;
-
-        const ir_state_slot *const slots = var->get_state_slots();
-        assert(slots != NULL);
-
-        for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
-           _mesa_add_state_reference(prog->Parameters,
-                                     (gl_state_index *) slots[i].tokens);
-        }
-      }
-
-      do_set_program_inouts(shader->ir, prog, shader->Stage);
-
-      prog->SamplersUsed = shader->active_samplers;
-      prog->ShadowSamplers = shader->shadow_samplers;
-      _mesa_update_shader_textures_used(shProg, prog);
-
-      _mesa_reference_program(ctx, &shader->Program, prog);
-
-      brw_add_texrect_params(prog);
+   assert(surf_index < BRW_MAX_SURFACES);
 
-      prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
-                                 is_scalar_shader_stage(compiler, stage));
-
-      _mesa_reference_program(ctx, &prog, NULL);
-   }
-
-   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
-      for (unsigned i = 0; i < shProg->NumShaders; i++) {
-         const struct gl_shader *sh = shProg->Shaders[i];
-         if (!sh)
-            continue;
-
-         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
-                 _mesa_shader_stage_to_string(sh->Stage),
-                 i, shProg->Name);
-         fprintf(stderr, "%s", sh->Source);
-         fprintf(stderr, "\n");
-      }
-   }
-
-   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
-      return false;
-
-   return true;
+   prog_data->binding_table.size_bytes =
+      MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
 }
 
-
 enum brw_reg_type
 brw_type_for_base_type(const struct glsl_type *type)
 {
@@ -1310,13 +1072,15 @@ backend_shader::dump_instructions(const char *name)
    if (cfg) {
       int ip = 0;
       foreach_block_and_inst(block, backend_instruction, inst, cfg) {
-         fprintf(file, "%4d: ", ip++);
+         if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
+            fprintf(file, "%4d: ", ip++);
          dump_instruction(inst, file);
       }
    } else {
       int ip = 0;
       foreach_in_list(backend_instruction, inst, &instructions) {
-         fprintf(file, "%4d: ", ip++);
+         if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
+            fprintf(file, "%4d: ", ip++);
          dump_instruction(inst, file);
       }
    }
@@ -1367,6 +1131,7 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
    next_binding_table_offset += num_textures;
 
    if (shader) {
+      assert(shader->NumUniformBlocks <= BRW_MAX_COMBINED_UBO_SSBO);
       stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
       next_binding_table_offset += shader->NumUniformBlocks;
    } else {
@@ -1439,7 +1204,7 @@ brw_setup_image_uniform_values(gl_shader_stage stage,
       &stage_prog_data->param[param_start_index];
 
    for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
-      const unsigned image_idx = storage->image[stage].index + i;
+      const unsigned image_idx = storage->opaque[stage].index + i;
       const brw_image_param *image_param =
          &stage_prog_data->image_param[image_idx];
 
@@ -1465,3 +1230,29 @@ brw_setup_image_uniform_values(gl_shader_stage stage,
          stage_prog_data->binding_table.image_start + image_idx);
    }
 }
+
+/**
+ * Decide which set of clip planes should be used when clipping via
+ * gl_Position or gl_ClipVertex.
+ *
+ * \param ctx  GL context; its current vertex-stage program slot
+ *             (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) and its
+ *             Transform state are inspected.
+ * \return ctx->Transform.EyeUserPlane when that program slot is non-NULL,
+ *         otherwise ctx->Transform._ClipUserPlane.  The returned pointer is
+ *         taken directly from ctx->Transform; nothing is allocated here.
+ */
+gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
+{
+   if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
+      /* There is currently a GLSL vertex shader, so clip according to GLSL
+       * rules, which means compare gl_ClipVertex (or gl_Position, if
+       * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
+       * that were stored in EyeUserPlane at the time the clip planes were
+       * specified.
+       */
+      return ctx->Transform.EyeUserPlane;
+   } else {
+      /* Either we are using fixed function or an ARB vertex program.  In
+       * either case the clip planes are going to be compared against
+       * gl_Position (which is in clip coordinates) so we have to clip using
+       * _ClipUserPlane, which was transformed into clip coordinates by Mesa
+       * core.
+       */
+      return ctx->Transform._ClipUserPlane;
+   }
+}
+
index fd96740526b855e5006b4e14171ced7eb35cd15a..ad2de5eae2d5d999c904238c4b8c15fbe357b985 100644 (file)
@@ -219,7 +219,7 @@ enum instruction_scheduler_mode {
    SCHEDULE_POST,
 };
 
-class backend_shader {
+struct backend_shader {
 protected:
 
    backend_shader(const struct brw_compiler *compiler,
@@ -273,6 +273,8 @@ void brw_setup_image_uniform_values(gl_shader_stage stage,
                                     unsigned param_start_index,
                                     const gl_uniform_storage *storage);
 
+#else
+struct backend_shader;
 #endif /* __cplusplus */
 
 enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type);
@@ -283,6 +285,8 @@ bool brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg);
 bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg);
 bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg);
 
+bool opt_predicated_break(struct backend_shader *s);
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -314,6 +318,8 @@ bool brw_cs_precompile(struct gl_context *ctx,
 int type_size_scalar(const struct glsl_type *type);
 int type_size_vec4(const struct glsl_type *type);
 
+bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);
+
 #ifdef __cplusplus
 }
 #endif
index 689c767d2d74c1314c4b9266ed45f24d7e988f6b..e966b96a5ca6153221ec69bfd59c80cebb30e8ba 100644 (file)
@@ -1862,6 +1862,7 @@ vec4_visitor::run()
       pass_num = 0;
       iteration++;
 
+      OPT(opt_predicated_break, this);
       OPT(opt_reduce_swizzle);
       OPT(dead_code_eliminate);
       OPT(dead_control_flow_eliminate, this);
@@ -1942,28 +1943,18 @@ brw_vs_emit(struct brw_context *brw,
             struct brw_vs_prog_data *prog_data,
             struct gl_vertex_program *vp,
             struct gl_shader_program *prog,
+            int shader_time_index,
             unsigned *final_assembly_size)
 {
    const unsigned *assembly = NULL;
 
-   struct brw_shader *shader = NULL;
-   if (prog)
-      shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
-
-   int st_index = -1;
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-      st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
-
-   if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
-      brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
-
    if (brw->intelScreen->compiler->scalar_vs) {
       prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
 
       fs_visitor v(brw->intelScreen->compiler, brw,
                    mem_ctx, key, &prog_data->base.base,
                    NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
-                   vp->Base.nir, 8, st_index);
+                   vp->Base.nir, 8, shader_time_index);
       if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
          if (prog) {
             prog->LinkStatus = false;
@@ -2001,7 +1992,7 @@ brw_vs_emit(struct brw_context *brw,
 
       vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
                         vp->Base.nir, brw_select_clip_planes(&brw->ctx),
-                        mem_ctx, st_index,
+                        mem_ctx, shader_time_index,
                         !_mesa_is_gles3(&brw->ctx));
       if (!v.run()) {
          if (prog) {
index 51b3161f6593ddb3dcaa12a682d2b714540bf6a7..5e3500c0c9ac31f7cf3c2f3f6513988284a9311e 100644 (file)
@@ -76,7 +76,7 @@ public:
                void *mem_ctx,
                 bool no_spills,
                 int shader_time_index);
-   ~vec4_visitor();
+   virtual ~vec4_visitor();
 
    dst_reg dst_null_f()
    {
@@ -223,9 +223,6 @@ public:
 
    int implied_mrf_writes(vec4_instruction *inst);
 
-   void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
-                    src_reg src0, src_reg src1, src_reg one);
-
    vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                  src_reg src0, src_reg src1);
 
@@ -238,11 +235,6 @@ public:
     */
    src_reg emit_uniformize(const src_reg &src);
 
-   /**
-    * Emit the correct dot-product instruction for the type of arguments
-    */
-   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
-
    src_reg fix_3src_operand(const src_reg &src);
    src_reg resolve_source_modifiers(const src_reg &src);
 
index af4c102c026dc7841d03fd31aebf366cec996e34..1b929b3df2ca65f0a575b342d650d1b3b4096873 100644 (file)
@@ -29,41 +29,6 @@ namespace brw {
 void
 vec4_gs_visitor::nir_setup_inputs()
 {
-   nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
-
-   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
-      int offset = var->data.driver_location;
-      if (var->type->base_type == GLSL_TYPE_ARRAY) {
-         /* Geometry shader inputs are arrays, but they use an unusual array
-          * layout: instead of all array elements for a given geometry shader
-          * input being stored consecutively, all geometry shader inputs are
-          * interleaved into one giant array. At this stage of compilation, we
-          * assume that the stride of the array is BRW_VARYING_SLOT_COUNT.
-          * Later, setup_attributes() will remap our accesses to the actual
-          * input array.
-          */
-         assert(var->type->length > 0);
-         int length = var->type->length;
-         int size = type_size_vec4(var->type) / length;
-         for (int i = 0; i < length; i++) {
-            int location = var->data.location + i * BRW_VARYING_SLOT_COUNT;
-            for (int j = 0; j < size; j++) {
-               src_reg src = src_reg(ATTR, location + j, var->type);
-               src = retype(src, brw_type_for_base_type(var->type));
-               nir_inputs[offset] = src;
-               offset++;
-            }
-         }
-      } else {
-         int size = type_size_vec4(var->type);
-         for (int i = 0; i < size; i++) {
-            src_reg src = src_reg(ATTR, var->data.location + i, var->type);
-            src = retype(src, brw_type_for_base_type(var->type));
-            nir_inputs[offset] = src;
-            offset++;
-         }
-      }
-   }
 }
 
 void
@@ -96,6 +61,29 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    src_reg src;
 
    switch (instr->intrinsic) {
+   case nir_intrinsic_load_per_vertex_input_indirect:
+      assert(!"EmitNoIndirectInput should prevent this.");
+   case nir_intrinsic_load_per_vertex_input: {
+      /* The EmitNoIndirectInput flag guarantees our vertex index will
+       * be constant.  We should handle indirects someday.
+       */
+      nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
+
+      /* Make up a type...we have no way of knowing... */
+      const glsl_type *const type = glsl_type::ivec(instr->num_components);
+
+      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] +
+                          instr->const_index[0], type);
+      dest = get_nir_dest(instr->dest, src.type);
+      dest.writemask = brw_writemask_for_size(instr->num_components);
+      emit(MOV(dest, src));
+      break;
+   }
+
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_input_indirect:
+      unreachable("nir_lower_io should have produced per_vertex intrinsics");
+
    case nir_intrinsic_emit_vertex_with_counter: {
       this->vertex_count =
          retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
index c673ccd137caea614a92a33d56245b0aa7fd9645..4ce471e066952a92a1309393cd5254eb479a7b51 100644 (file)
@@ -37,7 +37,6 @@ namespace brw {
 vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
                                  void *log_data,
                                  struct brw_gs_compile *c,
-                                 struct gl_shader_program *prog,
                                  nir_shader *shader,
                                  void *mem_ctx,
                                  bool no_spills,
@@ -45,7 +44,6 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
    : vec4_visitor(compiler, log_data, &c->key.tex,
                   &c->prog_data.base, shader,  mem_ctx,
                   no_spills, shader_time_index),
-     shader_prog(prog),
      c(c)
 {
 }
@@ -473,7 +471,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
     * be recorded by transform feedback, we can simply discard all geometry
     * bound to these streams when transform feedback is disabled.
     */
-   if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
+   if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
       return;
 
    /* If we're outputting 32 control data bits or less, then we can wait
@@ -620,17 +618,11 @@ brw_gs_emit(struct brw_context *brw,
             struct gl_shader_program *prog,
             struct brw_gs_compile *c,
             void *mem_ctx,
+            int shader_time_index,
             unsigned *final_assembly_size)
 {
    struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
 
-   if (unlikely(INTEL_DEBUG & DEBUG_GS))
-      brw_dump_ir("geometry", prog, shader, NULL);
-
-   int st_index = -1;
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-      st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS);
-
    if (brw->gen >= 7) {
       /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
        * so without spilling. If the GS invocations count > 1, then we can't use
@@ -641,8 +633,8 @@ brw_gs_emit(struct brw_context *brw,
          c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
 
          vec4_gs_visitor v(brw->intelScreen->compiler, brw,
-                           c, prog, shader->Program->nir,
-                           mem_ctx, true /* no_spills */, st_index);
+                           c, shader->Program->nir,
+                           mem_ctx, true /* no_spills */, shader_time_index);
          if (v.run()) {
             return generate_assembly(brw, prog, &c->gp->program.Base,
                                      &c->prog_data.base, mem_ctx, v.cfg,
@@ -684,14 +676,14 @@ brw_gs_emit(struct brw_context *brw,
 
    if (brw->gen >= 7)
       gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
-                               c, prog, shader->Program->nir,
+                               c, shader->Program->nir,
                                mem_ctx, false /* no_spills */,
-                               st_index);
+                               shader_time_index);
    else
       gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
                                c, prog, shader->Program->nir,
                                mem_ctx, false /* no_spills */,
-                               st_index);
+                               shader_time_index);
 
    if (!gs->run()) {
       prog->LinkStatus = false;
index 85d80b8fc6327b6201795ed1249d8e2357c50329..3ff195c3e68d59d6380457809b138c057c80f308 100644 (file)
@@ -55,6 +55,7 @@ const unsigned *brw_gs_emit(struct brw_context *brw,
                             struct gl_shader_program *prog,
                             struct brw_gs_compile *c,
                             void *mem_ctx,
+                            int shader_time_index,
                             unsigned *final_assembly_size);
 
 #ifdef __cplusplus
@@ -70,7 +71,6 @@ public:
    vec4_gs_visitor(const struct brw_compiler *compiler,
                    void *log_data,
                    struct brw_gs_compile *c,
-                   struct gl_shader_program *prog,
                    nir_shader *shader,
                    void *mem_ctx,
                    bool no_spills,
@@ -97,8 +97,6 @@ protected:
    void emit_control_data_bits();
    void set_stream_control_data_bits(unsigned stream_id);
 
-   struct gl_shader_program *shader_prog;
-
    src_reg vertex_count;
    src_reg control_data_bits;
    const struct brw_gs_compile * const c;
index f80425a5b00f273cbcbb48ca698a619f8ca0fc08..98ea9be6ee493bc2655616faa96efd62e5c8b60b 100644 (file)
@@ -26,8 +26,6 @@
 #include "glsl/ir_uniform.h"
 #include "program/sampler.h"
 
-#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
-
 namespace brw {
 
 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
@@ -278,16 +276,6 @@ vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
    return inst;
 }
 
-void
-vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
-{
-   static enum opcode dot_opcodes[] = {
-      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
-   };
-
-   emit(dot_opcodes[elements - 2], dst, src0, src1);
-}
-
 src_reg
 vec4_visitor::fix_3src_operand(const src_reg &src)
 {
@@ -803,7 +791,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
                                            dst,
                                            surf_index,
                                            offset_reg);
-      pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
+      pull->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
       pull->mlen = 1;
    }
 
index 3c6ee0a7a03ba6e4ff611ac996bfb84ac42a5bb7..38de98fab869406612aee7b2ae0174bee92d3ba9 100644 (file)
 
 #include "util/ralloc.h"
 
-/**
- * Decide which set of clip planes should be used when clipping via
- * gl_Position or gl_ClipVertex.
- */
-gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
-{
-   if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
-      /* There is currently a GLSL vertex shader, so clip according to GLSL
-       * rules, which means compare gl_ClipVertex (or gl_Position, if
-       * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
-       * that were stored in EyeUserPlane at the time the clip planes were
-       * specified.
-       */
-      return ctx->Transform.EyeUserPlane;
-   } else {
-      /* Either we are using fixed function or an ARB vertex program.  In
-       * either case the clip planes are going to be compared against
-       * gl_Position (which is in clip coordinates) so we have to clip using
-       * _ClipUserPlane, which was transformed into clip coordinates by Mesa
-       * core.
-       */
-      return ctx->Transform._ClipUserPlane;
-   }
-}
-
 bool
 brw_codegen_vs_prog(struct brw_context *brw,
                     struct gl_shader_program *prog,
@@ -195,10 +170,17 @@ brw_codegen_vs_prog(struct brw_context *brw,
       start_time = get_time();
    }
 
+   if (unlikely(INTEL_DEBUG & DEBUG_VS))
+      brw_dump_ir("vertex", prog, &vs->base, &vp->program.Base);
+
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);
+
    /* Emit GEN4 code.
     */
    program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
-                         &vp->program, prog, &program_size);
+                         &vp->program, prog, st_index, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
index 96d2435a51568fe46f9cc9e2ff5ac64e0075f64e..f1242f61b33b9414ad6f22e07fc2504c6dbddffd 100644 (file)
@@ -60,6 +60,7 @@ const unsigned *brw_vs_emit(struct brw_context *brw,
                             struct brw_vs_prog_data *prog_data,
                             struct gl_vertex_program *vp,
                             struct gl_shader_program *shader_prog,
+                            int shader_time_index,
                             unsigned *program_size);
 void brw_vs_debug_recompile(struct brw_context *brw,
                             struct gl_shader_program *prog,
@@ -105,16 +106,11 @@ protected:
 
 private:
    int setup_attributes(int payload_reg);
-   void setup_vp_regs();
    void setup_uniform_clipplane_values();
    void emit_clip_distances(dst_reg reg, int offset);
-   dst_reg get_vp_dst_reg(const prog_dst_register &dst);
-   src_reg get_vp_src_reg(const prog_src_register &src);
 
    const struct brw_vs_prog_key *const key;
    struct brw_vs_prog_data * const vs_prog_data;
-   src_reg *vp_temp_regs;
-   src_reg vp_addr_reg;
 
    gl_clip_plane *clip_planes;
 
index 21048885755abd37c63497627df7390078f29464..4d5e7f67bd608b93478ee38875b1bb83b4cc7526 100644 (file)
@@ -43,7 +43,7 @@
  * Return a bitfield where bit n is set if barycentric interpolation mode n
  * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
  */
-unsigned
+static unsigned
 brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
                                      bool shade_model_flat,
                                      bool persample_shading,
@@ -221,8 +221,17 @@ brw_codegen_wm_prog(struct brw_context *brw,
       start_time = get_time();
    }
 
+   if (unlikely(INTEL_DEBUG & DEBUG_WM))
+      brw_dump_ir("fragment", prog, &fs->base, &fp->program.Base);
+
+   int st_index8 = -1, st_index16 = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      st_index8 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS8);
+      st_index16 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS16);
+   }
+
    program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
-                            &fp->program, prog, &program_size);
+                            &fp->program, prog, st_index8, st_index16, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
index 053f2ee62dd927ce515341370e3ffb612c8dc3e0..6ee22b2f907479e2c6aaab82e7e684f4bf5e51f4 100644 (file)
@@ -72,6 +72,8 @@ const unsigned *brw_wm_fs_emit(struct brw_context *brw,
                                struct brw_wm_prog_data *prog_data,
                                struct gl_fragment_program *fp,
                                struct gl_shader_program *prog,
+                               int shader_time_index8,
+                               int shader_time_index16,
                                unsigned *final_assembly_size);
 
 GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
@@ -89,14 +91,6 @@ void brw_wm_debug_recompile(struct brw_context *brw,
 void
 brw_upload_wm_prog(struct brw_context *brw);
 
-struct nir_shader;
-
-unsigned
-brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
-                                     bool shade_model_flat,
-                                     bool persample_shading,
-                                     struct nir_shader *shader);
-
 #ifdef __cplusplus
 } // extern "C"
 #endif
index cd0b56ba60c1a1a2a1007f9019f02c0cd85acd9c..ec54ef2acd9750357e13d7ce795f66a6b1c47be5 100644 (file)
@@ -31,6 +31,7 @@
 
 
 
+#include "intel_batchbuffer.h"
 #include "intel_fbo.h"
 #include "brw_context.h"
 #include "brw_state.h"
@@ -251,6 +252,16 @@ brw_upload_wm_unit(struct brw_context *brw)
    }
 
    brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+
+   /* _NEW_POLYGON */
+   if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
+      OUT_BATCH_F(ctx->Polygon.OffsetClamp);
+      ADVANCE_BATCH();
+
+      brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
+   }
 }
 
 const struct brw_tracked_state brw_wm_unit = {
index def21d80b2453ae90d03d2ef09030515555d7afb..59a765591033bf91f4bb85f93ebd17f401c436b7 100644 (file)
@@ -273,6 +273,18 @@ gen6_gs_visitor::emit_urb_write_header(int mrf)
    emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
 }
 
+static int
+align_interleaved_urb_mlen(int mlen)
+{
+   /* URB data written (does not include the message header reg) must
+    * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
+    * section 5.4.3.2.2: URB_INTERLEAVED.
+    */
+   if ((mlen % 2) != 1)
+      mlen++;
+   return mlen;
+}
+
 void
 gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
                                        int last_mrf, int urb_offset)
@@ -299,14 +311,7 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
    }
 
    inst->base_mrf = base_mrf;
-   /* URB data written (does not include the message header reg) must
-    * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
-    * section 5.4.3.2.2: URB_INTERLEAVED.
-    */
-   int mlen = last_mrf - base_mrf;
-   if ((mlen % 2) != 1)
-      mlen++;
-   inst->mlen = mlen;
+   inst->mlen = align_interleaved_urb_mlen(last_mrf - base_mrf);
    inst->offset = urb_offset;
 }
 
@@ -339,9 +344,9 @@ gen6_gs_visitor::emit_thread_end()
 
    /* In the process of generating our URB write message contents, we
     * may need to unspill a register or load from an array.  Those
-    * reads would use MRFs 14-15.
+    * reads would use MRFs 21..23.
     */
-   int max_usable_mrf = 13;
+   int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
 
    /* Issue the FF_SYNC message and obtain the initial VUE handle. */
    emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
@@ -416,9 +421,10 @@ gen6_gs_visitor::emit_thread_end()
                         this->vertex_output_offset, 1u));
 
                /* If this was max_usable_mrf, we can't fit anything more into
-                * this URB WRITE.
+                * this URB WRITE. Same if we reached the max. message length.
                 */
-               if (mrf > max_usable_mrf) {
+               if (mrf > max_usable_mrf ||
+                   align_interleaved_urb_mlen(mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
                   slot++;
                   break;
                }
index 41c6d183acd74324cb716e50d5c9b72ef370690b..e75d6aa10b8b0f28c9cbdd7edae8ed53320934c8 100644 (file)
@@ -43,8 +43,11 @@ public:
                    void *mem_ctx,
                    bool no_spills,
                    int shader_time_index) :
-      vec4_gs_visitor(comp, log_data, c, prog, shader, mem_ctx, no_spills,
-                      shader_time_index) {}
+      vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills,
+                      shader_time_index),
+      shader_prog(prog)
+      {
+      }
 
 protected:
    virtual void emit_prolog();
@@ -64,6 +67,8 @@ private:
    void xfb_setup();
    int get_vertex_output_offset_for_varying(int vertex, int varying);
 
+   const struct gl_shader_program *shader_prog;
+
    src_reg vertex_output;
    src_reg vertex_output_offset;
    src_reg temp;
index 5edc4fc98423b08fae80cad6f34f9c7d021a81f1..6aeb0cb243fec4f89b55aa8c30cdcb052320896c 100644 (file)
@@ -70,10 +70,8 @@ brw_upload_cs_state(struct brw_context *brw)
 
    unsigned local_id_dwords = 0;
 
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
-      local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
-   }
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+      local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
 
    unsigned push_constant_data_size =
       (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
@@ -190,63 +188,6 @@ const struct brw_tracked_state brw_cs_state = {
 };
 
 
-/**
- * We are building the local ID push constant data using the simplest possible
- * method. We simply push the local IDs directly as they should appear in the
- * registers for the uvec3 gl_LocalInvocationID variable.
- *
- * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
- * registers worth of push constant space.
- *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
- * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
- * to coordinated.
- *
- * FINISHME: There are a few easy optimizations to consider.
- *
- * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
- *    no need for using push constant space for that dimension.
- *
- * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
- *    easily use 16-bit words rather than 32-bit dwords in the push constant
- *    data.
- *
- * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
- *    conveying the data, and thereby reduce push constant usage.
- *
- */
-unsigned
-brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
-{
-   return 3 * dispatch_width;
-}
-
-
-static void
-fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
-                      void *buffer, unsigned *x, unsigned *y, unsigned *z)
-{
-   uint32_t *param = (uint32_t *)buffer;
-   for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
-      param[0 * cs_prog_data->simd_size + i] = *x;
-      param[1 * cs_prog_data->simd_size + i] = *y;
-      param[2 * cs_prog_data->simd_size + i] = *z;
-
-      (*x)++;
-      if (*x == cs_prog_data->local_size[0]) {
-         *x = 0;
-         (*y)++;
-         if (*y == cs_prog_data->local_size[1]) {
-            *y = 0;
-            (*z)++;
-            if (*z == cs_prog_data->local_size[2])
-               *z = 0;
-         }
-      }
-   }
-}
-
-
 /**
  * Creates a region containing the push constants for the CS on gen7+.
  *
@@ -269,10 +210,8 @@ brw_upload_cs_push_constants(struct brw_context *brw,
       (struct brw_stage_prog_data*) cs_prog_data;
    unsigned local_id_dwords = 0;
 
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
-      local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
-   }
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+      local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
 
    /* Updates the ParamaterValues[i] pointers for all parameters of the
     * basic type of PROGRAM_STATE_VAR.
@@ -302,14 +241,13 @@ brw_upload_cs_push_constants(struct brw_context *brw,
 
       STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
 
+      brw_cs_fill_local_id_payload(cs_prog_data, param, threads,
+                                   reg_aligned_constant_size);
+
       /* _NEW_PROGRAM_CONSTANTS */
-      unsigned x = 0, y = 0, z = 0;
       for (t = 0; t < threads; t++) {
-         gl_constant_value *next_param = &param[t * param_aligned_count];
-         if (local_id_dwords > 0) {
-            fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
-            next_param += local_id_dwords;
-         }
+         gl_constant_value *next_param =
+            &param[t * param_aligned_count + local_id_dwords];
          for (i = 0; i < prog_data->nr_params; i++) {
             next_param[i] = *prog_data->param[i];
          }
index 46fccc8d6ced630baa54ec8332a00e7c1703de94..bd204aa3ce8d5aa9bfaba42b29ae037ed090b046 100644 (file)
@@ -27,7 +27,6 @@
 #include "main/blit.h"
 #include "main/context.h"
 #include "main/enums.h"
-#include "main/colormac.h"
 #include "main/fbobject.h"
 
 #include "brw_context.h"
index c98e19382c36daef4ac9f0083bc606a0d1f6943b..fd522cc4f4d47ba35ffc042aedfcdb4e80e47d37 100644 (file)
 #include "main/framebuffer.h"
 #include "main/renderbuffer.h"
 
-
-bool
-brw_is_front_buffer_reading(struct gl_framebuffer *fb)
-{
-   if (!fb || _mesa_is_user_fbo(fb))
-      return false;
-
-   return fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT;
-}
-
-bool
-brw_is_front_buffer_drawing(struct gl_framebuffer *fb)
-{
-   if (!fb || _mesa_is_user_fbo(fb))
-      return false;
-
-   return (fb->_NumColorDrawBuffers >= 1 &&
-           fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT);
-}
-
 static void
 intelDrawBuffer(struct gl_context * ctx, GLenum mode)
 {
-   if (brw_is_front_buffer_drawing(ctx->DrawBuffer)) {
+   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
       struct brw_context *const brw = brw_context(ctx);
 
       /* If we might be front-buffer rendering on this buffer for the first
@@ -71,7 +51,7 @@ intelDrawBuffer(struct gl_context * ctx, GLenum mode)
 static void
 intelReadBuffer(struct gl_context * ctx, GLenum mode)
 {
-   if (brw_is_front_buffer_reading(ctx->ReadBuffer)) {
+   if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) {
       struct brw_context *const brw = brw_context(ctx);
 
       /* If we might be front-buffer reading on this buffer for the first
index 85f54b2c653092fab55809bfdef65c92833f11e5..0e0d9c31f2b36807513773df90acbe0a520f3d5e 100644 (file)
 #include "drm.h"
 #include "brw_context.h"
 
-struct intel_framebuffer;
-
 extern void intelInitBufferFuncs(struct dd_function_table *functions);
 
-bool brw_is_front_buffer_reading(struct gl_framebuffer *fb);
-bool brw_is_front_buffer_drawing(struct gl_framebuffer *fb);
-
 #endif /* INTEL_BUFFERS_H */
index 5a9c9533fde7242c3d681a909e7b3fda47230488..f7c02c8a38de83348aee7438764b953a821612de 100644 (file)
 #include "intel_debug.h"
 #include "utils.h"
 #include "util/u_atomic.h" /* for p_atomic_cmpxchg */
+#include "util/debug.h"
 
 uint64_t INTEL_DEBUG = 0;
 
-static const struct dri_debug_control debug_control[] = {
+static const struct debug_control debug_control[] = {
    { "tex",         DEBUG_TEXTURE},
    { "state",       DEBUG_STATE},
    { "blit",        DEBUG_BLIT},
@@ -91,22 +92,10 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
 }
 
 void
-brw_process_intel_debug_variable(struct intel_screen *screen)
+brw_process_intel_debug_variable(void)
 {
-   uint64_t intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
+   uint64_t intel_debug = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
    (void) p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug);
-
-   if (INTEL_DEBUG & DEBUG_BUFMGR)
-      dri_bufmgr_set_debug(screen->bufmgr, true);
-
-   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && screen->devinfo->gen < 7) {
-      fprintf(stderr,
-              "shader_time debugging requires gen7 (Ivybridge) or better.\n");
-      INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
-   }
-
-   if (INTEL_DEBUG & DEBUG_AUB)
-      drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true);
 }
 
 /**
index b7d0c823fa86716a06bd6f6513831f89534c067d..0a6e1b90b9874f90dfc4ecc57e1f0b37c6422ee3 100644 (file)
@@ -115,8 +115,6 @@ extern uint64_t INTEL_DEBUG;
 
 extern uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage);
 
-struct intel_screen;
-
-extern void brw_process_intel_debug_variable(struct intel_screen *);
+extern void brw_process_intel_debug_variable(void);
 
 extern bool brw_env_var_as_boolean(const char *var_name, bool default_value);
index 3c77f4773c63f77766115d20384159508d021360..3f9afd16c716e0235d30f5a0d43d8b6929cd1a5e 100644 (file)
@@ -229,6 +229,7 @@ intelInitExtensions(struct gl_context *ctx)
    ctx->Extensions.EXT_packed_float = true;
    ctx->Extensions.EXT_pixel_buffer_object = true;
    ctx->Extensions.EXT_point_parameters = true;
+   ctx->Extensions.EXT_polygon_offset_clamp = true;
    ctx->Extensions.EXT_provoking_vertex = true;
    ctx->Extensions.EXT_stencil_two_side = true;
    ctx->Extensions.EXT_texture_array = true;
@@ -300,7 +301,6 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.AMD_vertex_shader_layer = true;
       ctx->Extensions.EXT_framebuffer_multisample = true;
       ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
-      ctx->Extensions.EXT_polygon_offset_clamp = true;
       ctx->Extensions.EXT_transform_feedback = true;
       ctx->Extensions.OES_depth_texture_cube_map = true;
 
index 6b2349e8b69a18fd930f0db09ad6a0d3f37c1544..5a6b0dd1ec586c0e573b45e1b66e8c28da9bc050 100644 (file)
 
 #define FILE_DEBUG_FLAG DEBUG_FBO
 
-/**
- * Create a new framebuffer object.
- */
-static struct gl_framebuffer *
-intel_new_framebuffer(struct gl_context * ctx, GLuint name)
-{
-   /* Only drawable state in intel_framebuffer at this time, just use Mesa's
-    * class
-    */
-   return _mesa_new_framebuffer(ctx, name);
-}
-
-
 /** Called by gl_renderbuffer::Delete() */
 static void
 intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
@@ -1093,7 +1080,6 @@ void
 intel_fbo_init(struct brw_context *brw)
 {
    struct dd_function_table *dd = &brw->ctx.Driver;
-   dd->NewFramebuffer = intel_new_framebuffer;
    dd->NewRenderbuffer = intel_new_renderbuffer;
    dd->MapRenderbuffer = intel_map_renderbuffer;
    dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
index ffc356c9240ea55df9e962f6c1ed7806762f76cb..a169c41790e82f1389db619dbda1e9bb483a60eb 100644 (file)
@@ -160,7 +160,7 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
    }
 }
 
-bool
+static bool
 intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
 {
    /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
@@ -193,9 +193,9 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
  *       64bpp, and 128bpp.
  */
-bool
-intel_miptree_is_fast_clear_capable(struct brw_context *brw,
-                                    struct intel_mipmap_tree *mt)
+static bool
+intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
+                                           struct intel_mipmap_tree *mt)
 {
    /* MCS support does not exist prior to Gen7 */
    if (brw->gen < 7)
@@ -204,6 +204,10 @@ intel_miptree_is_fast_clear_capable(struct brw_context *brw,
    if (mt->disable_aux_buffers)
       return false;
 
+   /* This function applies only to non-multisampled render targets. */
+   if (mt->num_samples > 1)
+      return false;
+
    /* MCS is only supported for color buffers */
    switch (_mesa_get_format_base_format(mt->format)) {
    case GL_DEPTH_COMPONENT:
@@ -222,7 +226,16 @@ intel_miptree_is_fast_clear_capable(struct brw_context *brw,
 
       return false;
    }
+
+   /* Check for layered surfaces. */
    if (mt->physical_depth0 != 1) {
+      /* Multisample surfaces with the CMS layout are not layered surfaces,
+       * yet still have physical_depth0 > 1. Assert that we don't
+       * accidentally reject a multisampled surface here. We should have
+       * rejected it earlier by explicitly checking the sample count.
+       */
+      assert(mt->num_samples <= 1);
+
       if (brw->gen >= 8) {
          perf_debug("Layered fast clear - giving up. (%dx%d%d)\n",
                     mt->logical_width0, mt->logical_height0,
@@ -494,7 +507,7 @@ intel_miptree_create_layout(struct brw_context *brw,
     *  7   |      ?         |        ?
     *  6   |      ?         |        ?
     */
-   if (intel_miptree_is_fast_clear_capable(brw, mt)) {
+   if (intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
       if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
          layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
    } else if (brw->gen >= 9 && num_samples > 1) {
@@ -692,7 +705,7 @@ intel_miptree_create(struct brw_context *brw,
     * clear actually occurs.
     */
    if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
-       intel_miptree_is_fast_clear_capable(brw, mt)) {
+       intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
       assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
    }
@@ -800,8 +813,9 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
     * clear actually occurs.
     */
    if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) &&
-       intel_miptree_is_fast_clear_capable(intel, singlesample_mt))
+       intel_miptree_supports_non_msrt_fast_clear(intel, singlesample_mt)) {
       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
+   }
 
    if (num_samples == 0) {
       intel_miptree_release(&irb->mt);
index 486e5c6f43b7b3accde8e753a0aa56b5a7254e3f..805cd714d882e64511478e9d877c8da682d459ec 100644 (file)
@@ -658,11 +658,7 @@ struct intel_mipmap_tree
 void
 intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height);
-bool
-intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling);
-bool
-intel_miptree_is_fast_clear_capable(struct brw_context *brw,
-                                    struct intel_mipmap_tree *mt);
+
 bool
 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
                                  struct intel_mipmap_tree *mt);
index 30d3a521ec84fafd5e12c3c7a3af7db67f594cdb..d4f86fdffe07c6e6600566f459964c44afc17f43 100644 (file)
@@ -128,7 +128,6 @@ intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one)
 void
 intelInitPixelFuncs(struct dd_function_table *functions)
 {
-   functions->Accum = _mesa_accum;
    functions->Bitmap = intelBitmap;
    functions->CopyPixels = intelCopyPixels;
    functions->DrawPixels = intelDrawPixels;
index 17838350cdaf3ed030538b659233e4412f8edbff..590c45d93eae82634e27bb9816198b744a9a6724 100644 (file)
@@ -1421,7 +1421,19 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
    if (!intelScreen->devinfo)
       return false;
 
-   brw_process_intel_debug_variable(intelScreen);
+   brw_process_intel_debug_variable();
+
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      dri_bufmgr_set_debug(intelScreen->bufmgr, true);
+
+   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && intelScreen->devinfo->gen < 7) {
+      fprintf(stderr,
+              "shader_time debugging requires gen7 (Ivybridge) or better.\n");
+      INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
+   }
+
+   if (INTEL_DEBUG & DEBUG_AUB)
+      drm_intel_bufmgr_gem_set_aub_dump(intelScreen->bufmgr, true);
 
    intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7;
 
index 498cab49ec48bf4ce90af3e3fcd7d9a141db9553..2f5c901fdf81ce0213df99eafef33eac09bf772f 100644 (file)
@@ -27,7 +27,6 @@
 #include "main/context.h"
 #include "main/macros.h"
 #include "main/enums.h"
-#include "main/colormac.h"
 #include "main/dd.h"
 
 #include "intel_screen.h"
index c02a4f399eec34afc431a2435069dc39659d38c3..7c6f48008a1ff3d39eca2ebc9cf55bd7b8de0933 100644 (file)
@@ -42,7 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/macros.h"
 #include "main/mtypes.h"
-#include "main/colormac.h"
 #include "r200_reg.h"
 #include "r200_vertprog.h"
 
index 3cfc03d10ccc30a48cf2d9085f42393cbb72fae7..9b16cf84cf5495d798107232f9582cc171a6175e 100644 (file)
@@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/glheader.h"
 #include "main/mtypes.h"
-#include "main/colormac.h"
 #include "main/imports.h"
 #include "main/macros.h"
 
index cca176d7f9b168dc934ab19f90a107c6e06ab0f8..b4acf985ee8db81f9903e6467248014a253ebd37 100644 (file)
@@ -37,7 +37,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/imports.h"
 #include "main/api_arrayelt.h"
 #include "main/enums.h"
-#include "main/colormac.h"
 #include "main/light.h"
 #include "main/framebuffer.h"
 #include "main/fbobject.h"
@@ -2389,7 +2388,6 @@ void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *func
    functions->Enable                   = r200Enable;
    functions->Fogfv                    = r200Fogfv;
    functions->FrontFace                        = r200FrontFace;
-   functions->Hint                     = NULL;
    functions->LightModelfv             = r200LightModelfv;
    functions->Lightfv                  = r200Lightfv;
    functions->LineStipple              = r200LineStipple;
index ad64f788b9fc206df2042f0753fc335dbc6a7525..8cffa92c10b8d2aa8c1cf6f68341aa0d5fe10da8 100644 (file)
@@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/glheader.h"
 #include "main/imports.h"
 #include "main/enums.h"
-#include "main/colormac.h"
 #include "main/api_arrayelt.h"
 
 #include "swrast/swrast.h"
index bb9be2105674cc14d6f473c3492a9ea32e4906fa..72f09ae40566705b3c59944bda30c4cbd4b3565d 100644 (file)
@@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/glheader.h"
 #include "main/mtypes.h"
-#include "main/colormac.h"
 #include "main/enums.h"
 #include "main/image.h"
 #include "main/imports.h"
index 747275334b6795f58844bf64abb0307a7efa058d..c042aae0ef31038cd28519d9c92277a315a08cb2 100644 (file)
@@ -36,7 +36,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/imports.h"
 #include "main/mtypes.h"
 #include "main/enums.h"
-#include "main/colormac.h"
 #include "main/light.h"
 #include "main/state.h"
 
index feee0b2ba3f009577ff4b4ae751266275b26de69..ca921100c125503bb9989c8bb62c9b87c894a2f1 100644 (file)
@@ -33,7 +33,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/glheader.h"
 #include "main/imports.h"
-#include "main/colormac.h"
 #include "main/context.h"
 #include "main/enums.h"
 #include "main/image.h"
index a9e2ab563d3288598695a94290a20822bcd5f0de..5e15b46fb32b1de22318b1bb2876e1566c0971fe 100644 (file)
@@ -341,8 +341,8 @@ r100CreateContext( gl_api api,
 
 
 #if DO_DEBUG
-   RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
-                                      debug_control );
+   RADEON_DEBUG = parse_debug_string( getenv( "RADEON_DEBUG" ),
+                                      debug_control );
 #endif
 
    tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
index badabd9508c356f0fcccef2fea256c7c0b86df21..88a295386ca3eea4d1d07f74ed6da3af50fe9708 100644 (file)
@@ -49,7 +49,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_drm.h"
 #include "main/macros.h"
 #include "main/mtypes.h"
-#include "main/colormac.h"
 #include "radeon_screen.h"
 
 #include "radeon_common.h"
index 7ddba1ae85f172cf2172947a6d82ddad8b456a93..383a5df674985603759eefa92c98b9ad5a65017b 100644 (file)
@@ -27,7 +27,7 @@
  *      Pauli Nieminen <suokkos@gmail.com>
  */
 
-#include "utils.h"
+#include "util/debug.h"
 
 #include "radeon_common_context.h"
 #include "radeon_debug.h"
@@ -35,7 +35,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 
-static const struct dri_debug_control debug_control[] = {
+static const struct debug_control debug_control[] = {
        {"fall", RADEON_FALLBACKS},
        {"tex", RADEON_TEXTURE},
        {"ioctl", RADEON_IOCTL},
@@ -61,7 +61,7 @@ radeon_debug_type_t radeon_enabled_debug_types;
 
 void radeon_init_debug(void)
 {
-       radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
+       radeon_enabled_debug_types = parse_debug_string(getenv("RADEON_DEBUG"), debug_control);
 
        radeon_enabled_debug_types |= RADEON_GENERAL;
 }
index 5eece518c95734bb95ed60cf5a64139add6b78d1..4d75d149b274631bb626ef1686a0941fb0c7079f 100644 (file)
                 printf(__VA_ARGS__);                      \
 } while(0)
 
-static struct gl_framebuffer *
-radeon_new_framebuffer(struct gl_context *ctx, GLuint name)
-{
-  return _mesa_new_framebuffer(ctx, name);
-}
-
 static void
 radeon_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
 {
@@ -868,7 +862,6 @@ radeon_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
 
 void radeon_fbo_init(struct radeon_context *radeon)
 {
-  radeon->glCtx.Driver.NewFramebuffer = radeon_new_framebuffer;
   radeon->glCtx.Driver.NewRenderbuffer = radeon_new_renderbuffer;
   radeon->glCtx.Driver.MapRenderbuffer = radeon_map_renderbuffer;
   radeon->glCtx.Driver.UnmapRenderbuffer = radeon_unmap_renderbuffer;
index 74c1fc6c902775c36371a7c19ff03aff60dfc092..8a1b81d8f3214dacfee588de57e7bda22f73e61f 100644 (file)
@@ -2148,7 +2148,6 @@ void radeonInitStateFuncs( struct gl_context *ctx )
    ctx->Driver.Enable                  = radeonEnable;
    ctx->Driver.Fogfv                   = radeonFogfv;
    ctx->Driver.FrontFace               = radeonFrontFace;
-   ctx->Driver.Hint                    = NULL;
    ctx->Driver.LightModelfv            = radeonLightModelfv;
    ctx->Driver.Lightfv                 = radeonLightfv;
    ctx->Driver.LineStipple              = radeonLineStipple;
index b671a3be143ce3da5132d2b8acbe523bae41b207..1e19cf7c7c0235dd204c2d1f62516f689f2da150 100644 (file)
@@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/glheader.h"
 #include "main/mtypes.h"
-#include "main/colormac.h"
 #include "main/enums.h"
 #include "main/imports.h"
 #include "main/macros.h"
@@ -359,16 +358,16 @@ void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset)
 #define HAVE_ELTS        0
 
 static const GLuint hw_prim[GL_POLYGON+1] = {
-   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
-   0,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,
-   0,
-   0,
-   0
+   [GL_POINTS] = RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
+   [GL_LINES] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+   [GL_LINE_LOOP] = 0,
+   [GL_LINE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,
+   [GL_TRIANGLES] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+   [GL_TRIANGLE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,
+   [GL_TRIANGLE_FAN] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,
+   [GL_QUADS] = 0,
+   [GL_QUAD_STRIP] = 0,
+   [GL_POLYGON] = 0
 };
 
 static inline void
@@ -468,16 +467,16 @@ const struct tnl_pipeline_stage _radeon_render_stage =
 
 
 static const GLuint reduced_hw_prim[GL_POLYGON+1] = {
-   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
-   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
+   [GL_POINTS] = RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
+   [GL_LINES] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+   [GL_LINE_LOOP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+   [GL_LINE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
+   [GL_TRIANGLES] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+   [GL_TRIANGLE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+   [GL_TRIANGLE_FAN] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+   [GL_QUADS] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+   [GL_QUAD_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
+   [GL_POLYGON] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
 };
 
 static void radeonRasterPrimitive( struct gl_context *ctx, GLuint hwprim );
index 0955a135de8728c5e9e638e94d96998e97af522b..d1aa1a187376b7174d084aa38d4ceaeaa14b7e4c 100644 (file)
@@ -33,7 +33,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/glheader.h"
 #include "main/imports.h"
-#include "main/colormac.h"
 #include "main/context.h"
 #include "main/enums.h"
 #include "main/image.h"
index ec835f248eb09223b1086c05d6a93f9651ef47ed..35b1538d9e12bf177cc85a277c5c53f8e32e529a 100644 (file)
@@ -35,7 +35,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/glheader.h"
 #include "main/imports.h"
-#include "main/colormac.h"
 #include "main/context.h"
 #include "main/macros.h"
 #include "main/teximage.h"
index 28851434133afc18dfe3f466e143a04c6e911c5c..061e557a3971b0e4ebdbc78ac3a700ae113129d7 100644 (file)
@@ -151,7 +151,6 @@ unbind_array_object_vbos(struct gl_context *ctx, struct gl_vertex_array_object *
  * Allocate and initialize a new vertex array object.
  *
  * This function is intended to be called via
- * \c dd_function_table::NewArrayObject.
  */
 struct gl_vertex_array_object *
 _mesa_new_vao(struct gl_context *ctx, GLuint name)
@@ -203,10 +202,8 @@ _mesa_reference_vao_(struct gl_context *ctx,
       deleteFlag = (oldObj->RefCount == 0);
       mtx_unlock(&oldObj->Mutex);
 
-      if (deleteFlag) {
-        assert(ctx->Driver.DeleteArrayObject);
-         ctx->Driver.DeleteArrayObject(ctx, oldObj);
-      }
+      if (deleteFlag)
+         _mesa_delete_vao(ctx, oldObj);
 
       *ptr = NULL;
    }
@@ -408,7 +405,7 @@ bind_vertex_array(struct gl_context *ctx, GLuint id, GLboolean genRequired)
          }
 
          /* For APPLE version, generate a new array object now */
-        newObj = (*ctx->Driver.NewArrayObject)(ctx, id);
+        newObj = _mesa_new_vao(ctx, id);
          if (!newObj) {
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindVertexArrayAPPLE");
             return;
@@ -446,10 +443,6 @@ bind_vertex_array(struct gl_context *ctx, GLuint id, GLboolean genRequired)
 
    ctx->NewState |= _NEW_ARRAY;
    _mesa_reference_vao(ctx, &ctx->Array.VAO, newObj);
-
-   /* Pass BindVertexArray call to device driver */
-   if (ctx->Driver.BindArrayObject && newObj)
-      ctx->Driver.BindArrayObject(ctx, newObj);
 }
 
 
@@ -565,7 +558,7 @@ gen_vertex_arrays(struct gl_context *ctx, GLsizei n, GLuint *arrays,
       struct gl_vertex_array_object *obj;
       GLuint name = first + i;
 
-      obj = (*ctx->Driver.NewArrayObject)( ctx, name );
+      obj = _mesa_new_vao(ctx, name);
       if (!obj) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func);
          return;
index 4fc32962425d17df7b80a6bbc8abf9baf8811857..dee5e29d5b83e4b4bc9f074d50a495abca442be1 100644 (file)
@@ -303,11 +303,6 @@ _mesa_BlendFuncSeparateiARB(GLuint buf, GLenum sfactorRGB, GLenum dfactorRGB,
    ctx->Color.Blend[buf].DstA = dfactorA;
    update_uses_dual_src(ctx, buf);
    ctx->Color._BlendFuncPerBuffer = GL_TRUE;
-
-   if (ctx->Driver.BlendFuncSeparatei) {
-      ctx->Driver.BlendFuncSeparatei(ctx, buf, sfactorRGB, dfactorRGB,
-                                     sfactorA, dfactorA);
-   }
 }
 
 
@@ -406,9 +401,6 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode)
    ctx->Color.Blend[buf].EquationRGB = mode;
    ctx->Color.Blend[buf].EquationA = mode;
    ctx->Color._BlendEquationPerBuffer = GL_TRUE;
-
-   if (ctx->Driver.BlendEquationSeparatei)
-      ctx->Driver.BlendEquationSeparatei(ctx, buf, mode, mode);
 }
 
 
@@ -503,9 +495,6 @@ _mesa_BlendEquationSeparateiARB(GLuint buf, GLenum modeRGB, GLenum modeA)
    ctx->Color.Blend[buf].EquationRGB = modeRGB;
    ctx->Color.Blend[buf].EquationA = modeA;
    ctx->Color._BlendEquationPerBuffer = GL_TRUE;
-
-   if (ctx->Driver.BlendEquationSeparatei)
-      ctx->Driver.BlendEquationSeparatei(ctx, buf, modeRGB, modeA);
 }
 
 
@@ -745,9 +734,6 @@ _mesa_ColorMaski( GLuint buf, GLboolean red, GLboolean green,
 
    FLUSH_VERTICES(ctx, _NEW_COLOR);
    COPY_4UBV(ctx->Color.ColorMask[buf], tmp);
-
-   if (ctx->Driver.ColorMaskIndexed)
-      ctx->Driver.ColorMaskIndexed(ctx, buf, red, green, blue, alpha);
 }
 
 
index 0f7529ad9754bc2adb7bd5784a75b68475474d98..1e7a12c8a840ee74904b048bfd580005be2e8de8 100644 (file)
@@ -51,6 +51,7 @@
 
 #include "imports.h"
 #include "mtypes.h"
+#include "vbo/vbo.h"
 
 
 #ifdef __cplusplus
@@ -227,7 +228,7 @@ do {                                                                \
    if (MESA_VERBOSE & VERBOSE_STATE)                           \
       _mesa_debug(ctx, "FLUSH_VERTICES in %s\n", MESA_FUNCTION);\
    if (ctx->Driver.NeedFlush & FLUSH_STORED_VERTICES)          \
-      ctx->Driver.FlushVertices(ctx, FLUSH_STORED_VERTICES);   \
+      vbo_exec_FlushVertices(ctx, FLUSH_STORED_VERTICES);      \
    ctx->NewState |= newstate;                                  \
 } while (0)
 
@@ -246,7 +247,7 @@ do {                                                                \
    if (MESA_VERBOSE & VERBOSE_STATE)                           \
       _mesa_debug(ctx, "FLUSH_CURRENT in %s\n", MESA_FUNCTION);        \
    if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT)           \
-      ctx->Driver.FlushVertices(ctx, FLUSH_UPDATE_CURRENT);    \
+      vbo_exec_FlushVertices(ctx, FLUSH_UPDATE_CURRENT);       \
    ctx->NewState |= newstate;                                  \
 } while (0)
 
index 88f37273e1e5ed576e3d1750feb8e1083f21f54d..496a14f8dad328d0dc23c5e40be207de7761acaa 100644 (file)
@@ -93,13 +93,6 @@ struct dd_function_table {
     */
    void (*UpdateState)( struct gl_context *ctx, GLbitfield new_state );
 
-   /**
-    * Resize the given framebuffer to the given size.
-    * XXX OBSOLETE: this function will be removed in the future.
-    */
-   void (*ResizeBuffers)( struct gl_context *ctx, struct gl_framebuffer *fb,
-                          GLuint width, GLuint height);
-
    /**
     * This is called whenever glFinish() is called.
     */
@@ -117,12 +110,6 @@ struct dd_function_table {
     */
    void (*Clear)( struct gl_context *ctx, GLbitfield buffers );
 
-   /**
-    * Execute glAccum command.
-    */
-   void (*Accum)( struct gl_context *ctx, GLenum op, GLfloat value );
-
-
    /**
     * Execute glRasterPos, updating the ctx->Current.Raster fields
     */
@@ -529,22 +516,15 @@ struct dd_function_table {
    /** Set the blend equation */
    void (*BlendEquationSeparate)(struct gl_context *ctx,
                                  GLenum modeRGB, GLenum modeA);
-   void (*BlendEquationSeparatei)(struct gl_context *ctx, GLuint buffer,
-                                  GLenum modeRGB, GLenum modeA);
    /** Specify pixel arithmetic */
    void (*BlendFuncSeparate)(struct gl_context *ctx,
                              GLenum sfactorRGB, GLenum dfactorRGB,
                              GLenum sfactorA, GLenum dfactorA);
-   void (*BlendFuncSeparatei)(struct gl_context *ctx, GLuint buffer,
-                              GLenum sfactorRGB, GLenum dfactorRGB,
-                              GLenum sfactorA, GLenum dfactorA);
    /** Specify a plane against which all geometry is clipped */
    void (*ClipPlane)(struct gl_context *ctx, GLenum plane, const GLfloat *eq);
    /** Enable and disable writing of frame buffer color components */
    void (*ColorMask)(struct gl_context *ctx, GLboolean rmask, GLboolean gmask,
                      GLboolean bmask, GLboolean amask );
-   void (*ColorMaskIndexed)(struct gl_context *ctx, GLuint buf, GLboolean rmask,
-                            GLboolean gmask, GLboolean bmask, GLboolean amask);
    /** Cause a material color to track the current color */
    void (*ColorMaterial)(struct gl_context *ctx, GLenum face, GLenum mode);
    /** Specify whether front- or back-facing facets can be culled */
@@ -565,8 +545,6 @@ struct dd_function_table {
    void (*Enable)(struct gl_context *ctx, GLenum cap, GLboolean state);
    /** Specify fog parameters */
    void (*Fogfv)(struct gl_context *ctx, GLenum pname, const GLfloat *params);
-   /** Specify implementation-specific hints */
-   void (*Hint)(struct gl_context *ctx, GLenum target, GLenum mode);
    /** Set light source parameters.
     * Note: for GL_POSITION and GL_SPOT_DIRECTION, params will have already
     * been transformed to eye-space.
@@ -769,26 +747,12 @@ struct dd_function_table {
                                 GLint *bytesWritten);
    /*@}*/
 
-
-   /**
-    * \name Vertex Array objects
-    */
-   /*@{*/
-   struct gl_vertex_array_object * (*NewArrayObject)(struct gl_context *ctx, GLuint id);
-   void (*DeleteArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *);
-   void (*BindArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *);
-   /*@}*/
-
    /**
     * \name GLSL-related functions (ARB extensions and OpenGL 2.x)
     */
    /*@{*/
    struct gl_shader *(*NewShader)(struct gl_context *ctx,
                                   GLuint name, GLenum type);
-   void (*DeleteShader)(struct gl_context *ctx, struct gl_shader *shader);
-   struct gl_shader_program *(*NewShaderProgram)(GLuint name);
-   void (*DeleteShaderProgram)(struct gl_context *ctx,
-                               struct gl_shader_program *shProg);
    void (*UseProgram)(struct gl_context *ctx, struct gl_shader_program *shProg);
    /*@}*/
 
@@ -825,67 +789,15 @@ struct dd_function_table {
     */
    GLbitfield NeedFlush;
 
-   /** Need to call SaveFlushVertices() upon state change? */
+   /** Need to call vbo_save_SaveFlushVertices() upon state change? */
    GLboolean SaveNeedFlush;
 
-   /* Called prior to any of the GLvertexformat functions being
-    * called.  Paired with Driver.FlushVertices().
-    */
-   void (*BeginVertices)( struct gl_context *ctx );
-
-   /**
-    * If inside glBegin()/glEnd(), it should assert(0).  Otherwise, if
-    * FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered
-    * vertices, if FLUSH_UPDATE_CURRENT bit is set updates
-    * __struct gl_contextRec::Current and gl_light_attrib::Material
-    *
-    * Note that the default T&L engine never clears the
-    * FLUSH_UPDATE_CURRENT bit, even after performing the update.
-    */
-   void (*FlushVertices)( struct gl_context *ctx, GLuint flags );
-   void (*SaveFlushVertices)( struct gl_context *ctx );
-
-   /**
-    * Give the driver the opportunity to hook in its own vtxfmt for
-    * compiling optimized display lists.  This is called on each valid
-    * glBegin() during list compilation.
-    */
-   GLboolean (*NotifySaveBegin)( struct gl_context *ctx, GLenum mode );
-
    /**
     * Notify driver that the special derived value _NeedEyeCoords has
     * changed.
     */
    void (*LightingSpaceChange)( struct gl_context *ctx );
 
-   /**
-    * Called by glNewList().
-    *
-    * Let the T&L component know what is going on with display lists
-    * in time to make changes to dispatch tables, etc.
-    */
-   void (*NewList)( struct gl_context *ctx, GLuint list, GLenum mode );
-   /**
-    * Called by glEndList().
-    *
-    * \sa dd_function_table::NewList.
-    */
-   void (*EndList)( struct gl_context *ctx );
-
-   /**
-    * Called by glCallList(s).
-    *
-    * Notify the T&L component before and after calling a display list.
-    */
-   void (*BeginCallList)( struct gl_context *ctx, 
-                         struct gl_display_list *dlist );
-   /**
-    * Called by glEndCallList().
-    *
-    * \sa dd_function_table::BeginCallList.
-    */
-   void (*EndCallList)( struct gl_context *ctx );
-
    /**@}*/
 
    /**
@@ -963,8 +875,6 @@ struct dd_function_table {
     */
    struct gl_sampler_object * (*NewSamplerObject)(struct gl_context *ctx,
                                                   GLuint name);
-   void (*DeleteSamplerObject)(struct gl_context *ctx,
-                               struct gl_sampler_object *samp);
 
    /**
     * \name Return a timestamp in nanoseconds as defined by GL_ARB_timer_query.
@@ -1007,12 +917,6 @@ struct dd_function_table {
     * \name GL_ARB_shader_image_load_store interface.
     */
    /** @{ */
-   void (*BindImageTexture)(struct gl_context *ctx,
-                            struct gl_image_unit *unit,
-                            struct gl_texture_object *texObj,
-                            GLint level, GLboolean layered, GLint layer,
-                            GLenum access, GLenum format);
-
    void (*MemoryBarrier)(struct gl_context *ctx, GLbitfield barriers);
    /** @} */
 
index 5554738d1a3154a26eee2a1540426fe19ab7eb0b..e8059c7b2600d3f053e5b0c823c8eacd66a59731 100644 (file)
@@ -105,13 +105,12 @@ struct gl_list_extensions
  * \param ctx GL context.
  *
  * Checks if dd_function_table::SaveNeedFlush is marked to flush
- * stored (save) vertices, and calls
- * dd_function_table::SaveFlushVertices if so.
+ * stored (save) vertices, and calls vbo_save_SaveFlushVertices if so.
  */
 #define SAVE_FLUSH_VERTICES(ctx)               \
 do {                                           \
    if (ctx->Driver.SaveNeedFlush)              \
-      ctx->Driver.SaveFlushVertices(ctx);      \
+      vbo_save_SaveFlushVertices(ctx);               \
 } while (0)
 
 
@@ -5466,7 +5465,7 @@ save_Begin(GLenum mode)
       /* Give the driver an opportunity to hook in an optimized
        * display list compiler.
        */
-      if (ctx->Driver.NotifySaveBegin(ctx, mode))
+      if (vbo_save_NotifyBegin(ctx, mode))
          return;
 
       SAVE_FLUSH_VERTICES(ctx);
@@ -7743,8 +7742,7 @@ execute_list(struct gl_context *ctx, GLuint list)
 
    ctx->ListState.CallDepth++;
 
-   if (ctx->Driver.BeginCallList)
-      ctx->Driver.BeginCallList(ctx, dlist);
+   vbo_save_BeginCallList(ctx, dlist);
 
    n = dlist->Head;
 
@@ -8900,8 +8898,7 @@ execute_list(struct gl_context *ctx, GLuint list)
       }
    }
 
-   if (ctx->Driver.EndCallList)
-      ctx->Driver.EndCallList(ctx);
+   vbo_save_EndCallList(ctx);
 
    ctx->ListState.CallDepth--;
 }
@@ -9029,7 +9026,7 @@ _mesa_NewList(GLuint name, GLenum mode)
    ctx->ListState.CurrentBlock = ctx->ListState.CurrentList->Head;
    ctx->ListState.CurrentPos = 0;
 
-   ctx->Driver.NewList(ctx, name, mode);
+   vbo_save_NewList(ctx, name, mode);
 
    ctx->CurrentDispatch = ctx->Save;
    _glapi_set_dispatch(ctx->CurrentDispatch);
@@ -9063,7 +9060,7 @@ _mesa_EndList(void)
    /* Call before emitting END_OF_LIST, in case the driver wants to
     * emit opcodes itself.
     */
-   ctx->Driver.EndList(ctx);
+   vbo_save_EndList(ctx);
 
    (void) alloc_instruction(ctx, OPCODE_END_OF_LIST, 0);
 
index c0030bc56878a5eb4326bf87786795b27fd9d752..e4e2a18c1da214a1f85b1b3246965cc5d555af37 100644 (file)
@@ -32,6 +32,7 @@
 #include "main/imports.h"
 #include "main/macros.h"
 #include "main/samplerobj.h"
+#include "main/shaderobj.h"
 #include "main/texenvprogram.h"
 #include "main/texobj.h"
 #include "main/uniforms.h"
@@ -1208,7 +1209,7 @@ create_new_program(struct gl_context *ctx, struct state_key *key)
    p.top_instructions = p.shader->ir;
    p.instructions = p.shader->ir;
    p.state = key;
-   p.shader_program = ctx->Driver.NewShaderProgram(0);
+   p.shader_program = _mesa_new_shader_program(0);
 
    /* Tell the linker to ignore the fact that we're building a
     * separate shader, in case we're in a GLES2 context that would
index 37e2c29c89ccdc729cf45d74c17806b24792c94b..d18166d528ea6622919092c606009d499f068cf2 100644 (file)
@@ -271,8 +271,7 @@ _mesa_reference_framebuffer_(struct gl_framebuffer **ptr,
  * Resize the given framebuffer's renderbuffers to the new width and height.
  * This should only be used for window-system framebuffers, not
  * user-created renderbuffers (i.e. made with GL_EXT_framebuffer_object).
- * This will typically be called via ctx->Driver.ResizeBuffers() or directly
- * from a device driver.
+ * This will typically be called directly from a device driver.
  *
  * \note it's possible for ctx to be null since a window can be resized
  * without a currently bound rendering context.
@@ -965,3 +964,22 @@ _mesa_print_framebuffer(const struct gl_framebuffer *fb)
       }
    }
 }
+
+bool
+_mesa_is_front_buffer_reading(const struct gl_framebuffer *fb)
+{
+   if (!fb || _mesa_is_user_fbo(fb))
+      return false;
+
+   return fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT;
+}
+
+bool
+_mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb)
+{
+   if (!fb || _mesa_is_user_fbo(fb))
+      return false;
+
+   return (fb->_NumColorDrawBuffers >= 1 &&
+           fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT);
+}
index 08e43222045eb15772324154c3a8dbc26548e42a..bfc8a0836e7c15061775cc8e1be0ea4cc109862b 100644 (file)
@@ -139,4 +139,10 @@ _mesa_get_read_renderbuffer_for_format(const struct gl_context *ctx,
 extern void
 _mesa_print_framebuffer(const struct gl_framebuffer *fb);
 
+extern bool
+_mesa_is_front_buffer_reading(const struct gl_framebuffer *fb);
+
+extern bool
+_mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb);
+
 #endif /* FRAMEBUFFER_H */
index 38b08b02a65028dc5d5cbbbe7790205da2db9502..c295615b475f6c9b4f7e458605e0f332e9f6cba9 100644 (file)
@@ -124,6 +124,15 @@ descriptor=[
 
 # GL_EXT_texture_filter_anisotropic
   [ "MAX_TEXTURE_MAX_ANISOTROPY_EXT", "CONTEXT_FLOAT(Const.MaxTextureMaxAnisotropy), extra_EXT_texture_filter_anisotropic" ],
+
+# GL_KHR_debug (GL 4.3)/ GL_ARB_debug_output
+  [ "DEBUG_LOGGED_MESSAGES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
+  [ "DEBUG_NEXT_LOGGED_MESSAGE_LENGTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
+  [ "MAX_DEBUG_LOGGED_MESSAGES", "CONST(MAX_DEBUG_LOGGED_MESSAGES), NO_EXTRA" ],
+  [ "MAX_DEBUG_MESSAGE_LENGTH", "CONST(MAX_DEBUG_MESSAGE_LENGTH), NO_EXTRA" ],
+  [ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ],
+  [ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ],
+  [ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
 ]},
 
 # Enums in OpenGL and GLES1
@@ -791,15 +800,6 @@ descriptor=[
 # GL_ARB_robustness
   [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
 
-# GL_KHR_debug (GL 4.3)/ GL_ARB_debug_output
-  [ "DEBUG_LOGGED_MESSAGES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
-  [ "DEBUG_NEXT_LOGGED_MESSAGE_LENGTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
-  [ "MAX_DEBUG_LOGGED_MESSAGES", "CONST(MAX_DEBUG_LOGGED_MESSAGES), NO_EXTRA" ],
-  [ "MAX_DEBUG_MESSAGE_LENGTH", "CONST(MAX_DEBUG_MESSAGE_LENGTH), NO_EXTRA" ],
-  [ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ],
-  [ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ],
-  [ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
-
   [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ],
 
 # GL_ARB_uniform_buffer_object
index 7dab33c0051184bc11d3d923b06f9b4725faff57..faa638253800485a867311246159868cffcef298 100644 (file)
@@ -2678,6 +2678,7 @@ _mesa_es3_effective_internal_format_for_format_and_type(GLenum format,
        * internal formats, they do not correspond to GL constants, so the base
        * format is returned instead.
        */
+      case GL_BGRA_EXT:
       case GL_LUMINANCE_ALPHA:
       case GL_LUMINANCE:
       case GL_ALPHA:
@@ -2797,8 +2798,19 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
       if (effectiveInternalFormat == GL_NONE)
          return GL_INVALID_OPERATION;
 
-      GLenum baseInternalFormat =
-         _mesa_base_tex_format(ctx, effectiveInternalFormat);
+      GLenum baseInternalFormat;
+      if (internalFormat == GL_BGRA_EXT) {
+         /* Unfortunately, _mesa_base_tex_format returns a base format of
+          * GL_RGBA for GL_BGRA_EXT.  This makes perfect sense if you're
+          * asking the question, "what channels does this format have?"
+          * However, if we're trying to determine if two internal formats
+          * match in the ES3 sense, we actually want GL_BGRA.
+          */
+         baseInternalFormat = GL_BGRA_EXT;
+      } else {
+         baseInternalFormat =
+            _mesa_base_tex_format(ctx, effectiveInternalFormat);
+      }
 
       if (internalFormat != baseInternalFormat)
          return GL_INVALID_OPERATION;
@@ -2807,6 +2819,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
    }
 
    switch (format) {
+   case GL_BGRA_EXT:
+      if (type != GL_UNSIGNED_BYTE || internalFormat != GL_BGRA)
+         return GL_INVALID_OPERATION;
+      break;
+
    case GL_RGBA:
       switch (type) {
       case GL_UNSIGNED_BYTE:
index 984239a727636cc79ecfa8de1d2933d80dfd4d78..5d0c15d35abf47eadaa9842c47b9602ad1de153b 100644 (file)
@@ -123,11 +123,6 @@ _mesa_Hint( GLenum target, GLenum mode )
       default:
          goto invalid_target;
    }
-
-   if (ctx->Driver.Hint) {
-      (*ctx->Driver.Hint)( ctx, target, mode );
-   }
-
    return;
 
 invalid_target:
index 7147fd6e4fe3f4b82661f17ffad8d22c8f6e24b5..00e31b05c9914102001de24829eedd0b4bda6197 100644 (file)
@@ -43,7 +43,6 @@
 
 
 #include "glheader.h"
-#include "colormac.h"
 #include "enums.h"
 #include "image.h"
 #include "imports.h"
@@ -1124,7 +1123,8 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest,
       }
       break;
    default:
-      _mesa_problem(ctx, "bad type in _mesa_pack_depth_span");
+      _mesa_problem(ctx, "bad type in _mesa_pack_depth_span (%s)",
+                    _mesa_enum_to_string(dstType));
    }
 
    free(depthCopy);
index c7b96664c21e7c12b94745d24109bb3f3f0fbeac..676dd367b3fccc80d4515570f8e2e5cdac269401 100644 (file)
@@ -72,6 +72,14 @@ lookup_samplerobj_locked(struct gl_context *ctx, GLuint name)
          _mesa_HashLookupLocked(ctx->Shared->SamplerObjects, name);
 }
 
+static void
+delete_sampler_object(struct gl_context *ctx,
+                      struct gl_sampler_object *sampObj)
+{
+   mtx_destroy(&sampObj->Mutex);
+   free(sampObj->Label);
+   free(sampObj);
+}
 
 /**
  * Handle reference counting.
@@ -94,10 +102,8 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
       deleteFlag = (oldSamp->RefCount == 0);
       mtx_unlock(&oldSamp->Mutex);
 
-      if (deleteFlag) {
-        assert(ctx->Driver.DeleteSamplerObject);
-         ctx->Driver.DeleteSamplerObject(ctx, oldSamp);
-      }
+      if (deleteFlag)
+         delete_sampler_object(ctx, oldSamp);
 
       *ptr = NULL;
    }
@@ -162,19 +168,6 @@ _mesa_new_sampler_object(struct gl_context *ctx, GLuint name)
    return sampObj;
 }
 
-
-/**
- * Fallback for ctx->Driver.DeleteSamplerObject();
- */
-static void
-_mesa_delete_sampler_object(struct gl_context *ctx,
-                            struct gl_sampler_object *sampObj)
-{
-   mtx_destroy(&sampObj->Mutex);
-   free(sampObj->Label);
-   free(sampObj);
-}
-
 static void
 create_samplers(struct gl_context *ctx, GLsizei count, GLuint *samplers,
                 const char *caller)
@@ -628,8 +621,12 @@ static GLuint
 set_sampler_compare_mode(struct gl_context *ctx,
                          struct gl_sampler_object *samp, GLint param)
 {
+    /* If GL_ARB_shadow is not supported, don't report an error.  The
+     * sampler object extension spec isn't clear on this extension interaction.
+     * Silences errors with Wine on older GPUs such as R200.
+     */
    if (!ctx->Extensions.ARB_shadow)
-      return INVALID_PNAME;
+      return GL_FALSE;
 
    if (samp->CompareMode == param)
       return GL_FALSE;
@@ -649,8 +646,12 @@ static GLuint
 set_sampler_compare_func(struct gl_context *ctx,
                          struct gl_sampler_object *samp, GLint param)
 {
+    /* If GL_ARB_shadow is not supported, don't report an error.  The
+     * sampler object extension spec isn't clear on this extension interaction.
+     * Silences errors with Wine on older GPUs such as R200.
+     */
    if (!ctx->Extensions.ARB_shadow)
-      return INVALID_PNAME;
+      return GL_FALSE;
 
    if (samp->CompareFunc == param)
       return GL_FALSE;
@@ -1336,13 +1337,9 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params)
       *params = IROUND(sampObj->LodBias);
       break;
    case GL_TEXTURE_COMPARE_MODE:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = sampObj->CompareMode;
       break;
    case GL_TEXTURE_COMPARE_FUNC:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = sampObj->CompareFunc;
       break;
    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
@@ -1425,13 +1422,9 @@ _mesa_GetSamplerParameterfv(GLuint sampler, GLenum pname, GLfloat *params)
       *params = sampObj->LodBias;
       break;
    case GL_TEXTURE_COMPARE_MODE:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = (GLfloat) sampObj->CompareMode;
       break;
    case GL_TEXTURE_COMPARE_FUNC:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = (GLfloat) sampObj->CompareFunc;
       break;
    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
@@ -1504,13 +1497,9 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params)
       *params = (GLint) sampObj->LodBias;
       break;
    case GL_TEXTURE_COMPARE_MODE:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = sampObj->CompareMode;
       break;
    case GL_TEXTURE_COMPARE_FUNC:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = sampObj->CompareFunc;
       break;
    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
@@ -1583,13 +1572,9 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params)
       *params = (GLuint) sampObj->LodBias;
       break;
    case GL_TEXTURE_COMPARE_MODE:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = sampObj->CompareMode;
       break;
    case GL_TEXTURE_COMPARE_FUNC:
-      if (!ctx->Extensions.ARB_shadow)
-         goto invalid_pname;
       *params = sampObj->CompareFunc;
       break;
    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
@@ -1626,5 +1611,4 @@ void
 _mesa_init_sampler_object_functions(struct dd_function_table *driver)
 {
    driver->NewSamplerObject = _mesa_new_sampler_object;
-   driver->DeleteSamplerObject = _mesa_delete_sampler_object;
 }
index 718967605b586ed5b0d6f5b56cfb60d4bc0228b8..6d73e3bdcf260c538ad8155ef1ca088c6254ab19 100644 (file)
@@ -996,7 +996,7 @@ program_resource_top_level_array_stride(struct gl_shader_program *shProg,
                const glsl_type *array_type = field->type->fields.array;
 
                if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
-                  if (array_type->is_record()) {
+                  if (array_type->is_record() || array_type->is_array()) {
                      array_stride = array_type->std140_size(row_major);
                      array_stride = glsl_align(array_stride, 16);
                   } else {
index 82a1ec371303cb88c2dabc831cb3a260f137a848..9dd1054c8ee4820f3be0fcf3252731471187ef1c 100644 (file)
@@ -320,7 +320,7 @@ create_shader_program(struct gl_context *ctx)
 
    name = _mesa_HashFindFreeKeyBlock(ctx->Shared->ShaderObjects, 1);
 
-   shProg = ctx->Driver.NewShaderProgram(name);
+   shProg = _mesa_new_shader_program(name);
 
    _mesa_HashInsert(ctx->Shared->ShaderObjects, name, shProg);
 
@@ -2597,7 +2597,7 @@ _mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
 
    {
       struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[location];
-      int offset = location - uni->subroutine[stage].index;
+      int offset = location - uni->opaque[stage].index;
       memcpy(params, &uni->storage[offset],
             sizeof(GLuint));
    }
index c4bba842ca792b77d5ab83dcf60c72390e768a3a..bd4b7c7be3b476568dbca6cdf18c133892e9e5ee 100644 (file)
@@ -577,10 +577,6 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level,
       u->Layered = GL_FALSE;
       u->Layer = 0;
    }
-
-   if (ctx->Driver.BindImageTexture)
-      ctx->Driver.BindImageTexture(ctx, u, u->TexObj, level, layered,
-                                   layer, access, format);
 }
 
 void GLAPIENTRY
@@ -719,11 +715,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
          u->_ActualFormat = MESA_FORMAT_R_UNORM8;
          u->_Valid = GL_FALSE;
       }
-
-      /* Pass the BindImageTexture call down to the device driver */
-      if (ctx->Driver.BindImageTexture)
-         ctx->Driver.BindImageTexture(ctx, u, u->TexObj, u->Level, u->Layered,
-                                      u->Layer, u->Access, u->Format);
    }
 
    _mesa_end_texture_lookups(ctx);
index 5cd37d7e4c49317b3f23f1f295ab045525a6e041..4e85fda24b40e60d1373fe24c7ed9cac613ee0b6 100644 (file)
@@ -74,7 +74,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
       if (deleteFlag) {
         if (old->Name != 0)
            _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name);
-         ctx->Driver.DeleteShader(ctx, old);
+         _mesa_delete_shader(ctx, old);
       }
 
       *ptr = NULL;
@@ -116,9 +116,8 @@ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
 
 /**
  * Delete a shader object.
- * Called via ctx->Driver.DeleteShader().
  */
-static void
+void
 _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
 {
    free((void *)sh->Source);
@@ -210,7 +209,7 @@ _mesa_reference_shader_program_(struct gl_context *ctx,
       if (deleteFlag) {
         if (old->Name != 0)
            _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name);
-         ctx->Driver.DeleteShaderProgram(ctx, old);
+         _mesa_delete_shader_program(ctx, old);
       }
 
       *ptr = NULL;
@@ -246,9 +245,8 @@ init_shader_program(struct gl_shader_program *prog)
 
 /**
  * Allocate a new gl_shader_program object, initialize it.
- * Called via ctx->Driver.NewShaderProgram()
  */
-static struct gl_shader_program *
+struct gl_shader_program *
 _mesa_new_shader_program(GLuint name)
 {
    struct gl_shader_program *shProg;
@@ -362,7 +360,7 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
 
    for (sh = 0; sh < MESA_SHADER_STAGES; sh++) {
       if (shProg->_LinkedShaders[sh] != NULL) {
-        ctx->Driver.DeleteShader(ctx, shProg->_LinkedShaders[sh]);
+        _mesa_delete_shader(ctx, shProg->_LinkedShaders[sh]);
         shProg->_LinkedShaders[sh] = NULL;
       }
    }
@@ -374,10 +372,10 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
 
 /**
  * Free/delete a shader program object.
- * Called via ctx->Driver.DeleteShaderProgram().
  */
-static void
-_mesa_delete_shader_program(struct gl_context *ctx, struct gl_shader_program *shProg)
+void
+_mesa_delete_shader_program(struct gl_context *ctx,
+                            struct gl_shader_program *shProg)
 {
    _mesa_free_shader_program_data(ctx, shProg);
 
@@ -439,8 +437,5 @@ void
 _mesa_init_shader_object_functions(struct dd_function_table *driver)
 {
    driver->NewShader = _mesa_new_shader;
-   driver->DeleteShader = _mesa_delete_shader;
-   driver->NewShaderProgram = _mesa_new_shader_program;
-   driver->DeleteShaderProgram = _mesa_delete_shader_program;
    driver->LinkShader = _mesa_ir_link_shader;
 }
index 943044e37cdd7984284c09281e5a0ec0a8835ccb..796de47073550160e769f3064856b2426004a153 100644 (file)
@@ -82,6 +82,9 @@ _mesa_init_shader(struct gl_context *ctx, struct gl_shader *shader);
 extern struct gl_shader *
 _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
 
+extern void
+_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh);
+
 extern struct gl_shader_program *
 _mesa_lookup_shader_program(struct gl_context *ctx, GLuint name);
 
@@ -89,6 +92,9 @@ extern struct gl_shader_program *
 _mesa_lookup_shader_program_err(struct gl_context *ctx, GLuint name,
                                 const char *caller);
 
+extern struct gl_shader_program *
+_mesa_new_shader_program(GLuint name);
+
 extern void
 _mesa_clear_shader_program_data(struct gl_shader_program *shProg);
 
@@ -96,6 +102,9 @@ extern void
 _mesa_free_shader_program_data(struct gl_context *ctx,
                                struct gl_shader_program *shProg);
 
+extern void
+_mesa_delete_shader_program(struct gl_context *ctx,
+                            struct gl_shader_program *shProg);
 
 
 extern void
index d5ac9f1fb13380868ff6ec0ad23f0dd246ec4766..1acaf59f432aef0d745b338b842ecd5e379f097a 100644 (file)
@@ -219,12 +219,12 @@ delete_shader_cb(GLuint id, void *data, void *userData)
    struct gl_context *ctx = (struct gl_context *) userData;
    struct gl_shader *sh = (struct gl_shader *) data;
    if (_mesa_validate_shader_target(ctx, sh->Type)) {
-      ctx->Driver.DeleteShader(ctx, sh);
+      _mesa_delete_shader(ctx, sh);
    }
    else {
       struct gl_shader_program *shProg = (struct gl_shader_program *) data;
       assert(shProg->Type == GL_SHADER_PROGRAM_MESA);
-      ctx->Driver.DeleteShaderProgram(ctx, shProg);
+      _mesa_delete_shader_program(ctx, shProg);
    }
 }
 
index d7147afdc5c436a8563ab366cb4e62e00d76e57a..32854b60eb223282a749af6a58e3d417271493bb 100644 (file)
@@ -59,6 +59,9 @@ _mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len)
       len -= MIN2(bytes_before_alignment_boundary, len);
    }
 
+   if (len >= 64)
+      _mm_mfence();
+
    while (len >= 64) {
       __m128i *dst_cacheline = (__m128i *)d;
       __m128i *src_cacheline = (__m128i *)s;
index b19c6d74bc0b120e6c9e4adb4b2049bebced531b..ac2d2332df8b8edc8e03fd1e39105eb508ddb961 100644 (file)
@@ -2039,6 +2039,19 @@ const struct function gles11_functions_possible[] = {
    { "glUnmapBufferOES", 11, -1 },
    { "glVertexPointer", 11, _gloffset_VertexPointer },
    { "glViewport", 11, _gloffset_Viewport },
+
+   /* GL_KHR_debug */
+   { "glPushDebugGroupKHR", 11, -1 },
+   { "glPopDebugGroupKHR", 11, -1 },
+   { "glDebugMessageCallbackKHR", 11, -1 },
+   { "glDebugMessageControlKHR", 11, -1 },
+   { "glDebugMessageInsertKHR", 11, -1 },
+   { "glGetDebugMessageLogKHR", 11, -1 },
+   { "glGetObjectLabelKHR", 11, -1 },
+   { "glGetObjectPtrLabelKHR", 11, -1 },
+   { "glObjectLabelKHR", 11, -1 },
+   { "glObjectPtrLabelKHR", 11, -1 },
+
    { NULL, 0, -1 }
 };
 
@@ -2262,6 +2275,18 @@ const struct function gles2_functions_possible[] = {
    { "glEndPerfQueryINTEL", 20, -1 },
    { "glGetPerfQueryDataINTEL", 20, -1 },
 
+   /* GL_KHR_debug */
+   { "glPushDebugGroupKHR", 20, -1 },
+   { "glPopDebugGroupKHR", 20, -1 },
+   { "glDebugMessageCallbackKHR", 20, -1 },
+   { "glDebugMessageControlKHR", 20, -1 },
+   { "glDebugMessageInsertKHR", 20, -1 },
+   { "glGetDebugMessageLogKHR", 20, -1 },
+   { "glGetObjectLabelKHR", 20, -1 },
+   { "glGetObjectPtrLabelKHR", 20, -1 },
+   { "glObjectLabelKHR", 20, -1 },
+   { "glObjectPtrLabelKHR", 20, -1 },
+
    { NULL, 0, -1 }
 };
 
index 33c959dc1a5583f3b2faf6804cf129286a5b115a..d48729778aec3e6cba86720abf253aaa57daa336 100644 (file)
@@ -804,11 +804,11 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
 
         /* If the shader stage doesn't use the sampler uniform, skip this.
          */
-        if (sh == NULL || !uni->sampler[i].active)
+        if (sh == NULL || !uni->opaque[i].active)
            continue;
 
          for (int j = 0; j < count; j++) {
-            sh->SamplerUnits[uni->sampler[i].index + offset + j] =
+            sh->SamplerUnits[uni->opaque[i].index + offset + j] =
                ((unsigned *) values)[j];
          }
 
@@ -850,11 +850,11 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
     */
    if (uni->type->is_image()) {
       for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-        if (uni->image[i].active) {
+        if (uni->opaque[i].active) {
             struct gl_shader *sh = shProg->_LinkedShaders[i];
 
             for (int j = 0; j < count; j++)
-               sh->ImageUnits[uni->image[i].index + offset + j] =
+               sh->ImageUnits[uni->opaque[i].index + offset + j] =
                   ((GLint *) values)[j];
          }
       }
index 4df57c148c76adae7d9c9eaec30a9e065106e1c1..887d0c03a50c88fc581dc054cdea1daf0588fb30 100644 (file)
@@ -2354,7 +2354,7 @@ _mesa_print_arrays(struct gl_context *ctx)
 void
 _mesa_init_varray(struct gl_context *ctx)
 {
-   ctx->Array.DefaultVAO = ctx->Driver.NewArrayObject(ctx, 0);
+   ctx->Array.DefaultVAO = _mesa_new_vao(ctx, 0);
    _mesa_reference_vao(ctx, &ctx->Array.VAO, ctx->Array.DefaultVAO);
    ctx->Array.ActiveTexture = 0;   /* GL_ARB_multitexture */
 
index 98032456662667d6ddcd716853107709e2428a23..0214b8e684c6af4ff236c9da036ed1cf628501c0 100644 (file)
@@ -2353,11 +2353,12 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
         struct gl_uniform_storage *storage =
            &this->shader_program->UniformStorage[location];
 
-         assert(storage->sampler[shader_type].active);
+         assert(storage->type->is_sampler() &&
+                storage->opaque[shader_type].active);
 
         for (unsigned int j = 0; j < size / 4; j++)
             params->ParameterValues[index + j][0].f =
-               storage->sampler[shader_type].index + j;
+               storage->opaque[shader_type].index + j;
       }
    }
 
index b1168fdade8d0d996ee7588066e93c45a2494a9d..1198a3c45f1d120a688db4810796e330d37666e3 100644 (file)
@@ -119,7 +119,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
       return 0;
    }
 
-   if (!shader_program->UniformStorage[location].sampler[shader].active) {
+   if (!shader_program->UniformStorage[location].opaque[shader].active) {
       assert(0 && "cannot return a sampler");
       linker_error(shader_program,
                   "cannot return a sampler named %s, because it is not "
@@ -128,7 +128,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
       return 0;
    }
 
-   return shader_program->UniformStorage[location].sampler[shader].index +
+   return shader_program->UniformStorage[location].opaque[shader].index +
           getname.offset;
 }
 
index cceed42c828b8d2cefd7815b9b45aa4369dd5642..0f01e9939de21a625babfec1ecb8e4600eee51a5 100644 (file)
@@ -237,6 +237,14 @@ static void update_raster_state( struct st_context *st )
    /* _NEW_MULTISAMPLE */
    raster->multisample = ctx->Multisample._Enabled;
 
+   /* _NEW_MULTISAMPLE | _NEW_BUFFERS */
+   raster->force_persample_interp =
+         st->can_force_persample_interp &&
+         ctx->Multisample._Enabled &&
+         ctx->Multisample.SampleShading &&
+         ctx->Multisample.MinSampleShadingValue *
+         ctx->DrawBuffer->Visual.samples > 1;
+
    /* _NEW_SCISSOR */
    raster->scissor = ctx->Scissor.EnableFlags;
 
index fee15a980f30d60af729820825d45da0eb339548..1e880a107c0c14a5a499484d1573e45312a962ec 100644 (file)
@@ -70,8 +70,15 @@ update_fp( struct st_context *st )
    key.clamp_color = st->clamp_frag_color_in_shader &&
                      st->ctx->Color._ClampFragmentColor;
 
-   /* Ignore sample qualifier while computing this flag. */
+   /* Don't set it if the driver can force the interpolation by itself.
+    * If SAMPLE_ID or SAMPLE_POS are used, the interpolation is set
+    * automatically.
+    * Ignore sample qualifier while computing this flag.
+    */
    key.persample_shading =
+      !st->can_force_persample_interp &&
+      !(stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
+                                            SYSTEM_BIT_SAMPLE_POS)) &&
       _mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1;
 
    st->fp_variant = st_get_fp_variant(st, stfp, &key);
index db254c2144e205f179724d9ca9952add210d8d50..8afd336779faf68a93f813d3bf3873909d11d7a4 100644 (file)
@@ -527,8 +527,4 @@ st_init_bufferobject_functions(struct dd_function_table *functions)
    functions->UnmapBuffer = st_bufferobj_unmap;
    functions->CopyBufferSubData = st_copy_buffer_subdata;
    functions->ClearBufferSubData = st_clear_buffer_subdata;
-
-   /* For GL_APPLE_vertex_array_object */
-   functions->NewArrayObject = _mesa_new_vao;
-   functions->DeleteArrayObject = _mesa_delete_vao;
 }
index 9d06a232bfa8673326471a110c5f2df40e95d983..ff703fa41cb015ff10ea80f4fffd4e932f00f5cc 100644 (file)
@@ -245,17 +245,6 @@ st_renderbuffer_delete(struct gl_context *ctx, struct gl_renderbuffer *rb)
 }
 
 
-/**
- * Called via ctx->Driver.NewFramebuffer()
- */
-static struct gl_framebuffer *
-st_new_framebuffer(struct gl_context *ctx, GLuint name)
-{
-   /* XXX not sure we need to subclass gl_framebuffer for pipe */
-   return _mesa_new_framebuffer(ctx, name);
-}
-
-
 /**
  * Called via ctx->Driver.NewRenderbuffer()
  */
@@ -826,7 +815,7 @@ st_UnmapRenderbuffer(struct gl_context *ctx,
 
 void st_init_fbo_functions(struct dd_function_table *functions)
 {
-   functions->NewFramebuffer = st_new_framebuffer;
+   functions->NewFramebuffer = _mesa_new_framebuffer;
    functions->NewRenderbuffer = st_new_renderbuffer;
    functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw;
    functions->RenderTexture = st_render_texture;
index 72c23cad4bc95665d0766631ec97d16fa5bf832e..a9ab5edcf49cbb10f2e3a7aadc474813e813e60c 100644 (file)
@@ -237,6 +237,8 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
                                               PIPE_BIND_SAMPLER_VIEW);
    st->prefer_blit_based_texture_transfer = screen->get_param(screen,
                               PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
+   st->can_force_persample_interp = screen->get_param(screen,
+                                          PIPE_CAP_FORCE_PERSAMPLE_INTERP);
 
    st->needs_texcoord_semantic =
       screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
@@ -410,8 +412,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
    _mesa_init_shader_object_functions(functions);
    _mesa_init_sampler_object_functions(functions);
 
-   functions->Accum = _mesa_accum;
-
    st_init_blit_functions(functions);
    st_init_bufferobject_functions(functions);
    st_init_clear_functions(functions);
index 81d5480431aa27137ce4bab5302441a7c920c287..a4cda29059dbbff032184b49c7b9f69bd02b8634 100644 (file)
@@ -98,6 +98,7 @@ struct st_context
    boolean has_etc1;
    boolean has_etc2;
    boolean prefer_blit_based_texture_transfer;
+   boolean can_force_persample_interp;
 
    boolean needs_texcoord_semantic;
    boolean apply_texture_swizzle_to_border_color;
index e62dd7aab80f570f9ee92a8a53a8d80cc8e9d99e..a07f8fec309877085436a268e55fc99e09d5df66 100644 (file)
@@ -619,7 +619,9 @@ st_translate_fragment_program(struct st_context *st,
          else
             interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER;
 
-         if (key->persample_shading)
+         if (stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
+                                                 SYSTEM_BIT_SAMPLE_POS) ||
+             key->persample_shading)
             interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE;
 
          switch (attr) {
@@ -1681,6 +1683,26 @@ st_precompile_shader_variant(struct st_context *st,
       break;
    }
 
+   case GL_TESS_CONTROL_PROGRAM_NV: {
+      struct st_tessctrl_program *p = (struct st_tessctrl_program *)prog;
+      struct st_tcp_variant_key key;
+
+      memset(&key, 0, sizeof(key));
+      key.st = st;
+      st_get_tcp_variant(st, p, &key);
+      break;
+   }
+
+   case GL_TESS_EVALUATION_PROGRAM_NV: {
+      struct st_tesseval_program *p = (struct st_tesseval_program *)prog;
+      struct st_tep_variant_key key;
+
+      memset(&key, 0, sizeof(key));
+      key.st = st;
+      st_get_tep_variant(st, p, &key);
+      break;
+   }
+
    case GL_GEOMETRY_PROGRAM_NV: {
       struct st_geometry_program *p = (struct st_geometry_program *)prog;
       struct st_gp_variant_key key;
index e7e19a035971480f403cb3c8c004d454f6cd4fd4..56fa1a382f7bbda13c0e6d67357c57c5fd3bce50 100644 (file)
@@ -85,8 +85,8 @@ static void TAG(render_points_verts)(struct gl_context *ctx,
          currentsz = dmasz;
       }
    } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+      unreachable("Cannot draw primitive; validate_render should have "
+                  "prevented this");
    }
 }
 
@@ -316,11 +316,12 @@ static void TAG(render_poly_verts)(struct gl_context *ctx,
       }
 
       FLUSH();
-   } else if (ctx->Light.ShadeModel == GL_SMOOTH) {
+   } else if (ctx->Light.ShadeModel == GL_SMOOTH ||
+              ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
       TAG(render_tri_fan_verts)( ctx, start, count, flags );
    } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+      unreachable("Cannot draw primitive; validate_render should have "
+                  "prevented this");
    }
 }
 
@@ -331,14 +332,7 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
 {
    GLuint j, nr;
 
-   if (ctx->Light.ShadeModel == GL_FLAT &&
-       TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
-      /* Vertices won't fit in a single buffer or elts not available - should
-       * never happen.
-       */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   } else {
+   if (ctx->Light.ShadeModel == GL_SMOOTH) {
       LOCAL_VARS;
       const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
       unsigned currentsz;
@@ -364,6 +358,9 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
       }
 
       FLUSH();
+   } else {
+      unreachable("Cannot draw primitive; validate_render should have "
+                  "prevented this");
    }
 }
 
@@ -373,28 +370,33 @@ static void TAG(render_quads_verts)(struct gl_context *ctx,
                                     GLuint count,
                                     GLuint flags)
 {
-   LOCAL_VARS;
-   GLuint j;
+   if (ctx->Light.ShadeModel == GL_SMOOTH ||
+       ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+      LOCAL_VARS;
+      GLuint j;
 
-   /* Emit whole number of quads in total. */
-   count -= count & 3;
+      /* Emit whole number of quads in total. */
+      count -= count & 3;
 
-   /* Hardware doesn't have a quad primitive type -- try to simulate it using
-    * triangle primitive.  This is a win for gears, but is it useful in the
-    * broader world?
-    */
-   INIT(GL_TRIANGLES);
-
-   for (j = 0; j + 3 < count; j += 4) {
-      void *tmp = ALLOC_VERTS(6);
-      /* Send v0, v1, v3
-       */
-      tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
-      tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
-      /* Send v1, v2, v3
+      /* Hardware doesn't have a quad primitive type -- try to simulate it using
+       * triangle primitive.  This is a win for gears, but is it useful in the
+       * broader world?
        */
-      tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
-      (void) tmp;
+      INIT(GL_TRIANGLES);
+
+      for (j = 0; j + 3 < count; j += 4) {
+         void *tmp = ALLOC_VERTS(6);
+         /* Send v0, v1, v3
+          */
+         tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
+         tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
+         /* Send v1, v2, v3
+          */
+         tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
+         (void) tmp;
+      }
+   } else {
+      unreachable("Cannot draw primitive");
    }
 }
 
@@ -461,15 +463,15 @@ static bool TAG(validate_render)(struct gl_context *ctx,
          ok = true;
          break;
       case GL_POLYGON:
-         ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH;
+         ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH ||
+              ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION;
          break;
       case GL_QUAD_STRIP:
-         ok = VB->Elts ||
-              (ctx->Light.ShadeModel != GL_FLAT ||
-               VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0);
+         ok = VB->Elts || ctx->Light.ShadeModel == GL_SMOOTH;
          break;
       case GL_QUADS:
-         ok = true; /* flatshading is ok. */
+         ok = ctx->Light.ShadeModel == GL_SMOOTH ||
+              ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION;
          break;
       default:
          break;
index 2aaff5df0198b98a67c1e5a22f046883fdff43e6..00e843c9a0f6b49a45824d8f463461c707b7f8fc 100644 (file)
@@ -88,6 +88,14 @@ void
 vbo_initialize_save_dispatch(const struct gl_context *ctx,
                              struct _glapi_table *exec);
 
+void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags);
+void vbo_save_SaveFlushVertices(struct gl_context *ctx);
+GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode);
+void vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode);
+void vbo_save_EndList(struct gl_context *ctx);
+void vbo_save_BeginCallList(struct gl_context *ctx, struct gl_display_list *list);
+void vbo_save_EndCallList(struct gl_context *ctx);
+
 
 typedef void (*vbo_draw_func)( struct gl_context *ctx,
                               const struct _mesa_prim *prims,
index eb90350432857af3071976c661f0da5d67e088e8..a301c6c9a227b136c9648c4bfc98c0a82da282ef 100644 (file)
@@ -50,8 +50,6 @@ void vbo_exec_init( struct gl_context *ctx )
 
    ctx->Driver.NeedFlush = 0;
    ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
-   ctx->Driver.BeginVertices = vbo_exec_BeginVertices;
-   ctx->Driver.FlushVertices = vbo_exec_FlushVertices;
 
    vbo_exec_invalidate_state( ctx, ~0 );
 }
index f17fe684cc3fea7d960fd5f0da74638e2d83f1ba..80f3015925d6e17cf10d51b497df7edf4c50fd91 100644 (file)
@@ -148,7 +148,6 @@ void vbo_exec_destroy( struct gl_context *ctx );
 void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state );
 
 void vbo_exec_BeginVertices( struct gl_context *ctx );
-void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags );
 
 
 /* Internal functions:
index 138cd60513dd0f4a2faec5eee251e04c2bc37346..583a2f9b79ff3a64c1741d06d971b0f441f8be06 100644 (file)
@@ -419,7 +419,7 @@ do {                                                                        \
    struct vbo_exec_context *exec = &vbo_context(ctx)->exec;            \
    int sz = (sizeof(C) / sizeof(GLfloat));                              \
    if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT)))      \
-      ctx->Driver.BeginVertices( ctx );                                        \
+      vbo_exec_BeginVertices(ctx);                                     \
                                                                         \
    if (unlikely(exec->vtx.active_sz[A] != N * sz) ||                    \
        unlikely(exec->vtx.attrtype[A] != T))                            \
@@ -1165,7 +1165,14 @@ void vbo_exec_BeginVertices( struct gl_context *ctx )
 
 
 /**
- * Called via ctx->Driver.FlushVertices()
+ * If inside glBegin()/glEnd(), it should assert(0).  Otherwise, if the
+ * FLUSH_STORED_VERTICES bit in \p flags is set, flushes any buffered
+ * vertices; if the FLUSH_UPDATE_CURRENT bit is set, updates
+ * gl_context::Current and gl_light_attrib::Material.
+ *
+ * Note that the default T&L engine never clears the
+ * FLUSH_UPDATE_CURRENT bit, even after performing the update.
+ *
  * \param flags  bitmask of FLUSH_STORED_VERTICES, FLUSH_UPDATE_CURRENT
  */
 void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags )
@@ -1190,7 +1197,7 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags )
    /* Flush (draw), and make sure VBO is left unmapped when done */
    vbo_exec_FlushVertices_internal(exec, GL_TRUE);
 
-   /* Need to do this to ensure BeginVertices gets called again:
+   /* Need to do this to ensure vbo_exec_BeginVertices gets called again:
     */
    ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags);
 
index a177660c0f2b9d0ce559146772fa38db424b1586..79603e9b32eabd1f1e2832e98dded8e3cfa177de 100644 (file)
 #include "vbo_context.h"
 
 
-static void vbo_save_callback_init( struct gl_context *ctx )
-{
-   ctx->Driver.NewList = vbo_save_NewList;
-   ctx->Driver.EndList = vbo_save_EndList;
-   ctx->Driver.SaveFlushVertices = vbo_save_SaveFlushVertices;
-   ctx->Driver.BeginCallList = vbo_save_BeginCallList;
-   ctx->Driver.EndCallList = vbo_save_EndCallList;
-   ctx->Driver.NotifySaveBegin = vbo_save_NotifyBegin;
-}
-
-
-
 /**
  * Called at context creation time.
  */
@@ -56,7 +44,6 @@ void vbo_save_init( struct gl_context *ctx )
    save->ctx = ctx;
 
    vbo_save_api_init( save );
-   vbo_save_callback_init(ctx);
 
    {
       struct gl_client_array *arrays = save->arrays;
index 5b1ac81771ed1604ec694d3a90a26ef2839d8e78..8032db8a9e0e35cd753dd93a27769f95692b8ff2 100644 (file)
@@ -175,13 +175,6 @@ void vbo_loopback_vertex_list( struct gl_context *ctx,
 
 /* Callbacks:
  */
-void vbo_save_EndList( struct gl_context *ctx );
-void vbo_save_NewList( struct gl_context *ctx, GLuint list, GLenum mode );
-void vbo_save_EndCallList( struct gl_context *ctx );
-void vbo_save_BeginCallList( struct gl_context *ctx, struct gl_display_list *list );
-void vbo_save_SaveFlushVertices( struct gl_context *ctx );
-GLboolean vbo_save_NotifyBegin( struct gl_context *ctx, GLenum mode );
-
 void vbo_save_playback_vertex_list( struct gl_context *ctx, void *data );
 
 void vbo_save_api_init( struct vbo_save_context *save );
index 29de3d38aaa33edeb381adbbff116627f7dca442..1a70d168c5591fba27ad124d4cc348af0892002a 100644 (file)
@@ -970,8 +970,7 @@ _save_CallLists(GLsizei n, GLenum type, const GLvoid * v)
 
 
 /**
- * Called via ctx->Driver.NotifySaveBegin() when a glBegin is getting
- * compiled into a display list.
+ * Called when a glBegin is getting compiled into a display list.
  * Updating of ctx->Driver.CurrentSavePrimitive is already taken care of.
  */
 GLboolean
@@ -1001,7 +1000,7 @@ vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode)
       _mesa_install_save_vtxfmt(ctx, &save->vtxfmt);
    }
 
-   /* We need to call SaveFlushVertices() if there's state change */
+   /* We need to call vbo_save_SaveFlushVertices() if there's a state change */
    ctx->Driver.SaveNeedFlush = GL_TRUE;
 
    /* GL_TRUE means we've handled this glBegin here; don't compile a BEGIN
@@ -1604,8 +1603,6 @@ vbo_save_api_init(struct vbo_save_context *save)
                                vbo_destroy_vertex_list,
                                vbo_print_vertex_list);
 
-   ctx->Driver.NotifySaveBegin = vbo_save_NotifyBegin;
-
    _save_vtxfmt_init(ctx);
    _save_current_init(ctx);
    _mesa_noop_vtxfmt_init(&save->vtxfmt_noop);
index ef38b5ac7d10b3c0780b0bef73963682e06c5195..e45431d1de88cc665ae260b6b0c366a235b3b798 100644 (file)
@@ -1,5 +1,7 @@
 MESA_UTIL_FILES :=     \
        bitset.h \
+       debug.c \
+       debug.h \
        format_srgb.h \
        hash_table.c    \
        hash_table.h \
diff --git a/src/util/debug.c b/src/util/debug.c
new file mode 100644 (file)
index 0000000..3729ce8
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <string.h>
+#include "main/macros.h"
+#include "debug.h"
+
+uint64_t
+parse_debug_string(const char *debug,
+                   const struct debug_control *control)
+{
+   uint64_t flag = 0;
+
+   if (debug != NULL) {
+      for (; control->string != NULL; control++) {
+         if (!strcmp(debug, "all")) {
+            flag |= control->flag;
+
+         } else {
+            const char *s = debug;
+            unsigned n;
+
+            for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
+               if (strlen(control->string) == n &&
+                   !strncmp(control->string, s, n))
+                  flag |= control->flag;
+            }
+         }
+      }
+   }
+
+   return flag;
+}
diff --git a/src/util/debug.h b/src/util/debug.h
new file mode 100644 (file)
index 0000000..801736a
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _DEBUG_H
+#define _DEBUG_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct debug_control {
+    const char * string;
+    uint64_t     flag;
+};
+
+uint64_t
+parse_debug_string(const char *debug,
+                   const struct debug_control *control);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* _DEBUG_H */
index c5a076d2ba3311bb14cc744e7116f6649da63fcf..a691329065cd9d626662605409b1a0f56b6c4706 100644 (file)
@@ -118,7 +118,10 @@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
 CLEANFILES = $(BUILT_SOURCES)
 
 libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \
-       $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la
+       $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
+       ../mesa/libmesa.la \
+       ../mesa/drivers/dri/common/libdri_test_stubs.la \
+       -lpthread -ldl
 
 # Libvulkan with dummy gem. Used for unit tests.
 
index c7593e61b4dcfe3d63f60dbb4153dd60fd916d15..759ec7ae4d9d2bc618a29382dbe997701b95d82a 100644 (file)
@@ -35,6 +35,7 @@
 #include <brw_vs.h>
 #include <brw_gs.h>
 #include <brw_cs.h>
+#include "brw_vec4_gs_visitor.h"
 
 #include <mesa/main/shaderobj.h>
 #include <mesa/main/fbobject.h>
@@ -153,6 +154,71 @@ create_params_array(struct anv_pipeline *pipeline,
          (const gl_constant_value *)&null_data->client_data[i * sizeof(float)];
 }
 
+/**
+ * Return a bitfield where bit n is set if barycentric interpolation mode n
+ * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
+ */
+unsigned
+brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
+                                     bool shade_model_flat,
+                                     bool persample_shading,
+                                     nir_shader *shader)
+{
+   unsigned barycentric_interp_modes = 0;
+
+   nir_foreach_variable(var, &shader->inputs) {
+      enum glsl_interp_qualifier interp_qualifier =
+         (enum glsl_interp_qualifier) var->data.interpolation;
+      bool is_centroid = var->data.centroid && !persample_shading;
+      bool is_sample = var->data.sample || persample_shading;
+      bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
+                         (var->data.location == VARYING_SLOT_COL1);
+
+      /* Ignore WPOS and FACE, because they don't require interpolation. */
+      if (var->data.location == VARYING_SLOT_POS ||
+          var->data.location == VARYING_SLOT_FACE)
+         continue;
+
+      /* Determine the set (or sets) of barycentric coordinates needed to
+       * interpolate this variable.  Note that when
+       * devinfo->needs_unlit_centroid_workaround is set, centroid interpolation
+       * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
+       * for lit pixels, so we need both sets of barycentric coordinates.
+       */
+      if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) {
+         if (is_centroid) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+         } else if (is_sample) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
+         }
+         if ((!is_centroid && !is_sample) ||
+             devinfo->needs_unlit_centroid_workaround) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+         }
+      } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH ||
+                 (!(shade_model_flat && is_gl_Color) &&
+                  interp_qualifier == INTERP_QUALIFIER_NONE)) {
+         if (is_centroid) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
+         } else if (is_sample) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
+         }
+         if ((!is_centroid && !is_sample) ||
+             devinfo->needs_unlit_centroid_workaround) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+         }
+      }
+   }
+
+   return barycentric_interp_modes;
+}
+
 static void
 brw_vs_populate_key(struct brw_context *brw,
                     struct brw_vertex_program *vp,
@@ -188,10 +254,6 @@ brw_vs_populate_key(struct brw_context *brw,
             key->point_coord_replace |= (1 << i);
       }
    }
-
-   /* _NEW_TEXTURE */
-   brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
-                                      &key->tex);
 }
 
 static bool
@@ -263,7 +325,7 @@ really_do_vs_prog(struct brw_context *brw,
    /* Emit GEN4 code.
     */
    program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
-                         prog, &program_size);
+                         prog, -1, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
@@ -288,7 +350,6 @@ void brw_wm_populate_key(struct brw_context *brw,
                          struct brw_wm_prog_key *key)
 {
    struct gl_context *ctx = &brw->ctx;
-   struct gl_program *prog = (struct gl_program *) brw->fragment_program;
    GLuint lookup = 0;
    GLuint line_aa;
    bool program_uses_dfdy = fp->program.UsesDFdy;
@@ -383,10 +444,6 @@ void brw_wm_populate_key(struct brw_context *brw,
    /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
    key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
 
-   /* _NEW_TEXTURE */
-   brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
-                                      &key->tex);
-
    /* _NEW_BUFFERS */
    /*
     * Include the draw buffer origin and height so that we can calculate
@@ -522,7 +579,7 @@ really_do_wm_prog(struct brw_context *brw,
    prog_data->binding_table.render_target_start = 0;
 
    program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
-                            &fp->program, prog, &program_size);
+                            &fp->program, prog, -1, -1, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
@@ -546,44 +603,260 @@ really_do_wm_prog(struct brw_context *brw,
    return true;
 }
 
-static void
-brw_gs_populate_key(struct brw_context *brw,
-                    struct anv_pipeline *pipeline,
+bool
+anv_codegen_gs_prog(struct brw_context *brw,
+                    struct gl_shader_program *prog,
                     struct brw_geometry_program *gp,
-                    struct brw_gs_prog_key *key)
+                    struct brw_gs_prog_key *key,
+                    struct anv_pipeline *pipeline)
 {
-   struct gl_context *ctx = &brw->ctx;
-   struct brw_stage_state *stage_state = &brw->gs.base;
-   struct gl_program *prog = &gp->program.Base;
+   struct brw_gs_compile c;
 
-   memset(key, 0, sizeof(*key));
+   memset(&c, 0, sizeof(c));
+   c.key = *key;
+   c.gp = gp;
 
-   key->program_string_id = gp->id;
+   c.prog_data.include_primitive_id =
+      (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
 
-   /* _NEW_TEXTURE */
-   brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
-                                      &key->tex);
-}
+   c.prog_data.invocations = gp->program.Invocations;
 
-static bool
-really_do_gs_prog(struct brw_context *brw,
-                  struct gl_shader_program *prog,
-                  struct brw_geometry_program *gp,
-                  struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
-{
-   struct brw_gs_compile_output output;
-
-   /* FIXME: We pass the bind map to the compile in the output struct. Need
-    * something better. */
-   set_binding_table_layout(&output.prog_data.base.base,
+   set_binding_table_layout(&c.prog_data.base.base,
                             pipeline, VK_SHADER_STAGE_GEOMETRY);
 
-   brw_compile_gs_prog(brw, prog, gp, key, &output);
+   /* Allocate the references to the uniforms that will end up in the
+    * prog_data associated with the compiled program, and which will be freed
+    * by the state cache.
+    *
+    * Note: param_count needs to be nir->num_uniforms * 4, since we add
+    * padding around uniform values below vec4 size, so the worst case is that
+    * every uniform is a float which gets padded to the size of a vec4.
+    */
+   struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+   int param_count = gp->program.Base.nir->num_uniforms * 4;
+
+   c.prog_data.base.base.param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   c.prog_data.base.base.pull_param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   c.prog_data.base.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
+   c.prog_data.base.base.nr_params = param_count;
+   c.prog_data.base.base.nr_image_params = gs->NumImages;
+
+   brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
+                               &c.prog_data.base.base, false);
+
+   if (brw->gen >= 8) {
+      c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+         nir_gs_count_vertices(gp->program.Base.nir);
+   }
+
+   if (brw->gen >= 7) {
+      if (gp->program.OutputType == GL_POINTS) {
+         /* When the output type is points, the geometry shader may output data
+          * to multiple streams, and EndPrimitive() has no effect.  So we
+          * configure the hardware to interpret the control data as stream ID.
+          */
+         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+         /* We only have to emit control bits if we are using streams */
+         if (prog->Geom.UsesStreams)
+            c.control_data_bits_per_vertex = 2;
+         else
+            c.control_data_bits_per_vertex = 0;
+      } else {
+         /* When the output type is triangle_strip or line_strip, EndPrimitive()
+          * may be used to terminate the current strip and start a new one
+          * (similar to primitive restart), and outputting data to multiple
+          * streams is not supported.  So we configure the hardware to interpret
+          * the control data as EndPrimitive information (a.k.a. "cut bits").
+          */
+         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+         /* We only need to output control data if the shader actually calls
+          * EndPrimitive().
+          */
+         c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+      }
+   } else {
+      /* There are no control data bits in gen6. */
+      c.control_data_bits_per_vertex = 0;
+
+      /* If it is using transform feedback, enable it */
+      if (prog->TransformFeedback.NumVarying)
+         c.prog_data.gen6_xfb_enabled = true;
+      else
+         c.prog_data.gen6_xfb_enabled = false;
+   }
+   c.control_data_header_size_bits =
+      gp->program.VerticesOut * c.control_data_bits_per_vertex;
+
+   /* 1 HWORD = 32 bytes = 256 bits */
+   c.prog_data.control_data_header_size_hwords =
+      ALIGN(c.control_data_header_size_bits, 256) / 256;
+
+   GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
+
+   brw_compute_vue_map(brw->intelScreen->devinfo,
+                       &c.prog_data.base.vue_map, outputs_written,
+                       prog ? prog->SeparateShader : false);
+
+   /* Compute the output vertex size.
+    *
+    * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
+    * Size (p168):
+    *
+    *     [0,62] indicating [1,63] 16B units
+    *
+    *     Specifies the size of each vertex stored in the GS output entry
+    *     (following any Control Header data) as a number of 128-bit units
+    *     (minus one).
+    *
+    *     Programming Restrictions: The vertex size must be programmed as a
+    *     multiple of 32B units with the following exception: Rendering is
+    *     disabled (as per SOL stage state) and the vertex size output by the
+    *     GS thread is 16B.
+    *
+    *     If rendering is enabled (as per SOL state) the vertex size must be
+    *     programmed as a multiple of 32B units. In other words, the only time
+    *     software can program a vertex size with an odd number of 16B units
+    *     is when rendering is disabled.
+    *
+    * Note: B=bytes in the above text.
+    *
+    * It doesn't seem worth the extra trouble to optimize the case where the
+    * vertex size is 16B (especially since this would require special-casing
+    * the GEN assembly that writes to the URB).  So we just set the vertex
+    * size to a multiple of 32B (2 vec4's) in all cases.
+    *
+    * The maximum output vertex size is 62*16 = 992 bytes (31 hwords).  We
+    * budget that as follows:
+    *
+    *   512 bytes for varyings (a varying component is 4 bytes and
+    *             gl_MaxGeometryOutputComponents = 128)
+    *    16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
+    *             bytes)
+    *    16 bytes overhead for gl_Position (we allocate it a slot in the VUE
+    *             even if it's not used)
+    *    32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
+    *             whenever clip planes are enabled, even if the shader doesn't
+    *             write to gl_ClipDistance)
+    *    16 bytes overhead since the VUE size must be a multiple of 32 bytes
+    *             (see below)--this causes up to 1 VUE slot to be wasted
+    *   400 bytes available for varying packing overhead
+    *
+    * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
+    * per interpolation type, so this is plenty.
+    *
+    */
+   unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
+   assert(brw->gen == 6 ||
+          output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
+   c.prog_data.output_vertex_size_hwords =
+      ALIGN(output_vertex_size_bytes, 32) / 32;
+
+   /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
+    * That divides up as follows:
+    *
+    *     64 bytes for the control data header (cut indices or StreamID bits)
+    *   4096 bytes for varyings (a varying component is 4 bytes and
+    *              gl_MaxGeometryTotalOutputComponents = 1024)
+    *   4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
+    *              bytes/vertex and gl_MaxGeometryOutputVertices is 256)
+    *   4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
+    *              even if it's not used)
+    *   8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
+    *              whenever clip planes are enabled, even if the shader doesn't
+    *              write to gl_ClipDistance)
+    *   4096 bytes overhead since the VUE size must be a multiple of 32
+    *              bytes (see above)--this causes up to 1 VUE slot to be wasted
+    *   8128 bytes available for varying packing overhead
+    *
+    * Worst-case varying packing overhead is 3/4 of a varying slot per
+    * interpolation type, which works out to 3072 bytes, so this would allow
+    * us to accommodate 2 interpolation types without any danger of running
+    * out of URB space.
+    *
+    * In practice, the risk of running out of URB space is very small, since
+    * the above figures are all worst-case, and most of them scale with the
+    * number of output vertices.  So we'll just calculate the amount of space
+    * we need, and if it's too large, fail to compile.
+    *
+    * The above is for gen7+ where we have a single URB entry that will hold
+    * all the output. In gen6, we will have to allocate URB entries for every
+    * vertex we emit, so our URB entries only need to be large enough to hold
+    * a single vertex. Also, gen6 does not have a control data header.
+    */
+   unsigned output_size_bytes;
+   if (brw->gen >= 7) {
+      output_size_bytes =
+         c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
+      output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
+   } else {
+      output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
+   }
+
+   /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
+    * which comes before the control header.
+    */
+   if (brw->gen >= 8)
+      output_size_bytes += 32;
+
+   assert(output_size_bytes >= 1);
+   int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+   if (brw->gen == 6)
+      max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
+   if (output_size_bytes > max_output_size_bytes)
+      return false;
 
-   pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size);
+
+   /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
+    * a multiple of 128 bytes in gen6.
+    */
+   if (brw->gen >= 7)
+      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+   else
+      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
+
+   /* FIXME: Need to pull this from nir shader. */
+   c.prog_data.output_topology = _3DPRIM_TRISTRIP;
+
+   /* The GLSL linker will have already matched up GS inputs and the outputs
+    * of prior stages.  The driver does extend VS outputs in some cases, but
+    * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+    * geometry shader support.  So we can safely ignore that.
+    *
+    * For SSO pipelines, we use a fixed VUE map layout based on variable
+    * locations, so we can rely on rendezvous-by-location making this work.
+    *
+    * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+    * written by previous stages and shows up via payload magic.
+    */
+   GLbitfield64 inputs_read =
+      gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
+   brw_compute_vue_map(brw->intelScreen->devinfo,
+                       &c.input_vue_map, inputs_read,
+                       prog->SeparateShader);
+
+   /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
+    * need to program a URB read length of ceiling(num_slots / 2).
+    */
+   c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
+
+   void *mem_ctx = ralloc_context(NULL);
+   unsigned program_size;
+   const unsigned *program =
+      brw_gs_emit(brw, prog, &c, mem_ctx, -1, &program_size);
+   if (program == NULL) {
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   pipeline->gs_vec4 = upload_kernel(pipeline, program, program_size);
    pipeline->gs_vertex_count = gp->program.VerticesIn;
 
-   ralloc_free(output.mem_ctx);
+   ralloc_free(mem_ctx);
 
    return true;
 }
@@ -610,7 +883,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
    anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
 
    program = brw_cs_emit(brw, mem_ctx, key, prog_data,
-                         &cp->program, prog, &program_size);
+                         &cp->program, prog, -1, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
@@ -690,7 +963,7 @@ anv_compiler_create(struct anv_device *device)
    compiler->brw->intelScreen = compiler->screen;
    compiler->screen->devinfo = &device->info;
 
-   brw_process_intel_debug_variable(compiler->screen);
+   brw_process_intel_debug_variable();
 
    compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
 
@@ -1011,7 +1284,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
    brw->use_rep_send = pipeline->use_repclear;
    brw->no_simd8 = pipeline->use_repclear;
 
-   program = brw->ctx.Driver.NewShaderProgram(name);
+   program = _mesa_new_shader_program(name);
    program->Shaders = (struct gl_shader **)
       calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
    fail_if(program == NULL || program->Shaders == NULL,
@@ -1059,9 +1332,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
          program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
       struct brw_geometry_program *bgp = brw_geometry_program(gp);
 
-      brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
-
-      success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
+      success = anv_codegen_gs_prog(brw, program, bgp, &gs_key, pipeline);
       fail_if(!success, "do_gs_prog failed\n");
       add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
                          &pipeline->gs_prog_data.base.base);
@@ -1100,7 +1371,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
                          &pipeline->cs_prog_data.base);
    }
 
-   brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
+   _mesa_delete_shader_program(&brw->ctx, program);
 
    struct anv_device *device = compiler->device;
    while (device->scratch_block_pool.bo.size < pipeline->total_scratch)