vc4: Try compiling our FSes in multithreaded mode on new kernels.
author: Eric Anholt <eric@anholt.net>
Fri, 11 Nov 2016 01:47:34 +0000 (17:47 -0800)
committer: Eric Anholt <eric@anholt.net>
Thu, 17 Nov 2016 03:45:01 +0000 (19:45 -0800)
Multithreaded fragment shaders let us hide texturing latency by a
hyperthreading-style switch to another fragment shader.  This gets us up
to 20% framerate improvements on glmark2 tests.

src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_screen.c
src/gallium/drivers/vc4/vc4_screen.h
src/gallium/drivers/vc4/vc4_simulator.c

index e59b1d282871379f297c815a00146349c3ef7793..2005ae0e4eaf6f0b95f467a7b513f6f386826841 100644 (file)
@@ -42,6 +42,9 @@
 #ifndef DRM_VC4_PARAM_SUPPORTS_ETC1
 #define DRM_VC4_PARAM_SUPPORTS_ETC1            4
 #endif
+#ifndef DRM_VC4_PARAM_SUPPORTS_THREADED_FS
+#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS     5
+#endif
 
 #ifdef USE_VC4_SIMULATOR
 #define using_vc4_simulator true
index 3c30f8c477fa6c10e50f8ae1f29d84e06aebd094..1191f1766fe051a1769011bb77f833063731a889 100644 (file)
@@ -2158,7 +2158,7 @@ count_nir_instrs(nir_shader *nir)
 
 static struct vc4_compile *
 vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
-                       struct vc4_key *key)
+               struct vc4_key *key, bool fs_threaded)
 {
         struct vc4_compile *c = qir_compile_init();
 
@@ -2168,6 +2168,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         c->program_id = key->shader_state->program_id;
         c->variant_id =
                 p_atomic_inc_return(&key->shader_state->compiled_variant_count);
+        c->fs_threaded = fs_threaded;
 
         c->key = key;
         switch (stage) {
@@ -2496,12 +2497,16 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
 {
         struct hash_table *ht;
         uint32_t key_size;
+        bool try_threading;
+
         if (stage == QSTAGE_FRAG) {
                 ht = vc4->fs_cache;
                 key_size = sizeof(struct vc4_fs_key);
+                try_threading = vc4->screen->has_threaded_fs;
         } else {
                 ht = vc4->vs_cache;
                 key_size = sizeof(struct vc4_vs_key);
+                try_threading = false;
         }
 
         struct vc4_compiled_shader *shader;
@@ -2509,7 +2514,13 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
         if (entry)
                 return entry->data;
 
-        struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key);
+        struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key, try_threading);
+        /* If the FS failed to compile threaded, fall back to single threaded. */
+        if (try_threading && c->failed) {
+                qir_compile_destroy(c);
+                c = vc4_shader_ntq(vc4, stage, key, false);
+        }
+
         shader = rzalloc(NULL, struct vc4_compiled_shader);
 
         shader->program_id = vc4->next_compiled_program_id++;
index 9f852f0326d0ea81a518d543d92a3812b10e427e..97510b621d45fa2eab59fda48375d17e245293df 100644 (file)
@@ -614,6 +614,8 @@ vc4_screen_create(int fd)
                 vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
         screen->has_etc1 =
                 vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_ETC1);
+        screen->has_threaded_fs =
+                vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
 
         if (!vc4_get_chip_info(screen))
                 goto fail;
index 572d62d6ac1278d13bd3bc519eb47f98af8b02d8..1f91ad37f8861a4f6f0665d682d6ab9934905f9b 100644 (file)
@@ -90,6 +90,7 @@ struct vc4_screen {
         uint32_t bo_count;
         bool has_control_flow;
         bool has_etc1;
+        bool has_threaded_fs;
 
         struct vc4_simulator_file *sim_file;
 };
index 815898329b32dd18763bba5162cbb6921d631f8e..9565c49efb7da249d2a8fd6413b48175448fb8d7 100644 (file)
@@ -613,6 +613,7 @@ vc4_simulator_get_param_ioctl(int fd, struct drm_vc4_get_param *args)
         switch (args->param) {
         case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
         case DRM_VC4_PARAM_SUPPORTS_ETC1:
+        case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
                 args->value = true;
                 return 0;