From 7f27ad55974d0bdac4c94a4523a4d42cc75334d5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 10 Nov 2016 17:47:34 -0800 Subject: [PATCH] vc4: Try compiling our FSes in multithreaded mode on new kernels. Multithreaded fragment shaders let us hide texturing latency by a hyperthreading-style switch to another fragment shader. This gets us up to 20% framerate improvements on glmark2 tests. --- src/gallium/drivers/vc4/vc4_context.h | 3 +++ src/gallium/drivers/vc4/vc4_program.c | 15 +++++++++++++-- src/gallium/drivers/vc4/vc4_screen.c | 2 ++ src/gallium/drivers/vc4/vc4_screen.h | 1 + src/gallium/drivers/vc4/vc4_simulator.c | 1 + 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index e59b1d28287..2005ae0e4ea 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -42,6 +42,9 @@ #ifndef DRM_VC4_PARAM_SUPPORTS_ETC1 #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 #endif +#ifndef DRM_VC4_PARAM_SUPPORTS_THREADED_FS +#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 +#endif #ifdef USE_VC4_SIMULATOR #define using_vc4_simulator true diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 3c30f8c477f..1191f1766fe 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2158,7 +2158,7 @@ count_nir_instrs(nir_shader *nir) static struct vc4_compile * vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, - struct vc4_key *key) + struct vc4_key *key, bool fs_threaded) { struct vc4_compile *c = qir_compile_init(); @@ -2168,6 +2168,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, c->program_id = key->shader_state->program_id; c->variant_id = p_atomic_inc_return(&key->shader_state->compiled_variant_count); + c->fs_threaded = fs_threaded; c->key = key; switch (stage) { @@ -2496,12 +2497,16 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, { struct hash_table *ht; uint32_t key_size; + bool try_threading; + if (stage == QSTAGE_FRAG) { ht = vc4->fs_cache; key_size = sizeof(struct vc4_fs_key); + try_threading = vc4->screen->has_threaded_fs; } else { ht = vc4->vs_cache; key_size = sizeof(struct vc4_vs_key); + try_threading = false; } struct vc4_compiled_shader *shader; @@ -2509,7 +2514,13 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, if (entry) return entry->data; - struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key); + struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key, try_threading); + /* If the FS failed to compile threaded, fall back to single threaded. */ + if (try_threading && c->failed) { + qir_compile_destroy(c); + c = vc4_shader_ntq(vc4, stage, key, false); + } + shader = rzalloc(NULL, struct vc4_compiled_shader); shader->program_id = vc4->next_compiled_program_id++; diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 9f852f0326d..97510b621d4 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -614,6 +614,8 @@ vc4_screen_create(int fd) vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_BRANCHES); screen->has_etc1 = vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_ETC1); + screen->has_threaded_fs = + vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS); if (!vc4_get_chip_info(screen)) goto fail; diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 572d62d6ac1..1f91ad37f88 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -90,6 +90,7 @@ struct vc4_screen { uint32_t bo_count; bool has_control_flow; bool has_etc1; + bool has_threaded_fs; struct vc4_simulator_file *sim_file; }; diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 815898329b3..9565c49efb7 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -613,6 +613,7 @@ vc4_simulator_get_param_ioctl(int fd, struct drm_vc4_get_param *args) switch (args->param) { case DRM_VC4_PARAM_SUPPORTS_BRANCHES: case DRM_VC4_PARAM_SUPPORTS_ETC1: + case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: args->value = true; return 0; -- 2.30.2