radv: dont store disasm string unless keep_shader_info flag set
[mesa.git] / src / amd / vulkan / radv_shader.c
index 3e2966b78564556119fceedc67e68b98aa731d45..ffbef6857b9aede8fdbf423c03f7d0d74870f70e 100644 (file)
@@ -76,7 +76,8 @@ static const struct nir_shader_compiler_options nir_options = {
        .lower_fpow = true,
        .lower_mul_2x32_64 = true,
        .lower_rotate = true,
-       .max_unroll_iterations = 32
+       .max_unroll_iterations = 32,
+       .use_interpolated_input_intrinsics = true,
 };
 
 VkResult radv_CreateShaderModule(
@@ -269,6 +270,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                .int64_atomics = true,
                                .multiview = true,
                                .physical_storage_buffer_address = true,
+                               .post_depth_coverage = true,
                                .runtime_descriptor_array = true,
                                .shader_viewport_index_layer = true,
                                .stencil_export = true,
@@ -290,6 +292,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
                        .phys_ssbo_addr_format = nir_address_format_64bit_global,
                        .push_const_addr_format = nir_address_format_logical,
                        .shared_addr_format = nir_address_format_32bit_offset,
+                       .frag_coord_is_sysval = true,
                };
                nir = spirv_to_nir(spirv, module->size / 4,
                                   spec_entries, num_spec_entries,
@@ -334,6 +337,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
                NIR_PASS_V(nir, nir_split_var_copies);
                NIR_PASS_V(nir, nir_split_per_member_structs);
 
+               if (nir->info.stage == MESA_SHADER_FRAGMENT)
+                       NIR_PASS_V(nir, nir_lower_input_attachments, true);
+
                NIR_PASS_V(nir, nir_remove_dead_variables,
                           nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
 
@@ -357,11 +363,11 @@ radv_shader_compile_to_nir(struct radv_device *device,
        nir_lower_vars_to_ssa(nir);
 
        if (nir->info.stage == MESA_SHADER_VERTEX ||
-           nir->info.stage == MESA_SHADER_GEOMETRY) {
+           nir->info.stage == MESA_SHADER_GEOMETRY ||
+           nir->info.stage == MESA_SHADER_FRAGMENT) {
                NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                           nir_shader_get_entrypoint(nir), true, true);
-       } else if (nir->info.stage == MESA_SHADER_TESS_EVAL||
-                  nir->info.stage == MESA_SHADER_FRAGMENT) {
+       } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
                NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                           nir_shader_get_entrypoint(nir), true, false);
        }
@@ -401,6 +407,151 @@ radv_shader_compile_to_nir(struct radv_device *device,
        return nir;
 }
 
+static void mark_16bit_fs_input(struct radv_shader_variant_info *shader_info,
+                                const struct glsl_type *type,
+                                int location) /* bit position of the first slot covered by 'type' */
+{ /* Recursively walk 'type' and set, in fs.float16_shaded_mask, the bit of every attribute slot (counted from 'location') whose type is 16-bit. */
+       if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
+               unsigned attrib_count = glsl_count_attribute_slots(type, false);
+               if (glsl_type_is_16bit(type)) {
+                       shader_info->fs.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location; /* attrib_count consecutive bits starting at 'location' */
+               }
+       } else if (glsl_type_is_array(type)) {
+               unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false); /* slots taken by one array element */
+               for (unsigned i = 0; i < glsl_get_length(type); ++i) {
+                       mark_16bit_fs_input(shader_info, glsl_get_array_element(type), location + i * stride);
+               }
+       } else {
+               assert(glsl_type_is_struct_or_ifc(type));
+               for (unsigned i = 0; i < glsl_get_length(type); i++) {
+                       mark_16bit_fs_input(shader_info, glsl_get_struct_field(type, i), location);
+                       location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false); /* step past the slots of the field just visited */
+               }
+       }
+}
+
+static void
+handle_fs_input_decl(struct radv_shader_variant_info *shader_info,
+                    struct nir_variable *variable)
+{ /* Fold one fragment-shader input variable into the fs.float16_shaded_mask, fs.flat_shaded_mask and fs.input_mask bitfields of 'shader_info'. */
+       unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+
+       if (variable->data.compact) {
+               unsigned component_count = variable->data.location_frac +
+                                          glsl_get_length(variable->type);
+               attrib_count = (component_count + 3) / 4; /* round the component count up to whole groups of four */
+       } else {
+               mark_16bit_fs_input(shader_info, variable->type,
+                                   variable->data.driver_location);
+       }
+
+       uint64_t mask = ((1ull << attrib_count) - 1); /* the attrib_count lowest bits set */
+
+       if (variable->data.interpolation == INTERP_MODE_FLAT)
+               shader_info->fs.flat_shaded_mask |= mask << variable->data.driver_location;
+
+       if (variable->data.location >= VARYING_SLOT_VAR0)
+               shader_info->fs.input_mask |= mask << (variable->data.location - VARYING_SLOT_VAR0); /* bit 0 corresponds to VARYING_SLOT_VAR0 */
+}
+
+static int
+type_size_vec4(const struct glsl_type *type, bool bindless)
+{ /* Type-size callback handed to nir_lower_io in this file: returns the attribute-slot count of 'type'; 'bindless' is not used. */
+       return glsl_count_attribute_slots(type, false);
+}
+
+static nir_variable *
+find_layer_in_var(nir_shader *nir)
+{ /* Return the shader input located at VARYING_SLOT_LAYER; when there is none, create a flat-interpolated int input for that slot and return it. */
+       nir_foreach_variable(var, &nir->inputs) {
+               if (var->data.location == VARYING_SLOT_LAYER) {
+                       return var;
+               }
+       }
+
+       nir_variable *var =
+               nir_variable_create(nir, nir_var_shader_in, glsl_int_type(), "layer id");
+       var->data.location = VARYING_SLOT_LAYER;
+       var->data.interpolation = INTERP_MODE_FLAT;
+       return var;
+}
+
+/* We use layered rendering to implement multiview, which means we need to map
+ * view_index to gl_Layer. The attachment lowering also needs to know the
+ * layer so that it can sample from the correct layer. The code generates a
+ * load from the layer_id sysval, but since we don't have a way to get at this
+ * information from the fragment shader, we also need to lower this to the
+ * gl_Layer varying.  This pass lowers both to a varying load from the LAYER
+ * slot, before lowering io, so that nir_assign_var_locations() will give the
+ * LAYER varying the correct driver_location.
+ */
+
+static bool
+lower_view_index(nir_shader *nir)
+{ /* Returns true when at least one load_view_index/load_layer_id intrinsic in the entrypoint was replaced by a load of the LAYER input. */
+       bool progress = false;
+       nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+       nir_builder b;
+       nir_builder_init(&b, entry);
+       /* The layer input is looked up (or created) only once a matching load is found. */
+       nir_variable *layer = NULL;
+       nir_foreach_block(block, entry) {
+               nir_foreach_instr_safe(instr, block) { /* instr is removed below, hence the _safe iterator */
+                       if (instr->type != nir_instr_type_intrinsic)
+                               continue;
+
+                       nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+                       if (load->intrinsic != nir_intrinsic_load_view_index &&
+                           load->intrinsic != nir_intrinsic_load_layer_id)
+                               continue;
+
+                       if (!layer)
+                               layer = find_layer_in_var(nir);
+
+                       b.cursor = nir_before_instr(instr); /* emit the replacement load right before the old intrinsic */
+                       nir_ssa_def *def = nir_load_var(&b, layer);
+                       nir_ssa_def_rewrite_uses(&load->dest.ssa,
+                                                nir_src_for_ssa(def)); /* point every user of the old result at the new load */
+
+                       nir_instr_remove(instr);
+                       progress = true;
+               }
+       }
+
+       return progress;
+}
+
+/* Gather information needed to set up the vs<->ps linking registers in
+ * radv_pipeline_generate_ps_inputs().
+ */
+
+static void
+handle_fs_inputs(nir_shader *nir, struct radv_shader_variant_info *shader_info)
+{ /* Store nir->num_inputs in fs.num_interp, then run handle_fs_input_decl() on every input variable of 'nir'. */
+       shader_info->fs.num_interp = nir->num_inputs;
+       
+       nir_foreach_variable(variable, &nir->inputs)
+               handle_fs_input_decl(shader_info, variable);
+}
+
+static void
+lower_fs_io(nir_shader *nir, struct radv_shader_variant_info *shader_info)
+{ /* Lower the fragment-shader inputs of 'nir' and gather the fs.* input information into 'shader_info' along the way. */
+       NIR_PASS_V(nir, lower_view_index); /* runs before location assignment so that a LAYER input it creates is assigned a location too */
+       nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs,
+                                   MESA_SHADER_FRAGMENT);
+
+       handle_fs_inputs(nir, shader_info); /* consumes nir->num_inputs and each input's driver_location */
+
+       NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
+
+       /* This pass needs actual constants */
+       nir_opt_constant_folding(nir);
+
+       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
+}
+
+
 void *
 radv_alloc_shader_memory(struct radv_device *device,
                         struct radv_shader_variant *shader)
@@ -539,29 +690,41 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
        config_out->float_mode |= V_00B028_FP_64_DENORMS;
 
        config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
-                           S_00B12C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5) |
-                           S_00B12C_SCRATCH_EN(scratch_enabled) |
-                           S_00B12C_SO_BASE0_EN(!!info->info.so.strides[0]) |
-                           S_00B12C_SO_BASE1_EN(!!info->info.so.strides[1]) |
-                           S_00B12C_SO_BASE2_EN(!!info->info.so.strides[2]) |
-                           S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) |
-                           S_00B12C_SO_EN(!!info->info.so.num_outputs);
+                           S_00B12C_SCRATCH_EN(scratch_enabled);
 
        config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / 4) |
-                           S_00B848_SGPRS((num_sgprs - 1) / 8) |
                            S_00B848_DX10_CLAMP(1) |
                            S_00B848_FLOAT_MODE(config_out->float_mode);
 
+       if (pdevice->rad_info.chip_class >= GFX10) {
+               config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(info->num_user_sgprs >> 5);
+       } else {
+               config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
+               config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5)  |
+                                    S_00B12C_SO_BASE0_EN(!!info->info.so.strides[0]) |
+                                    S_00B12C_SO_BASE1_EN(!!info->info.so.strides[1]) |
+                                    S_00B12C_SO_BASE2_EN(!!info->info.so.strides[2]) |
+                                    S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) |
+                                    S_00B12C_SO_EN(!!info->info.so.num_outputs);
+       }
+
        switch (stage) {
        case MESA_SHADER_TESS_EVAL:
-               if (info->tes.as_es) {
+               if (info->is_ngg) {
+                       config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+                       config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1);
+               } else if (info->tes.as_es) {
                        assert(pdevice->rad_info.chip_class <= GFX8);
                        vgpr_comp_cnt = info->info.uses_prim_id ? 3 : 2;
+
+                       config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1);
                } else {
                        bool enable_prim_id = info->tes.export_prim_id || info->info.uses_prim_id;
                        vgpr_comp_cnt = enable_prim_id ? 3 : 2;
+
+                       config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+                       config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1);
                }
-               config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1);
                break;
        case MESA_SHADER_TESS_CTRL:
                if (pdevice->rad_info.chip_class >= GFX9) {
@@ -569,13 +732,21 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
                         * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
                         * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
                         */
-                       vgpr_comp_cnt = info->info.vs.needs_instance_id ? 2 : 1;
+                       if (pdevice->rad_info.chip_class >= GFX10) {
+                               vgpr_comp_cnt = info->info.vs.needs_instance_id ? 3 : 1;
+                       } else {
+                               vgpr_comp_cnt = info->info.vs.needs_instance_id ? 2 : 1;
+                       }
                } else {
                        config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1);
                }
+               config_out->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
+                                    S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10);
                break;
        case MESA_SHADER_VERTEX:
-               if (info->vs.as_ls) {
+               if (info->is_ngg) {
+                       config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+               } else if (info->vs.as_ls) {
                        assert(pdevice->rad_info.chip_class <= GFX8);
                        /* We need at least 2 components for LS.
                         * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
@@ -598,12 +769,20 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
                        } else {
                                vgpr_comp_cnt = 0;
                        }
+
+                       config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
                }
                break;
        case MESA_SHADER_FRAGMENT:
+               config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+               break;
        case MESA_SHADER_GEOMETRY:
+               config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
+                                    S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10);
                break;
        case MESA_SHADER_COMPUTE:
+               config_out->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
+                                    S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10);
                config_out->rsrc2 |=
                        S_00B84C_TGID_X_EN(info->info.cs.uses_block_id[0]) |
                        S_00B84C_TGID_Y_EN(info->info.cs.uses_block_id[1]) |
@@ -618,8 +797,40 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
                break;
        }
 
-       if (pdevice->rad_info.chip_class >= GFX9 &&
-           stage == MESA_SHADER_GEOMETRY) {
+       if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
+           (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY)) {
+               unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
+               gl_shader_stage es_stage = stage;
+               if (stage == MESA_SHADER_GEOMETRY)
+                       es_stage = info->gs.es_type;
+
+               /* VGPR5-8: (VertexID, UserVGPR0, UserVGPR1, UserVGPR2 / InstanceID) */
+               if (es_stage == MESA_SHADER_VERTEX) {
+                       es_vgpr_comp_cnt = info->info.vs.needs_instance_id ? 3 : 0;
+               } else if (es_stage == MESA_SHADER_TESS_EVAL) {
+                       bool enable_prim_id = info->tes.export_prim_id || info->info.uses_prim_id;
+                       es_vgpr_comp_cnt = enable_prim_id ? 3 : 2;
+               }
+
+               bool tes_triangles = stage == MESA_SHADER_TESS_EVAL &&
+                       info->tes.primitive_mode >= 4; /* GL_TRIANGLES */
+               if (info->info.uses_invocation_id || stage == MESA_SHADER_VERTEX) {
+                       gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
+               } else if (info->info.uses_prim_id) {
+                       gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
+               } else if (info->gs.vertices_in >= 3 || tes_triangles) {
+                       gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
+               } else {
+                       gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+               }
+
+               config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
+                                    S_00B228_WGP_MODE(1);
+               config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+                                    S_00B22C_LDS_SIZE(config_in->lds_size) |
+                                    S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
+       } else if (pdevice->rad_info.chip_class >= GFX9 &&
+                  stage == MESA_SHADER_GEOMETRY) {
                unsigned es_type = info->gs.es_type;
                unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
 
@@ -712,16 +923,40 @@ radv_shader_variant_create(struct radv_device *device,
                unsigned num_lds_symbols = 0;
                const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
                size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
+               unsigned esgs_ring_size = 0;
 
                if (device->physical_device->rad_info.chip_class >= GFX9 &&
                    binary->stage == MESA_SHADER_GEOMETRY && !binary->is_gs_copy_shader) {
+                       /* TODO: Do not hardcode this value */
+                       esgs_ring_size = 32 * 1024;
+               }
+
+               if (binary->variant_info.is_ngg) {
+                       /* GS stores Primitive IDs into LDS at the address
+                        * corresponding to the ES thread of the provoking
+                        * vertex. All ES threads load and export PrimitiveID
+                        * for their thread.
+                        */
+                       if (binary->stage == MESA_SHADER_VERTEX &&
+                           binary->variant_info.vs.export_prim_id) {
+                               /* TODO: Do not hardcode this value */
+                               esgs_ring_size = 256 /* max_out_verts */ * 4;
+                       }
+               }
+
+               if (esgs_ring_size) {
                        /* We add this symbol even on LLVM <= 8 to ensure that
                         * shader->config.lds_size is set correctly below.
                         */
                        struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
                        sym->name = "esgs_ring";
-                       sym->size = 32 * 1024;
+                       sym->size = esgs_ring_size;
                        sym->align = 64 * 1024;
+
+                       /* Make sure to have LDS space for NGG scratch. */
+                       /* TODO: Compute this correctly somehow? */
+                       if (binary->variant_info.is_ngg)
+                               sym->size -= 32;
                }
                struct ac_rtld_open_info open_info = {
                        .info = &device->physical_device->rad_info,
@@ -776,18 +1011,20 @@ radv_shader_variant_create(struct radv_device *device,
                        return NULL;
                }
 
-               const char *disasm_data;
-               size_t disasm_size;
-               if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
-                       radv_shader_variant_destroy(device, variant);
-                       ac_rtld_close(&rtld_binary);
-                       return NULL;
-               }
+               if (device->keep_shader_info) {
+                       const char *disasm_data;
+                       size_t disasm_size;
+                       if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
+                               radv_shader_variant_destroy(device, variant);
+                               ac_rtld_close(&rtld_binary);
+                               return NULL;
+                       }
 
-               variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
-               variant->disasm_string = malloc(disasm_size + 1);
-               memcpy(variant->disasm_string, disasm_data, disasm_size);
-               variant->disasm_string[disasm_size] = 0;
+                       variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
+                       variant->disasm_string = malloc(disasm_size + 1);
+                       memcpy(variant->disasm_string, disasm_data, disasm_size);
+                       variant->disasm_string[disasm_size] = 0;
+               }
 
                ac_rtld_close(&rtld_binary);
        } else {
@@ -822,6 +1059,9 @@ shader_variant_compile(struct radv_device *device,
        struct radv_shader_variant_info variant_info = {0};
        bool thread_compiler;
 
+       if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
+               lower_fs_io(shaders[0], &variant_info);
+
        options->family = chip_family;
        options->chip_class = device->physical_device->rad_info.chip_class;
        options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
@@ -939,15 +1179,34 @@ radv_shader_variant_destroy(struct radv_device *device,
 }
 
 const char *
-radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
+radv_get_shader_name(struct radv_shader_variant_info *info,
+                    gl_shader_stage stage)
 {
        switch (stage) {
-       case MESA_SHADER_VERTEX: return var->info.vs.as_ls ? "Vertex Shader as LS" : var->info.vs.as_es ? "Vertex Shader as ES" : "Vertex Shader as VS";
-       case MESA_SHADER_GEOMETRY: return "Geometry Shader";
-       case MESA_SHADER_FRAGMENT: return "Pixel Shader";
-       case MESA_SHADER_COMPUTE: return "Compute Shader";
-       case MESA_SHADER_TESS_CTRL: return "Tessellation Control Shader";
-       case MESA_SHADER_TESS_EVAL: return var->info.tes.as_es ? "Tessellation Evaluation Shader as ES" : "Tessellation Evaluation Shader as VS";
+       case MESA_SHADER_VERTEX:
+               if (info->vs.as_ls)
+                       return "Vertex Shader as LS";
+               else if (info->vs.as_es)
+                       return "Vertex Shader as ES";
+               else if (info->is_ngg)
+                       return "Vertex Shader as ESGS";
+               else
+                       return "Vertex Shader as VS";
+       case MESA_SHADER_TESS_CTRL:
+               return "Tessellation Control Shader";
+       case MESA_SHADER_TESS_EVAL:
+               if (info->tes.as_es)
+                       return "Tessellation Evaluation Shader as ES";
+               else if (info->is_ngg)
+                       return "Tessellation Evaluation Shader as ESGS";
+               else
+                       return "Tessellation Evaluation Shader as VS";
+       case MESA_SHADER_GEOMETRY:
+               return "Geometry Shader";
+       case MESA_SHADER_FRAGMENT:
+               return "Pixel Shader";
+       case MESA_SHADER_COMPUTE:
+               return "Compute Shader";
        default:
                return "Unknown shader";
        };
@@ -1031,7 +1290,7 @@ radv_shader_dump_stats(struct radv_device *device,
 
        generate_shader_stats(device, variant, stage, buf);
 
-       fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
+       fprintf(file, "\n%s:\n", radv_get_shader_name(&variant->info, stage));
        fprintf(file, "%s", buf->buf);
 
        _mesa_string_buffer_destroy(buf);
@@ -1104,7 +1363,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
        case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
                buf = _mesa_string_buffer_create(NULL, 1024);
 
-               _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage));
+               _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(&variant->info, stage));
                _mesa_string_buffer_printf(buf, "%s\n\n", variant->llvm_ir_string);
                _mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string);
                generate_shader_stats(device, variant, stage, buf);