From ef63a330b4f156e7e05f7ebe41f33a882cc69d01 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Fri, 23 Mar 2018 21:54:51 +0100
Subject: [PATCH] support/scripts/pkg-stats-new: rewrite in Python

This commit adds a new version of the pkg-stats script, rewritten in
Python. It is for now implemented in a separate file called
pkg-stats-new, in order to make the diff easily readable. A future
commit will rename it to pkg-stats.

Compared to the existing shell-based pkg-stats script, the
functionality and output are basically the same. The main difference
is that the output no longer goes to stdout, but to the file passed as
argument using the -o option. This allows stdout to be used for more
debugging-related information.

The way the script works is that a first function, get_pkglist(),
returns a list of Package objects. Then, the function
package_init_make_info() uses 'make printvars' to gather information
about all packages, stored as class variables in the Package class.
Then, we iterate over all packages, and use various methods of the
Package class to retrieve all details about the package:
infrastructure, presence of hash file, presence of license
information, etc.

calculate_stats() then calculates global statistics (how many packages
have license information, how many packages have a hash file, etc.).
Finally, dump_html() produces the HTML output, using a number of
sub-functions.

One improvement over the shell-based version is that we can use
regexps to exclude some .mk files. Thanks to this, we can exclude all
linux-ext-*.mk files, avoiding incorrect matches.

# Signed-off-by: Thomas Petazzoni
# Reviewed-by: Ricardo Martincoski
#
# File: support/scripts/pkg-stats-new (mode 100755)

#!/usr/bin/env python

# Copyright (C) 2009 by Thomas Petazzoni
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import argparse
import datetime
import fnmatch
import os
from collections import defaultdict
import re
import subprocess

# NOTE(review): the copy of this script that was reviewed had every HTML tag
# stripped from its string literals, which left format strings with fewer
# placeholders than arguments. The markup below was reconstructed from the
# CSS class names the code computes ("centered", "correct", "wrong",
# "nopatches", "somepatches", "lotsofpatches"); confirm it against the
# intended page layout.

# Matches the "$(eval $(<infra>-package))" line of a package .mk file.
INFRA_RE = re.compile(r"\$\(eval \$\(([a-z-]*)-package\)\)")

# Matches the summary line that check-package prints on stderr.
WARNINGS_RE = re.compile(r"^([0-9]+) warnings generated")

# Top-level directories that contain packages.
WALK_USEFUL_SUBDIRS = ["boot", "linux", "package", "toolchain"]

# Regular expressions (matched against the path relative to the top-level
# directory) of .mk files that are not packages.
WALK_EXCLUDES = ["boot/common.mk",
                 "linux/linux-ext-.*.mk",
                 "package/freescale-imx/freescale-imx.mk",
                 "package/gcc/gcc.mk",
                 "package/gstreamer/gstreamer.mk",
                 "package/gstreamer1/gstreamer1.mk",
                 "package/gtk2-themes/gtk2-themes.mk",
                 "package/matchbox/matchbox.mk",
                 "package/opengl/opengl.mk",
                 "package/qt5/qt5.mk",
                 "package/x11r7/x11r7.mk",
                 "package/doc-asciidoc.mk",
                 "package/pkg-.*.mk",
                 "package/nvidia-tegra23/nvidia-tegra23.mk",
                 "toolchain/toolchain-external/pkg-toolchain-external.mk",
                 "toolchain/toolchain-external/toolchain-external.mk",
                 "toolchain/toolchain.mk",
                 "toolchain/helpers.mk",
                 "toolchain/toolchain-wrapper.mk"]

# Compiled once instead of on every file visited by get_pkglist().
_WALK_EXCLUDES_RE = [re.compile(exclude) for exclude in WALK_EXCLUDES]


class Package(object):
    """
    One package, identified by the path of its .mk file.

    The class-level lists are filled by package_init_make_info() and hold
    the upper-case variable prefixes (see pkgvar()) of the packages that
    define license / license files information.
    """
    all_licenses = list()
    all_license_files = list()

    def __init__(self, name, path):
        self.name = name
        self.path = path
        self.infras = None
        self.has_license = False
        self.has_license_files = False
        self.has_hash = False
        self.patch_count = 0
        self.warnings = 0

    def pkgvar(self):
        """
        Returns the make variable prefix of the package ("foo-bar" -> "FOO_BAR")
        """
        return self.name.upper().replace("-", "_")

    def set_infra(self):
        """
        Fills in the .infras field with a list of ("host"|"target", infra)
        tuples, one per "$(eval $(<infra>-package))" line of the .mk file
        """
        self.infras = list()
        with open(self.path, 'r') as f:
            for line in f:
                match = INFRA_RE.match(line)
                if not match:
                    continue
                infra = match.group(1)
                if infra.startswith("host-"):
                    self.infras.append(("host", infra[5:]))
                else:
                    self.infras.append(("target", infra))

    def set_license(self):
        """
        Fills in the .has_license and .has_license_files fields
        """
        var = self.pkgvar()
        if var in self.all_licenses:
            self.has_license = True
        if var in self.all_license_files:
            self.has_license_files = True

    def set_hash_info(self):
        """
        Fills in the .has_hash field
        """
        # Only swap the file extension: str.replace() would also rewrite
        # any ".mk" that appears in a directory name.
        hashpath = os.path.splitext(self.path)[0] + ".hash"
        self.has_hash = os.path.exists(hashpath)

    def set_patch_count(self):
        """
        Fills in the .patch_count field with the number of *.patch entries
        found in the package directory and its subdirectories
        """
        self.patch_count = 0
        pkgdir = os.path.dirname(self.path)
        for subdir, _, _ in os.walk(pkgdir):
            self.patch_count += len(fnmatch.filter(os.listdir(subdir), '*.patch'))

    def set_check_package_warnings(self):
        """
        Fills in the .warnings field by running ./utils/check-package on
        the .mk, .hash, Config.in and Config.in.host files of the package
        """
        cmd = ["./utils/check-package"]
        pkgdir = os.path.dirname(self.path)
        for root, _, files in os.walk(pkgdir):
            for f in files:
                if f.endswith((".mk", ".hash")) or f in ("Config.in", "Config.in.host"):
                    cmd.append(os.path.join(root, f))
        # universal_newlines=True gives text on both Python 2 and 3, so the
        # str regular expression below can be applied to it.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        stderr = proc.communicate()[1]
        for line in stderr.splitlines():
            m = WARNINGS_RE.match(line)
            if m:
                self.warnings = int(m.group(1))
                return

    def __eq__(self, other):
        if not isinstance(other, Package):
            return NotImplemented
        return self.path == other.path

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __lt__(self, other):
        if not isinstance(other, Package):
            return NotImplemented
        return self.path < other.path

    def __hash__(self):
        # Kept consistent with __eq__, which compares paths.
        return hash(self.path)

    def __str__(self):
        return "%s (path='%s', license='%s', license_files='%s', hash='%s', patches=%d)" % \
            (self.name, self.path, self.has_license, self.has_license_files, self.has_hash, self.patch_count)


def get_pkglist():
    """
    Builds the list of Buildroot packages, returning a list of Package
    objects. Only the .name and .path fields of the Package object are
    initialized.

    The current directory is walked; only .mk files below one of the
    WALK_USEFUL_SUBDIRS directories that do not match WALK_EXCLUDES are
    kept.
    """
    packages = list()
    for root, _, files in os.walk("."):
        rootdir = root.split("/")
        if len(rootdir) < 2:
            continue
        if rootdir[1] not in WALK_USEFUL_SUBDIRS:
            continue
        for f in files:
            if not f.endswith(".mk"):
                continue
            # Strip ending ".mk"
            pkgname = f[:-3]
            pkgpath = os.path.join(root, f)
            # pkgpath[2:] strips the initial './'
            relpath = pkgpath[2:]
            if any(exclude.match(relpath) for exclude in _WALK_EXCLUDES_RE):
                continue
            packages.append(Package(pkgname, pkgpath))
    return packages


def _printvars(pattern):
    """
    Runs 'make printvars' for the given VARS pattern and returns a list of
    (variable name, value) tuples, one per "NAME=value" output line
    """
    o = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y",
                                 "-s", "printvars", "VARS=%s" % pattern],
                                universal_newlines=True)
    variables = list()
    for line in o.splitlines():
        # Split on the first '=' only: the value may itself contain '='
        name, sep, value = line.partition("=")
        if not sep:
            continue
        variables.append((name, value))
    return variables


def _strip_host_prefix(pkgvar):
    """
    Returns pkgvar without its leading "HOST_", if present
    """
    if pkgvar.startswith("HOST_"):
        return pkgvar[5:]
    return pkgvar


def package_init_make_info():
    """
    Fills in Package.all_licenses and Package.all_license_files from the
    <PKG>_LICENSE and <PKG>_LICENSE_FILES make variables
    """
    # Licenses
    for pkgvar, value in _printvars("%_LICENSE"):
        pkgvar = _strip_host_prefix(pkgvar)

        # Strip _LICENSE
        pkgvar = pkgvar[:-8]

        # If value is "unknown", no license details available
        if value == "unknown":
            continue
        Package.all_licenses.append(pkgvar)

    # License files
    for pkgvar, value in _printvars("%_LICENSE_FILES"):
        pkgvar = _strip_host_prefix(pkgvar)

        if pkgvar.endswith("_MANIFEST_LICENSE_FILES"):
            continue

        # Strip _LICENSE_FILES
        pkgvar = pkgvar[:-14]

        Package.all_license_files.append(pkgvar)


def calculate_stats(packages):
    """
    Returns a defaultdict(int) of global counters computed over packages:
    "infra-<name>", "license"/"no-license", "license-files"/
    "no-license-files", "hash"/"no-hash" and "patches"
    """
    stats = defaultdict(int)
    for pkg in packages:
        # If packages have multiple infra, take the first one. For the
        # vast majority of packages, the target and host infra are the
        # same. There are very few packages that use a different infra
        # for the host and target variants.
        if pkg.infras:
            infra = pkg.infras[0][1]
            stats["infra-%s" % infra] += 1
        else:
            stats["infra-unknown"] += 1
        if pkg.has_license:
            stats["license"] += 1
        else:
            stats["no-license"] += 1
        if pkg.has_license_files:
            stats["license-files"] += 1
        else:
            stats["no-license-files"] += 1
        if pkg.has_hash:
            stats["hash"] += 1
        else:
            stats["no-hash"] += 1
        stats["patches"] += pkg.patch_count
    return stats


html_header = """
<html>
<head>
<style type="text/css">
table {
  width: 100%;
}
td {
  border: 1px solid black;
}
td.centered {
  text-align: center;
}
td.wrong {
  background: #ff9a69;
}
td.correct {
  background: #d2ffc4;
}
td.nopatches {
  background: #d2ffc4;
}
td.somepatches {
  background: #ffd870;
}
td.lotsofpatches {
  background: #ff9a69;
}
</style>
<title>Statistics of Buildroot packages</title>
</head>
<body>

<a href="#results">Results</a><br/>

"""


html_footer = """
</body>
</html>
"""


def infra_str(infra_list):
    """
    Returns the HTML text describing the infrastructure(s) of a package,
    given its list of ("host"|"target", infra) tuples. Only the first two
    entries are considered.
    """
    if not infra_list:
        return "Unknown"
    elif len(infra_list) == 1:
        return "<b>%s</b><br/>%s" % (infra_list[0][1], infra_list[0][0])
    elif infra_list[0][1] == infra_list[1][1]:
        return "<b>%s</b><br/>%s + %s" % \
            (infra_list[0][1], infra_list[0][0], infra_list[1][0])
    else:
        return "<b>%s</b> (%s)<br/><b>%s</b> (%s)" % \
            (infra_list[0][1], infra_list[0][0],
             infra_list[1][1], infra_list[1][0])


def boolean_str(b):
    """
    Returns "Yes" for a true value, "No" otherwise
    """
    if b:
        return "Yes"
    else:
        return "No"


def _write_td(f, td_class, text):
    """
    Writes one table cell carrying the given list of CSS classes
    """
    f.write("  <td class=\"%s\">%s</td>\n" % (" ".join(td_class), text))


def _status_class(ok):
    """
    Returns the CSS classes of a cell that is either fine or problematic
    """
    return ["centered", "correct" if ok else "wrong"]


def dump_html_pkg(f, pkg):
    """
    Writes the table row describing pkg to the file object f
    """
    f.write(" <tr>\n")
    f.write("  <td>%s</td>\n" % pkg.path[2:])

    # Patch count
    td_class = ["centered"]
    if pkg.patch_count == 0:
        td_class.append("nopatches")
    elif pkg.patch_count < 5:
        td_class.append("somepatches")
    else:
        td_class.append("lotsofpatches")
    _write_td(f, td_class, str(pkg.patch_count))

    # Infrastructure
    infra = infra_str(pkg.infras)
    _write_td(f, _status_class(infra != "Unknown"), infra)

    # License
    _write_td(f, _status_class(pkg.has_license), boolean_str(pkg.has_license))

    # License files
    _write_td(f, _status_class(pkg.has_license_files),
              boolean_str(pkg.has_license_files))

    # Hash
    _write_td(f, _status_class(pkg.has_hash), boolean_str(pkg.has_hash))

    # Warnings
    _write_td(f, _status_class(pkg.warnings == 0), "%d" % pkg.warnings)

    f.write(" </tr>\n")


def dump_html_all_pkgs(f, packages):
    """
    Writes the table listing all packages, sorted by path, to f
    """
    f.write("""
<table class="sortable">
<tr>
<td>Package</td>
<td class="centered">Patch count</td>
<td class="centered">Infrastructure</td>
<td class="centered">License</td>
<td class="centered">License files</td>
<td class="centered">Hash file</td>
<td class="centered">Warnings</td>
</tr>
""")
    for pkg in sorted(packages):
        dump_html_pkg(f, pkg)
    f.write("</table>")


def dump_html_stats(f, stats):
    """
    Writes the table of global statistics computed by calculate_stats()
    """
    f.write("<a id=\"results\"></a>\n")
    f.write("<table>\n")
    row = " <tr><td>%s</td><td>%s</td></tr>\n"
    # Sorted so that the output does not depend on dictionary ordering
    infras = sorted(infra[6:] for infra in stats.keys() if infra.startswith("infra-"))
    for infra in infras:
        f.write(row % ("Packages using the <i>%s</i> infrastructure" % infra,
                       stats["infra-%s" % infra]))
    f.write(row % ("Packages having license information",
                   stats["license"]))
    f.write(row % ("Packages not having license information",
                   stats["no-license"]))
    f.write(row % ("Packages having license files information",
                   stats["license-files"]))
    f.write(row % ("Packages not having license files information",
                   stats["no-license-files"]))
    f.write(row % ("Packages having a hash file",
                   stats["hash"]))
    f.write(row % ("Packages not having a hash file",
                   stats["no-hash"]))
    f.write(row % ("Total number of patches",
                   stats["patches"]))
    f.write("</table>\n")


def dump_gen_info(f):
    """
    Writes the generation date (UTC) and the commit id of the master
    branch to f
    """
    # Updated on Mon Feb 19 08:12:08 CET 2018, Git commit aa77030b8f5e41f1c53eb1c1ad664b8c814ba032
    o = subprocess.check_output(["git", "log", "master", "-n", "1", "--pretty=format:%H"],
                                universal_newlines=True)
    git_commit = o.splitlines()[0]
    f.write("<p><i>Updated on %s, git commit %s</i></p>\n" %
            (str(datetime.datetime.utcnow()), git_commit))


def dump_html(packages, stats, output):
    """
    Writes the complete HTML report to the file named output
    """
    with open(output, 'w') as f:
        f.write(html_header)
        dump_html_all_pkgs(f, packages)
        dump_html_stats(f, stats)
        dump_gen_info(f)
        f.write(html_footer)


def parse_args():
    """
    Parses the command line; -o (HTML output file) is mandatory
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', dest='output', action='store', required=True,
                        help='HTML output file')
    return parser.parse_args()


def __main__():
    """
    Collects the details of all packages and writes the HTML report
    """
    args = parse_args()
    # Single-argument print() calls behave the same on Python 2 and 3
    print("Build package list ...")
    packages = get_pkglist()
    print("Getting package make info ...")
    package_init_make_info()
    print("Getting package details ...")
    for pkg in packages:
        pkg.set_infra()
        pkg.set_license()
        pkg.set_hash_info()
        pkg.set_patch_count()
        pkg.set_check_package_warnings()
    print("Calculate stats")
    stats = calculate_stats(packages)
    print("Write HTML")
    dump_html(packages, stats, args.output)


# Only run when executed as a script, so that importing the module has no
# side effects.
if __name__ == "__main__":
    __main__()