From e76069c05d18bab215a503f8d86079035f8d77c0 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Tue, 19 Mar 2024 11:07:43 +0100 Subject: [PATCH 01/11] Create command line entry point --- build-scripts/profile_tool.py | 26 +++++++++++++++++++++- utils/profile_tool/__init__.py | 1 + utils/profile_tool/most_used_components.py | 3 +++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 utils/profile_tool/most_used_components.py diff --git a/build-scripts/profile_tool.py b/build-scripts/profile_tool.py index 4825cdb5ded..503df2dffba 100755 --- a/build-scripts/profile_tool.py +++ b/build-scripts/profile_tool.py @@ -5,7 +5,12 @@ import argparse try: - from utils.profile_tool import command_stats, command_sub, command_most_used_rules + from utils.profile_tool import ( + command_stats, + command_sub, + command_most_used_rules, + command_most_used_components, + ) except ImportError: print("The ssg module could not be found.") print( @@ -276,6 +281,23 @@ def parse_most_used_rules_subcommand(subparsers): ) +def parse_most_used_components(subparsers): + parser_most_used_components = subparsers.add_parser( + "most-used-components", + description=( + "Generates list of all components used by the rules in existing profiles." + " In various formats." 
+ ), + help="Generates list of all components used by the rules in existing profiles.", + ) + parser_most_used_components.add_argument( + "--format", + default="plain", + choices=["plain", "json", "csv"], + help="Which format to use for output.", + ) + + def parse_args(): parser = argparse.ArgumentParser(description="Profile statistics and utilities tool") subparsers = parser.add_subparsers(title="subcommands", dest="subcommand", required=True) @@ -283,6 +305,7 @@ def parse_args(): parse_stats_subcommand(subparsers) parse_sub_subcommand(subparsers) parse_most_used_rules_subcommand(subparsers) + parse_most_used_components(subparsers) args = parser.parse_args() @@ -319,6 +342,7 @@ def parse_args(): "stats": command_stats, "sub": command_sub, "most-used-rules": command_most_used_rules, + "most-used-components": command_most_used_components, } diff --git a/utils/profile_tool/__init__.py b/utils/profile_tool/__init__.py index 2b2f56c25ee..1aa3293ac4d 100644 --- a/utils/profile_tool/__init__.py +++ b/utils/profile_tool/__init__.py @@ -1,3 +1,4 @@ from .sub import command_sub from .stats import command_stats from .most_used_rules import command_most_used_rules +from .most_used_components import command_most_used_components diff --git a/utils/profile_tool/most_used_components.py b/utils/profile_tool/most_used_components.py new file mode 100644 index 00000000000..8a8ca6e5a6c --- /dev/null +++ b/utils/profile_tool/most_used_components.py @@ -0,0 +1,3 @@ + +def command_most_used_components(args): + pass From f2691291357be8d9d3491903ea26329c32a51989 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Tue, 19 Mar 2024 11:49:37 +0100 Subject: [PATCH 02/11] Rename sort function --- utils/profile_tool/most_used_rules.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/utils/profile_tool/most_used_rules.py b/utils/profile_tool/most_used_rules.py index c8072ed3bb1..fd4617184b5 100644 --- a/utils/profile_tool/most_used_rules.py +++ 
b/utils/profile_tool/most_used_rules.py @@ -48,12 +48,9 @@ def _process_all_products_from_controls(rules): _count_rules_per_rules_list(profile.rules, rules) -def _sorted_rules(rules): - sorted_rules = { - k: v - for k, v in sorted(rules.items(), key=lambda x: x[1], reverse=True) - } - return sorted_rules +def _sorted_dict_by_num_value(dict_): + sorted_ = {k: v for k, v in sorted(dict_.items(), key=lambda x: x[1], reverse=True)} + return sorted_ def command_most_used_rules(args): @@ -65,7 +62,7 @@ def command_most_used_rules(args): for benchmark in args.BENCHMARKS: _count_rules_per_benchmark(benchmark, rules) - sorted_rules = _sorted_rules(rules) + sorted_rules = _sorted_dict_by_num_value(rules) f_string = "{}: {}" From 0b3652a76f3a48630bc9aa50af17309f98f80e26 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Tue, 19 Mar 2024 11:55:36 +0100 Subject: [PATCH 03/11] Implement most used components command --- utils/profile_tool/most_used_components.py | 69 +++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/utils/profile_tool/most_used_components.py b/utils/profile_tool/most_used_components.py index 8a8ca6e5a6c..7d65dd22e45 100644 --- a/utils/profile_tool/most_used_components.py +++ b/utils/profile_tool/most_used_components.py @@ -1,3 +1,70 @@ +import json +import sys +import os +from ssg.components import Component +from .most_used_rules import _sorted_dict_by_num_value + +PYTHON_2 = sys.version_info[0] < 3 + +if not PYTHON_2: + from .most_used_rules import _get_profiles_for_product + from ..controleval import ( + load_controls_manager, + get_available_products, + ) + + +def _count_components(components, rules_list, components_out): + for rule in rules_list: + component = get_component_name_by_rule_id(rule, components) + if component in components_out: + components_out[component] += 1 + else: + components_out[component] = 1 + + +def get_component_name_by_rule_id(rule_id, components): + for component in components.values(): + if rule_id in 
component.rules: + return component.name + return "without_component" + + +def load_components(components_dir): + components = {} + for component_file in os.listdir(os.path.abspath(components_dir)): + component_path = os.path.join(components_dir, component_file) + component = Component(component_path) + components[component.name] = component + return components + + +def _process_all_products_from_controls(components_out): + components = load_components("./components/") + if PYTHON_2: + raise Exception("This feature is not supported for python2.") + + for product in get_available_products(): + controls_manager = load_controls_manager("./controls/", product) + for profile in _get_profiles_for_product(controls_manager, product): + _count_components(components, profile.rules, components_out) + def command_most_used_components(args): - pass + components = {} + + _process_all_products_from_controls(components) + + sorted_components = _sorted_dict_by_num_value(components) + + f_string = "{}: {}" + + if args.format == "json": + print(json.dumps(sorted_components, indent=4)) + return + elif args.format == "csv": + print("component_name,count_of_rules") + f_string = "{},{}" + + for rule_id, rule_count in sorted_components.items(): + print(f_string.format(rule_id, rule_count)) From 271693b08b779b05da235b26aa90a607b844f290 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Tue, 19 Mar 2024 15:02:07 +0100 Subject: [PATCH 04/11] Add documentation --- docs/manual/developer/05_tools_and_utilities.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/manual/developer/05_tools_and_utilities.md b/docs/manual/developer/05_tools_and_utilities.md index 8ec39032574..c245e74f524 100644 --- a/docs/manual/developer/05_tools_and_utilities.md +++ b/docs/manual/developer/05_tools_and_utilities.md @@ -61,6 +61,16 @@ The result will be a list of rules with the number of uses in the profiles. The list can be generated as plain text, JSON or CVS. Via the `--format FORMAT` parameter. 
+The tool can also generate a list of the most used components based on rules contained in profiles from the entire project: + +```bash + $ ./build-scripts/profile_tool.py most-used-components +``` + +The result will be a list of rules with the number of uses in the profiles. +The list can be generated as plain text, JSON or CVS. +Via the `--format FORMAT` parameter. + ## Generating Controls from DISA's XCCDF Files If you want a control file for product from DISA's XCCDF files you can run the following command: From e7095b87d3fb3261250c604df33b5bb9a9b077c7 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Wed, 20 Mar 2024 12:17:57 +0100 Subject: [PATCH 05/11] Process products that support components --- utils/profile_tool/most_used_components.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/utils/profile_tool/most_used_components.py b/utils/profile_tool/most_used_components.py index 7d65dd22e45..b5f6fe4b224 100644 --- a/utils/profile_tool/most_used_components.py +++ b/utils/profile_tool/most_used_components.py @@ -1,7 +1,7 @@ import json import sys import os -from ssg.components import Component +import ssg.components from .most_used_rules import _sorted_dict_by_num_value PYTHON_2 = sys.version_info[0] < 3 @@ -11,6 +11,7 @@ from ..controleval import ( load_controls_manager, get_available_products, + load_product_yaml, ) @@ -30,21 +31,24 @@ def get_component_name_by_rule_id(rule_id, components): return "without_component" -def load_components(components_dir): - components = {} - for component_file in os.listdir(os.path.abspath(components_dir)): - component_path = os.path.join(components_dir, component_file) - component = Component(component_path) - components[component.name] = component - return components +def load_components(product): + product_yaml = load_product_yaml(product) + product_dir = product_yaml.get("product_dir") + components_root = product_yaml.get("components_root") + if components_root is None: + return None +
components_dir = os.path.abspath(os.path.join(product_dir, components_root)) + return ssg.components.load(components_dir) def _process_all_products_from_controls(components_out): - components = load_components("./components/") if PYTHON_2: raise Exception("This feature is not supported for python2.") for product in get_available_products(): + components = load_components(product) + if components is None: + continue controls_manager = load_controls_manager("./controls/", product) for profile in _get_profiles_for_product(controls_manager, product): _count_components(components, profile.rules, components_out) From 6f2e505f4c3ac1da7dd4195857369ab8a2e19f54 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Wed, 20 Mar 2024 12:24:17 +0100 Subject: [PATCH 06/11] Reduce duplication of output generation code --- utils/profile_tool/__init__.py | 1 + utils/profile_tool/common.py | 15 +++++++++++++++ utils/profile_tool/most_used_components.py | 16 +++------------- utils/profile_tool/most_used_rules.py | 17 ++++------------- 4 files changed, 23 insertions(+), 26 deletions(-) create mode 100644 utils/profile_tool/common.py diff --git a/utils/profile_tool/__init__.py b/utils/profile_tool/__init__.py index 1aa3293ac4d..4f4deebda82 100644 --- a/utils/profile_tool/__init__.py +++ b/utils/profile_tool/__init__.py @@ -2,3 +2,4 @@ from .stats import command_stats from .most_used_rules import command_most_used_rules from .most_used_components import command_most_used_components +from .common import generate_output diff --git a/utils/profile_tool/common.py b/utils/profile_tool/common.py new file mode 100644 index 00000000000..ae5729451f0 --- /dev/null +++ b/utils/profile_tool/common.py @@ -0,0 +1,15 @@ +import json + + +def generate_output(dict_, format, csv_header): + f_string = "{}: {}" + + if format == "json": + print(json.dumps(dict_, indent=4)) + return + elif format == "csv": + print(csv_header) + f_string = "{},{}" + + for rule_id, rule_count in dict_.items(): + 
print(f_string.format(rule_id, rule_count)) diff --git a/utils/profile_tool/most_used_components.py b/utils/profile_tool/most_used_components.py index b5f6fe4b224..731fb929ee6 100644 --- a/utils/profile_tool/most_used_components.py +++ b/utils/profile_tool/most_used_components.py @@ -1,8 +1,8 @@ -import json import sys import os import ssg.components from .most_used_rules import _sorted_dict_by_num_value +from .common import generate_output PYTHON_2 = sys.version_info[0] < 3 @@ -60,15 +60,5 @@ def command_most_used_components(args): _process_all_products_from_controls(components) sorted_components = _sorted_dict_by_num_value(components) - - f_string = "{}: {}" - - if args.format == "json": - print(json.dumps(sorted_components, indent=4)) - return - elif args.format == "csv": - print("component_name,count_of_rules") - f_string = "{},{}" - - for rule_id, rule_count in sorted_components.items(): - print(f_string.format(rule_id, rule_count)) + csv_header = "component_name,count_of_rules" + generate_output(sorted_components, args.format, csv_header) diff --git a/utils/profile_tool/most_used_rules.py b/utils/profile_tool/most_used_rules.py index fd4617184b5..b52fe67d7d4 100644 --- a/utils/profile_tool/most_used_rules.py +++ b/utils/profile_tool/most_used_rules.py @@ -1,8 +1,9 @@ import sys -import json from ssg.build_profile import XCCDFBenchmark +from .common import generate_output + PYTHON_2 = sys.version_info[0] < 3 @@ -63,15 +64,5 @@ def command_most_used_rules(args): _count_rules_per_benchmark(benchmark, rules) sorted_rules = _sorted_dict_by_num_value(rules) - - f_string = "{}: {}" - - if args.format == "json": - print(json.dumps(sorted_rules, indent=4)) - return - elif args.format == "csv": - print("rule_id,count_of_profiles") - f_string = "{},{}" - - for rule_id, rule_count in sorted_rules.items(): - print(f_string.format(rule_id, rule_count)) + csv_header = "rule_id,count_of_profiles" + generate_output(sorted_rules, args.format, csv_header) From 
585a0e7d9c51d4f75333f136cbddbb53e2f95247 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Wed, 20 Mar 2024 12:30:42 +0100 Subject: [PATCH 07/11] Use defaultdict for results --- utils/profile_tool/most_used_components.py | 10 +++++----- utils/profile_tool/most_used_rules.py | 8 +++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/utils/profile_tool/most_used_components.py b/utils/profile_tool/most_used_components.py index 731fb929ee6..69c34d914a4 100644 --- a/utils/profile_tool/most_used_components.py +++ b/utils/profile_tool/most_used_components.py @@ -1,6 +1,9 @@ import sys import os +from collections import defaultdict + import ssg.components + from .most_used_rules import _sorted_dict_by_num_value from .common import generate_output @@ -18,10 +21,7 @@ def _count_components(components, rules_list, components_out): for rule in rules_list: component = get_component_name_by_rule_id(rule, components) - if component in components_out: - components_out[component] += 1 - else: - components_out[component] = 1 + components_out[component] += 1 def get_component_name_by_rule_id(rule_id, components): @@ -55,7 +55,7 @@ def _process_all_products_from_controls(components_out): def command_most_used_components(args): - components = {} + components = defaultdict(int) _process_all_products_from_controls(components) diff --git a/utils/profile_tool/most_used_rules.py b/utils/profile_tool/most_used_rules.py index b52fe67d7d4..ca159a10f49 100644 --- a/utils/profile_tool/most_used_rules.py +++ b/utils/profile_tool/most_used_rules.py @@ -1,4 +1,5 @@ import sys +from collections import defaultdict from ssg.build_profile import XCCDFBenchmark @@ -18,10 +19,7 @@ def _count_rules_per_rules_list(rules_list, rules): for rule in rules_list: - if rule in rules: - rules[rule] += 1 - else: - rules[rule] = 1 + rules[rule] += 1 def _count_rules_per_benchmark(benchmark, rules): @@ -55,7 +53,7 @@ def _sorted_dict_by_num_value(dict_): def command_most_used_rules(args): - rules = {} + 
rules = defaultdict(int) if not args.BENCHMARKS: _process_all_products_from_controls(rules) From 657997327026bca9f01481a9194da2bc2d36d2a6 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Tue, 19 Mar 2024 14:54:26 +0100 Subject: [PATCH 08/11] Add parameter products to most used rules sub command --- build-scripts/profile_tool.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/build-scripts/profile_tool.py b/build-scripts/profile_tool.py index 503df2dffba..c64221d7e22 100755 --- a/build-scripts/profile_tool.py +++ b/build-scripts/profile_tool.py @@ -5,6 +5,7 @@ import argparse try: + from utils.controleval import get_available_products from utils.profile_tool import ( command_stats, command_sub, @@ -279,6 +280,13 @@ def parse_most_used_rules_subcommand(subparsers): choices=["plain", "json", "csv"], help="Which format to use for output.", ) + parser_most_used_rules.add_argument( + "--products", + help="List of products to be considered. If not specified will by used all products.", + nargs="+", + choices=get_available_products(), + default=get_available_products(), + ) def parse_most_used_components(subparsers): From 00b1a4b34bd7b0da03e16a5d11f80d737aa37f04 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Wed, 20 Mar 2024 15:54:22 +0100 Subject: [PATCH 09/11] Add parameter products to most used components sub command --- build-scripts/profile_tool.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/build-scripts/profile_tool.py b/build-scripts/profile_tool.py index c64221d7e22..043d2a3e413 100755 --- a/build-scripts/profile_tool.py +++ b/build-scripts/profile_tool.py @@ -5,7 +5,7 @@ import argparse try: - from utils.controleval import get_available_products + from utils.controleval import get_available_products, load_product_yaml from utils.profile_tool import ( command_stats, command_sub, @@ -304,6 +304,26 @@ def parse_most_used_components(subparsers): choices=["plain", "json", "csv"], help="Which format to use for 
output.", ) + parser_most_used_components.add_argument( + "--products", + help=( + "List of products to be considered. " + "If not specified will by used all products with components_root." + ), + nargs="+", + choices=get_available_products_with_components_root(), + default=get_available_products_with_components_root(), + ) + + +def get_available_products_with_components_root(): + out = set() + for product in get_available_products(): + product_yaml = load_product_yaml(product) + components_root = product_yaml.get("components_root") + if components_root is not None: + out.add(product) + return out def parse_args(): From 430f6f580727685f343d7937c900edfdf4c7ee32 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Tue, 19 Mar 2024 14:54:57 +0100 Subject: [PATCH 10/11] Implement product filter --- utils/profile_tool/most_used_components.py | 7 +++---- utils/profile_tool/most_used_rules.py | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/utils/profile_tool/most_used_components.py b/utils/profile_tool/most_used_components.py index 69c34d914a4..929ff91f492 100644 --- a/utils/profile_tool/most_used_components.py +++ b/utils/profile_tool/most_used_components.py @@ -13,7 +13,6 @@ from .most_used_rules import _get_profiles_for_product from ..controleval import ( load_controls_manager, - get_available_products, load_product_yaml, ) @@ -41,11 +40,11 @@ def load_components(product): return ssg.components.load(components_dir) -def _process_all_products_from_controls(components_out): +def _process_all_products_from_controls(components_out, products): if PYTHON_2: raise Exception("This feature is not supported for python2.") - for product in get_available_products(): + for product in products: components = load_components(product) if components is None: continue @@ -57,7 +56,7 @@ def _process_all_products_from_controls(components_out): def command_most_used_components(args): components = defaultdict(int) - _process_all_products_from_controls(components) + 
_process_all_products_from_controls(components, args.products) sorted_components = _sorted_dict_by_num_value(components) csv_header = "component_name,count_of_rules" diff --git a/utils/profile_tool/most_used_rules.py b/utils/profile_tool/most_used_rules.py index ca159a10f49..85cd5bd5185 100644 --- a/utils/profile_tool/most_used_rules.py +++ b/utils/profile_tool/most_used_rules.py @@ -12,7 +12,6 @@ from .profile import get_profile from ..controleval import ( load_controls_manager, - get_available_products, get_product_profiles_files, ) @@ -37,11 +36,11 @@ def _get_profiles_for_product(ctrls_mgr, product): return profiles -def _process_all_products_from_controls(rules): +def _process_all_products_from_controls(rules, products): if PYTHON_2: raise Exception("This feature is not supported for python2.") - for product in get_available_products(): + for product in products: controls_manager = load_controls_manager("./controls/", product) for profile in _get_profiles_for_product(controls_manager, product): _count_rules_per_rules_list(profile.rules, rules) @@ -56,7 +55,7 @@ def command_most_used_rules(args): rules = defaultdict(int) if not args.BENCHMARKS: - _process_all_products_from_controls(rules) + _process_all_products_from_controls(rules, args.products) else: for benchmark in args.BENCHMARKS: _count_rules_per_benchmark(benchmark, rules) From 352cfb9338137e77ff493a02ae9173601cd8dbd1 Mon Sep 17 00:00:00 2001 From: Jan Rodak Date: Tue, 19 Mar 2024 15:09:18 +0100 Subject: [PATCH 11/11] Add documentation --- docs/manual/developer/05_tools_and_utilities.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/manual/developer/05_tools_and_utilities.md b/docs/manual/developer/05_tools_and_utilities.md index c245e74f524..a2ea6289567 100644 --- a/docs/manual/developer/05_tools_and_utilities.md +++ b/docs/manual/developer/05_tools_and_utilities.md @@ -57,6 +57,12 @@ Or you can also run this command to get a list of the most used rules in the ent $ 
./build-scripts/profile_tool.py most-used-rules ``` +Optionally, you can use this command to limit the statistics for a specific product: + +```bash + $ ./build-scripts/profile_tool.py most-used-rules --products rhel9 +``` + The result will be a list of rules with the number of uses in the profiles. The list can be generated as plain text, JSON or CVS. Via the `--format FORMAT` parameter. @@ -67,6 +73,12 @@ The tool can also generate a list of the most used component based on rules cont $ ./build-scripts/profile_tool.py most-used-components ``` +Optionally, you can use this command to limit the statistics for a specific product: + +```bash + $ ./build-scripts/profile_tool.py most-used-components --products rhel9 +``` + The result will be a list of rules with the number of uses in the profiles. The list can be generated as plain text, JSON or CVS. Via the `--format FORMAT` parameter.