ComplianceAsCode · Mab879 · Mar 20, 2024 · Mar 19, 2024 · Mar 19, 2024 · Mar 19, 2024
diff --git a/build-scripts/profile_tool.py b/build-scripts/profile_tool.py
@@ -5,7 +5,13 @@
 import argparse
 
 try:
-    from utils.profile_tool import command_stats, command_sub, command_most_used_rules
+    from utils.controleval import get_available_products, load_product_yaml
+    from utils.profile_tool import (
+        command_stats,
+        command_sub,
+        command_most_used_rules,
+        command_most_used_components,
+    )
 except ImportError:
     print("The ssg module could not be found.")
     print(
@@ -274,6 +280,50 @@ def parse_most_used_rules_subcommand(subparsers):
         choices=["plain", "json", "csv"],
         help="Which format to use for output.",
     )
+    parser_most_used_rules.add_argument(
+        "--products",
+        help="List of products to be considered. If not specified will by used all products.",
+        nargs="+",
+        choices=get_available_products(),
+        default=get_available_products(),
+    )
+
+
+def parse_most_used_components(subparsers):
+    """Register the ``most-used-components`` subcommand on ``subparsers``.
+
+    The subcommand accepts ``--format`` (plain, json or csv; default plain)
+    and ``--products``, which is restricted to, and defaults to, the products
+    returned by ``get_available_products_with_components_root()``.
+    """
+    # Resolve the product list once: the helper loads every product YAML, so
+    # calling it separately for ``choices`` and ``default`` doubles that work.
+    # Sorting makes the order shown in the usage/help text deterministic.
+    products_with_components = sorted(get_available_products_with_components_root())
+
+    parser_most_used_components = subparsers.add_parser(
+        "most-used-components",
+        description=(
+            "Generates list of all components used by the rules in existing profiles."
+            " In various formats."
+        ),
+        help="Generates list of all components used by the rules in existing profiles.",
+    )
+    parser_most_used_components.add_argument(
+        "--format",
+        default="plain",
+        choices=["plain", "json", "csv"],
+        help="Which format to use for output.",
+    )
+    parser_most_used_components.add_argument(
+        "--products",
+        help=(
+            "List of products to be considered. "
+            "If not specified, all products with components_root will be used."
+        ),
+        nargs="+",
+        choices=products_with_components,
+        default=products_with_components,
+    )
+
+
+def get_available_products_with_components_root():
+    """Return the set of available products whose product YAML sets ``components_root``."""
+    return {
+        product_id
+        for product_id in get_available_products()
+        if load_product_yaml(product_id).get("components_root") is not None
+    }
 
 
 def parse_args():
@@ -283,6 +333,7 @@ def parse_args():
     parse_stats_subcommand(subparsers)
     parse_sub_subcommand(subparsers)
     parse_most_used_rules_subcommand(subparsers)
+    parse_most_used_components(subparsers)
 
     args = parser.parse_args()
 
@@ -319,6 +370,7 @@ def parse_args():
     "stats": command_stats,
     "sub": command_sub,
     "most-used-rules": command_most_used_rules,
+    "most-used-components": command_most_used_components,
 }
 
 

diff --git a/docs/manual/developer/05_tools_and_utilities.md b/docs/manual/developer/05_tools_and_utilities.md
@@ -57,6 +57,28 @@ Or you can also run this command to get a list of the most used rules in the ent
     $ ./build-scripts/profile_tool.py most-used-rules
 ```
 
+Optionally, you can use this command to limit the statistics for a specific product:
+
+```bash
+    $ ./build-scripts/profile_tool.py most-used-rules --products rhel9
+```
+
+The result will be a list of rules with the number of uses in the profiles.
+The list can be generated as plain text, JSON or CSV
+via the `--format FORMAT` parameter.
+
+The tool can also generate a list of the most used components, based on the rules contained in profiles from the entire project:
+
+```bash
+    $ ./build-scripts/profile_tool.py most-used-components
+```
+
+Optionally, you can use this command to limit the statistics for a specific product:
+
+```bash
+    $ ./build-scripts/profile_tool.py most-used-components --products rhel9
+```
+
 The result will be a list of rules with the number of uses in the profiles.
 The list can be generated as plain text, JSON or CSV
 via the `--format FORMAT` parameter.

diff --git a/utils/profile_tool/__init__.py b/utils/profile_tool/__init__.py
@@ -1,3 +1,5 @@
 from .sub import command_sub
 from .stats import command_stats
 from .most_used_rules import command_most_used_rules
+from .most_used_components import command_most_used_components
+from .common import generate_output
diff --git a/utils/profile_tool/common.py b/utils/profile_tool/common.py
@@ -0,0 +1,15 @@
+import json
+
+
+def generate_output(dict_, format, csv_header):
+    """Print the key/count pairs of ``dict_`` to stdout in the requested format.
+
+    Args:
+        dict_: Mapping of an identifier to its count; entries are printed in
+            the mapping's iteration order.
+        format: ``"json"`` dumps the whole mapping with a 4-space indent,
+            ``"csv"`` prints ``csv_header`` followed by one ``key,count`` row
+            per entry, and any other value prints ``key: count`` lines.
+        csv_header: Header line printed verbatim before the CSV rows; unused
+            for the other formats.
+    """
+    if format == "json":
+        print(json.dumps(dict_, indent=4))
+        return
+
+    if format == "csv":
+        # Imported locally so the function does not depend on edits to the
+        # module's import block.
+        import csv
+        import sys
+
+        print(csv_header)
+        # Let the csv module quote keys that contain commas, quotes or
+        # newlines instead of emitting rows with the wrong number of columns.
+        writer = csv.writer(sys.stdout, lineterminator="\n")
+        writer.writerows(dict_.items())
+        return
+
+    for key, count in dict_.items():
+        print("{}: {}".format(key, count))
diff --git a/utils/profile_tool/most_used_components.py b/utils/profile_tool/most_used_components.py
@@ -0,0 +1,63 @@
+import sys
+import os
+from collections import defaultdict
+
+import ssg.components
+
+from .most_used_rules import _sorted_dict_by_num_value
+from .common import generate_output
+
+PYTHON_2 = sys.version_info[0] < 3
+
+if not PYTHON_2:
+    from .most_used_rules import _get_profiles_for_product
+    from ..controleval import (
+        load_controls_manager,
+        load_product_yaml,
+    )
+
+
+def _count_components(components, rules_list, components_out):
+    """Add one to ``components_out`` for the component owning each rule in ``rules_list``.
+
+    The owning component name is looked up with
+    ``get_component_name_by_rule_id``; ``components_out`` is updated in place.
+    """
+    for rule_id in rules_list:
+        owner = get_component_name_by_rule_id(rule_id, components)
+        components_out[owner] = components_out[owner] + 1
+
+
+def get_component_name_by_rule_id(rule_id, components):
+    """Return the name of the first component whose ``rules`` contain ``rule_id``.
+
+    Components are checked in the iteration order of ``components.values()``;
+    ``"without_component"`` is returned when none of them lists the rule.
+    """
+    matching_names = (
+        candidate.name
+        for candidate in components.values()
+        if rule_id in candidate.rules
+    )
+    return next(matching_names, "without_component")
+
+
+def load_components(product):
+    """Load the components of ``product``.
+
+    Reads ``product_dir`` and ``components_root`` from the product YAML and
+    passes the absolute path of ``components_root`` joined onto
+    ``product_dir`` to ``ssg.components.load``. Returns ``None`` when the
+    product YAML has no ``components_root``.
+    """
+    product_data = load_product_yaml(product)
+    base_dir = product_data.get("product_dir")
+    relative_root = product_data.get("components_root")
+    if relative_root is not None:
+        joined_path = os.path.join(base_dir, relative_root)
+        return ssg.components.load(os.path.abspath(joined_path))
+    return None
+
+
+def _process_all_products_from_controls(components_out, products):
+    """Count component usage over every profile of the given products.
+
+    For each product that has components, the rules of each profile returned
+    by ``_get_profiles_for_product`` are tallied into ``components_out``.
+    Products for which ``load_components`` returns ``None`` are skipped.
+
+    Raises:
+        Exception: when running under Python 2.
+    """
+    if PYTHON_2:
+        raise Exception("This feature is not supported for python2.")
+
+    for product in products:
+        product_components = load_components(product)
+        if product_components is not None:
+            ctrls_mgr = load_controls_manager("./controls/", product)
+            profiles = _get_profiles_for_product(ctrls_mgr, product)
+            for profile in profiles:
+                _count_components(product_components, profile.rules, components_out)
+
+
+def command_most_used_components(args):
+    """Print per-component usage counts for ``args.products`` in ``args.format``.
+
+    Counts are gathered across the products' profiles, sorted by count in
+    descending order and written out by ``generate_output``.
+    """
+    usage_counts = defaultdict(int)
+    _process_all_products_from_controls(usage_counts, args.products)
+    generate_output(
+        _sorted_dict_by_num_value(usage_counts),
+        args.format,
+        "component_name,count_of_rules",
+    )
diff --git a/utils/profile_tool/most_used_rules.py b/utils/profile_tool/most_used_rules.py
@@ -1,26 +1,24 @@
 import sys
-import json
+from collections import defaultdict
 
 from ssg.build_profile import XCCDFBenchmark
 
+from .common import generate_output
+
 
 PYTHON_2 = sys.version_info[0] < 3
 
 if not PYTHON_2:
     from .profile import get_profile
     from ..controleval import (
         load_controls_manager,
-        get_available_products,
         get_product_profiles_files,
     )
 
 
 def _count_rules_per_rules_list(rules_list, rules):
     for rule in rules_list:
-        if rule in rules:
-            rules[rule] += 1
-        else:
-            rules[rule] = 1
+        rules[rule] += 1
 
 
 def _count_rules_per_benchmark(benchmark, rules):
@@ -38,43 +36,30 @@ def _get_profiles_for_product(ctrls_mgr, product):
     return profiles
 
 
-def _process_all_products_from_controls(rules):
+def _process_all_products_from_controls(rules, products):
     if PYTHON_2:
         raise Exception("This feature is not supported for python2.")
 
-    for product in get_available_products():
+    for product in products:
         controls_manager = load_controls_manager("./controls/", product)
         for profile in _get_profiles_for_product(controls_manager, product):
             _count_rules_per_rules_list(profile.rules, rules)
 
 
-def _sorted_rules(rules):
-    sorted_rules = {
-        k: v
-        for k, v in sorted(rules.items(), key=lambda x: x[1], reverse=True)
-    }
-    return sorted_rules
+def _sorted_dict_by_num_value(dict_):
+    """Return a new dict with the items of ``dict_`` ordered by value, largest first."""
+    ordered_items = sorted(dict_.items(), key=lambda item: item[1], reverse=True)
+    return dict(ordered_items)
 
 
 def command_most_used_rules(args):
-    rules = {}
+    rules = defaultdict(int)
 
     if not args.BENCHMARKS:
-        _process_all_products_from_controls(rules)
+        _process_all_products_from_controls(rules, args.products)
     else:
         for benchmark in args.BENCHMARKS:
             _count_rules_per_benchmark(benchmark, rules)
 
-    sorted_rules = _sorted_rules(rules)
-
-    f_string = "{}: {}"
-
-    if args.format == "json":
-        print(json.dumps(sorted_rules, indent=4))
-        return
-    elif args.format == "csv":
-        print("rule_id,count_of_profiles")
-        f_string = "{},{}"
-
-    for rule_id, rule_count in sorted_rules.items():
-        print(f_string.format(rule_id, rule_count))
+    sorted_rules = _sorted_dict_by_num_value(rules)
+    csv_header = "rule_id,count_of_profiles"
+    generate_output(sorted_rules, args.format, csv_header)