Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Extension of the most-used-rules and most-used-components subcommands of the profile_tool.py script to specify a list of products to be considered #11733

Merged
merged 11 commits into from
Mar 20, 2024
54 changes: 53 additions & 1 deletion build-scripts/profile_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
import argparse

try:
from utils.profile_tool import command_stats, command_sub, command_most_used_rules
from utils.controleval import get_available_products, load_product_yaml
from utils.profile_tool import (
command_stats,
command_sub,
command_most_used_rules,
command_most_used_components,
)
except ImportError:
print("The ssg module could not be found.")
print(
Expand Down Expand Up @@ -274,6 +280,50 @@ def parse_most_used_rules_subcommand(subparsers):
choices=["plain", "json", "csv"],
help="Which format to use for output.",
)
parser_most_used_rules.add_argument(
"--products",
help="List of products to be considered. If not specified will by used all products.",
nargs="+",
choices=get_available_products(),
default=get_available_products(),
)


def parse_most_used_components(subparsers):
    """Register the ``most-used-components`` subcommand on *subparsers*.

    The subcommand accepts:

    * ``--format``: one of ``plain``, ``json`` or ``csv`` (default ``plain``).
    * ``--products``: one or more product names, restricted to the products
      returned by ``get_available_products_with_components_root()``; when the
      option is omitted, all of those products are used.
    """
    parser_most_used_components = subparsers.add_parser(
        "most-used-components",
        description=(
            "Generates list of all components used by the rules in existing profiles."
            " In various formats."
        ),
        help="Generates list of all components used by the rules in existing profiles.",
    )
    parser_most_used_components.add_argument(
        "--format",
        default="plain",
        choices=["plain", "json", "csv"],
        help="Which format to use for output.",
    )
    # Resolve the product list once: the helper loads a product YAML for every
    # available product, so calling it for both ``choices`` and ``default``
    # doubled that work. Sorting gives a stable order in --help output and
    # makes the default a list, like a user-supplied ``nargs="+"`` value.
    products_with_components = sorted(get_available_products_with_components_root())
    parser_most_used_components.add_argument(
        "--products",
        help=(
            "List of products to be considered. "
            "If not specified, all products with components_root will be used."
        ),
        nargs="+",
        choices=products_with_components,
        default=products_with_components,
    )


def get_available_products_with_components_root():
    """Return the set of available products that define ``components_root``.

    A product is included when the product YAML loaded for it has a
    ``components_root`` entry that is not ``None``.
    """
    return {
        product
        for product in get_available_products()
        if load_product_yaml(product).get("components_root") is not None
    }


def parse_args():
Expand All @@ -283,6 +333,7 @@ def parse_args():
parse_stats_subcommand(subparsers)
parse_sub_subcommand(subparsers)
parse_most_used_rules_subcommand(subparsers)
parse_most_used_components(subparsers)

args = parser.parse_args()

Expand Down Expand Up @@ -319,6 +370,7 @@ def parse_args():
"stats": command_stats,
"sub": command_sub,
"most-used-rules": command_most_used_rules,
"most-used-components": command_most_used_components,
}


Expand Down
22 changes: 22 additions & 0 deletions docs/manual/developer/05_tools_and_utilities.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,28 @@ Or you can also run this command to get a list of the most used rules in the ent
$ ./build-scripts/profile_tool.py most-used-rules
```

Optionally, you can use this command to limit the statistics for a specific product:

```bash
$ ./build-scripts/profile_tool.py most-used-rules --products rhel9
```

The result will be a list of rules with the number of uses in the profiles.
The list can be generated as plain text, JSON, or CSV
via the `--format FORMAT` parameter.

The tool can also generate a list of the most used components based on the rules contained in profiles from the entire project:

```bash
$ ./build-scripts/profile_tool.py most-used-components
```

Optionally, you can use this command to limit the statistics for a specific product:

```bash
$ ./build-scripts/profile_tool.py most-used-components --products rhel9
```

The result will be a list of components with the number of times their rules are used in the profiles.
The list can be generated as plain text, JSON, or CSV
via the `--format FORMAT` parameter.
Expand Down
2 changes: 2 additions & 0 deletions utils/profile_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from .sub import command_sub
from .stats import command_stats
from .most_used_rules import command_most_used_rules
from .most_used_components import command_most_used_components
from .common import generate_output
15 changes: 15 additions & 0 deletions utils/profile_tool/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import json


def generate_output(dict_, format, csv_header):
    """Print the key/value pairs of *dict_* to stdout in the requested format.

    Args:
        dict_: Mapping to print; iterated in its own order.
        format: ``"json"`` dumps the whole mapping as indented JSON,
            ``"csv"`` prints *csv_header* followed by ``key,value`` lines,
            and any other value prints ``key: value`` lines.
        csv_header: Header line printed first when *format* is ``"csv"``.
    """
    if format == "json":
        print(json.dumps(dict_, indent=4))
        return

    if format == "csv":
        print(csv_header)
        line_template = "{},{}"
    else:
        line_template = "{}: {}"

    for key, count in dict_.items():
        print(line_template.format(key, count))
63 changes: 63 additions & 0 deletions utils/profile_tool/most_used_components.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import sys
import os
from collections import defaultdict

import ssg.components

from .most_used_rules import _sorted_dict_by_num_value
from .common import generate_output

PYTHON_2 = sys.version_info[0] < 3

if not PYTHON_2:
from .most_used_rules import _get_profiles_for_product
from ..controleval import (
load_controls_manager,
load_product_yaml,
)


def _count_components(components, rules_list, components_out):
    """Add one to the counter of the owning component for each rule.

    Args:
        components: Mapping of components passed through to
            ``get_component_name_by_rule_id``.
        rules_list: Iterable of rule IDs to attribute.
        components_out: Counter mapping (component name -> count) that is
            updated in place.
    """
    for rule_id in rules_list:
        components_out[get_component_name_by_rule_id(rule_id, components)] += 1


def get_component_name_by_rule_id(rule_id, components):
    """Return the name of the first component whose rules contain *rule_id*.

    Components are checked in the iteration order of ``components.values()``.
    When no component lists the rule, ``"without_component"`` is returned.
    """
    owner_names = (
        candidate.name
        for candidate in components.values()
        if rule_id in candidate.rules
    )
    return next(owner_names, "without_component")


def load_components(product):
    """Load the components of *product*.

    Returns ``None`` when the product YAML has no ``components_root`` entry.
    Otherwise returns the result of ``ssg.components.load`` for the absolute
    path obtained by joining the product's ``product_dir`` and
    ``components_root``.
    """
    product_yaml = load_product_yaml(product)
    components_root = product_yaml.get("components_root")
    if components_root is None:
        return None

    product_dir = product_yaml.get("product_dir")
    components_path = os.path.join(product_dir, components_root)
    return ssg.components.load(os.path.abspath(components_path))


def _process_all_products_from_controls(components_out, products):
    """Count component usage over the profiles of every product in *products*.

    Products for which ``load_components`` returns ``None`` are skipped.
    Counts are accumulated in place into *components_out*.

    Raises:
        Exception: When running under Python 2.
    """
    if PYTHON_2:
        raise Exception("This feature is not supported for python2.")

    for product in products:
        components = load_components(product)
        if components is not None:
            controls_manager = load_controls_manager("./controls/", product)
            profiles = _get_profiles_for_product(controls_manager, product)
            for profile in profiles:
                _count_components(components, profile.rules, components_out)


def command_most_used_components(args):
    """Entry point of the ``most-used-components`` subcommand.

    Counts component usage for ``args.products``, orders the counts with
    ``_sorted_dict_by_num_value`` and prints them in ``args.format``.
    """
    usage_counts = defaultdict(int)
    _process_all_products_from_controls(usage_counts, args.products)

    generate_output(
        _sorted_dict_by_num_value(usage_counts),
        args.format,
        "component_name,count_of_rules",
    )
43 changes: 14 additions & 29 deletions utils/profile_tool/most_used_rules.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
import sys
import json
from collections import defaultdict

from ssg.build_profile import XCCDFBenchmark

from .common import generate_output


PYTHON_2 = sys.version_info[0] < 3

if not PYTHON_2:
from .profile import get_profile
from ..controleval import (
load_controls_manager,
get_available_products,
get_product_profiles_files,
)


def _count_rules_per_rules_list(rules_list, rules):
for rule in rules_list:
if rule in rules:
rules[rule] += 1
else:
rules[rule] = 1
rules[rule] += 1


def _count_rules_per_benchmark(benchmark, rules):
Expand All @@ -38,43 +36,30 @@ def _get_profiles_for_product(ctrls_mgr, product):
return profiles


def _process_all_products_from_controls(rules, products):
    """Count rule usage over the profiles of every product in *products*.

    Args:
        rules: Mapping (rule ID -> count) updated in place.
        products: Iterable of product names whose profiles are processed.

    Raises:
        Exception: When running under Python 2.
    """
    if PYTHON_2:
        raise Exception("This feature is not supported for python2.")

    for product in products:
        controls_manager = load_controls_manager("./controls/", product)
        for profile in _get_profiles_for_product(controls_manager, product):
            _count_rules_per_rules_list(profile.rules, rules)


def _sorted_rules(rules):
sorted_rules = {
k: v
for k, v in sorted(rules.items(), key=lambda x: x[1], reverse=True)
}
return sorted_rules
def _sorted_dict_by_num_value(dict_):
sorted_ = {k: v for k, v in sorted(dict_.items(), key=lambda x: x[1], reverse=True)}
return sorted_


def command_most_used_rules(args):
    """Entry point of the ``most-used-rules`` subcommand.

    When ``args.BENCHMARKS`` is empty, rule usage is counted from the
    profiles of ``args.products``; otherwise each entry of
    ``args.BENCHMARKS`` is handed to ``_count_rules_per_benchmark``. The
    counts are ordered with ``_sorted_dict_by_num_value`` and printed in
    ``args.format``.
    """
    rules = defaultdict(int)

    if not args.BENCHMARKS:
        _process_all_products_from_controls(rules, args.products)
    else:
        for benchmark in args.BENCHMARKS:
            _count_rules_per_benchmark(benchmark, rules)

    sorted_rules = _sorted_dict_by_num_value(rules)
    csv_header = "rule_id,count_of_profiles"
    # Output formatting is delegated to the shared helper rather than being
    # repeated inline here.
    generate_output(sorted_rules, args.format, csv_header)
Loading