-
-
Notifications
You must be signed in to change notification settings - Fork 552
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Remove Python code from JSON rules #1745
* New rules: /scancode-categories/src/json_rules/json_rules_simple_01.json * Seems to work well on test codebase bionic-master-libc-bionic.tar.gz-extract (largely C++). * Next steps include expanding rules using more-diverse test codebases. * No formal test suite yet but coming soon. * This branch also includes code for 'Hello ScanCode' plugin illustrated in ScanCode wiki entry 'How To: Add a post scan plugin' (see /scancode-hello/). Signed-off-by: John M. Horan <johnmhoran@gmail.com>
- Loading branch information
1 parent
b6287a3
commit f0417d7
Showing
15 changed files
with
937 additions
and
8 deletions.
There are no files selected for viewing
116 changes: 116 additions & 0 deletions
116
plugins/scancode-categories/src/json_rules/json_rules_simple_01.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
{ | ||
"new_rules": [ | ||
{ | ||
"rule": "Blueprint files", | ||
"domain": "General", | ||
"notes": "This is a non-core Blueprint file.", | ||
"status": "Non-core Blueprint file", | ||
"test": [ | ||
{ | ||
"operator": "and", | ||
"extension": [".bp"], | ||
"file_type": ["ASCII text"], | ||
"mime_type": [], | ||
"name": [], | ||
"programming_language": [] | ||
} | ||
] | ||
}, | ||
{ | ||
"rule": "C++ files", | ||
"domain": "General", | ||
"notes": "This is a C++ file.", | ||
"status": "Core code", | ||
"test": [ | ||
{ | ||
"operator": "and", | ||
"extension": [".cpp"], | ||
"file_type": ["C source, ASCII text", "C++ source, ASCII text", "ASCII text"], | ||
"mime_type": [], | ||
"name": [], | ||
"programming_language": ["C++"] | ||
} | ||
] | ||
}, | ||
{ | ||
"rule": "C++ files with .c extension", | ||
"domain": "General", | ||
"notes": "This is a C++ file with a .c extension.", | ||
"status": "Core code", | ||
"test": [ | ||
{ | ||
"operator": "and", | ||
"extension": [".c"], | ||
"file_type": ["C source, ASCII text"], | ||
"mime_type": [], | ||
"name": [], | ||
"programming_language": ["C++"] | ||
} | ||
] | ||
}, | ||
{ | ||
"rule": "C++ header files", | ||
"domain": "General", | ||
"notes": "This is a C++ header file, i.e., with a .h extension.", | ||
"status": "Core code", | ||
"test": [ | ||
{ | ||
"operator": "and", | ||
"extension": [".h"], | ||
"file_type": ["ASCII text", "C++ source, ASCII text", "C source, ASCII text"], | ||
"mime_type": [], | ||
"name": [], | ||
"programming_language": ["C++"] | ||
} | ||
] | ||
}, | ||
{ | ||
"rule": "JavaScript map files", | ||
"domain": "General", | ||
"notes": "This is a non-core JavaScript map file.", | ||
"status": "Non-core JavaScript map file", | ||
"test": [ | ||
{ | ||
"operator": "and", | ||
"extension": [".map"], | ||
"file_type": ["ASCII text"], | ||
"mime_type": [], | ||
"name": [], | ||
"programming_language": [] | ||
} | ||
] | ||
}, | ||
{ | ||
"rule": "Test: all empty values", | ||
"domain": "General", | ||
"notes": "This is an empty value test -- every file (but no directories) should pass.", | ||
"status": "Empty value test", | ||
"test": [ | ||
{ | ||
"operator": "and", | ||
"extension": [], | ||
"file_type": [], | ||
"mime_type": [], | ||
"name": [], | ||
"programming_language": [] | ||
} | ||
] | ||
}, | ||
{ | ||
"rule": "Test: specific file name with OR operator", | ||
"domain": "General", | ||
"notes": "This is a file name OR test -- it identifies 2 specific files in the libc-bionic archive.", | ||
"status": "File name OR test", | ||
"test": [ | ||
{ | ||
"operator": "or", | ||
"extension": ["any_value"], | ||
"file_type": ["any_value"], | ||
"mime_type": ["any_value"], | ||
"name": ["scudo.cpp", "exported64.map"], | ||
"programming_language": ["any_value"] | ||
} | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
92 changes: 92 additions & 0 deletions
92
plugins/scancode-categories/src/plugin_categories/plugin_categories_test_05.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# | ||
# Copyright (c) 2019 nexB Inc. and others. All rights reserved. | ||
# http://nexb.com and https://github.com/nexB/scancode-toolkit/ | ||
# The ScanCode software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode require an acknowledgment. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# When you publish or redistribute any data created with ScanCode or any ScanCode | ||
# derivative work, you must accompany this data with the following acknowledgment: | ||
# | ||
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# ScanCode is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/nexB/scancode-toolkit/ for support and download. | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
from __future__ import unicode_literals | ||
|
||
from collections import OrderedDict | ||
|
||
import attr | ||
import json | ||
|
||
from plugincode.post_scan import PostScanPlugin | ||
from plugincode.post_scan import post_scan_impl | ||
from scancode import CommandLineOption | ||
from scancode import POST_SCAN_GROUP | ||
|
||
|
||
@post_scan_impl
class CategoryRules(PostScanPlugin):
    """
    Identify the category (e.g., Java, JavaScript, Python) for each file in the codebase being scanned.

    The rules are loaded from the JSON file passed with ``--categories``. That
    file must contain a top-level ``new_rules`` list. Each rule's ``test``
    value is evaluated as a Python expression for every file, and the rules
    whose expression is truthy are recorded in the resource ``category``
    attribute.
    """

    options = [
        CommandLineOption(('--categories',),
            help='Identify the category (e.g., Java, JavaScript, Python) for each file in the codebase being scanned. Rules comprise a set of any() and all() functions contained as string values in a list of JSON objects. The category and related information (including the rule applied to the file) will be added to a new "category" field in the ScanCode JSON output file.',
            metavar='FILE',
            help_group=POST_SCAN_GROUP)
    ]

    # NOTE(review): the declared default is an empty dict, but vet_resource()
    # stores either a list of matched rules or a plain string here -- confirm
    # which type downstream consumers expect.
    resource_attributes = dict(category=attr.ib(default=attr.Factory(dict)))

    def is_enabled(self, categories, **kwargs):
        """
        Return a truthy value when the ``--categories`` option was given.

        ``categories`` is the option value (the path to the ruleset file); it
        is returned unchanged.
        """
        return categories

    def process_codebase(self, codebase, categories, **kwargs):
        """
        Populate a category mapping.

        Load the JSON ruleset found at the ``categories`` path, then vet and
        save every resource yielded by ``codebase.walk(topdown=False)``.
        Do nothing when the plugin is not enabled.
        """
        if not self.is_enabled(categories):
            return

        ruleset_path = categories
        with open(ruleset_path) as json_file:
            data = json.load(json_file)

        for resource in codebase.walk(topdown=False):
            self.vet_resource(resource, categories, data)
            codebase.save_resource(resource)

    def vet_resource(self, resource, categories, data, **kwargs):
        """
        Set ``resource.category`` from the rules in ``data`` and return the
        resource.

        - A directory always gets the string 'directory'.
        - A file gets a list with one OrderedDict (keys 'name', 'test',
          'domain', 'status') per rule whose ``test`` expression is truthy.
        - Anything that matched no rule gets the string "no match".
        """
        # A directory is categorized as 'directory' whatever the rules say, so
        # skip rule evaluation entirely: it is wasted work and a rule written
        # for files must not be able to fail the scan on a directory.
        if resource.type == 'directory':
            resource.category = 'directory'
            return resource

        matched_rules = []
        resource.category = matched_rules
        is_file = resource.type == 'file'

        for i in data["new_rules"]:
            # The rule expression sees this method's local names (notably
            # `resource`); the loop variable keeps its original name `i`
            # because it is part of that namespace.
            scope = locals()
            # SECURITY: eval() executes arbitrary Python taken from the ruleset
            # file. Only load rulesets from a trusted source.
            # NOTE(review): this requires `test` to be a string expression and
            # each rule to carry a 'name' key; the json_rules_simple_01.json
            # ruleset added in the same commit uses a list-valued "test" and a
            # "rule" key instead, so it looks incompatible -- confirm.
            if eval(i["test"], scope) and is_file:
                matched_rules.append(OrderedDict((k, i[k]) for k in ('name', 'test', 'domain', 'status')))

        if not matched_rules:
            resource.category = "no match"

        return resource
Oops, something went wrong.