Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Add URLscan support #7

Merged
merged 2 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions engines/urlscan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import json
import logging
from collections import Counter
from urllib.parse import quote, urlsplit

import requests

# Silence urllib3 warnings (notably the InsecureRequestWarning emitted for
# every call made with verify=False, as the request in this module is).
requests.packages.urllib3.disable_warnings()

logger = logging.getLogger(__name__)


def _extract_domain(observable, observable_type):
    """Return the host name to search urlscan.io for, or None if there is none."""
    if not observable:
        return None
    if observable_type != "URL":
        return observable
    try:
        # urlsplit copes with credentials, ports and bracketed IPv6 hosts,
        # all of which naive "/" and ":" splitting gets wrong.
        return urlsplit(observable.strip()).hostname
    except ValueError:
        return None


def _top_domains(results, limit=5):
    """Count the page domains in *results* and return the *limit* most frequent."""
    counts = Counter(
        (entry.get("page") or {}).get("domain") or "Unknown" for entry in results
    )
    # most_common() keeps first-seen order for ties, like a stable sort would.
    return [
        {"domain": domain, "count": count}
        for domain, count in counts.most_common(limit)
    ]


def query_urlscan(observable, observable_type, PROXIES, timeout=10):
    """
    Query the urlscan.io search API for scans of the observable's domain.

    Args:
        observable (str): The observable to query (a URL or a domain name).
        observable_type (str): The type of the observable. For "URL" the host
            name is extracted from the URL; any other type is searched as-is.
        PROXIES (dict): Proxies mapping passed to ``requests``.
        timeout (float): Seconds to wait for urlscan.io before giving up.

    Returns:
        dict | None: On success, a dictionary of the form::

            {
                "scan_count": 10,
                "top_domains": [
                    {"domain": "example.com", "count": 5},
                    {"domain": "example.org", "count": 3},
                    {"domain": "example.net", "count": 2}
                ],
                "link": "https://urlscan.io/search/#page.domain:example.com"
            }

        ``top_domains`` holds at most five entries, most frequent first.
        ``None`` is returned when no domain can be derived from the observable
        or when the request or the processing of its response fails; this
        function does not raise.
    """
    domain = _extract_domain(observable, observable_type)
    if not domain:
        logger.warning("urlscan: no domain could be extracted from %r", observable)
        return None

    # Escape the value so characters such as "&" or "#" cannot alter the query.
    encoded_domain = quote(domain, safe="")
    url = f"https://urlscan.io/api/v1/search/?q=page.domain:{encoded_domain}"

    try:
        # NOTE(review): verify=False disables TLS certificate validation; it is
        # kept as-is here and should be confirmed as intentional.
        response = requests.get(url, proxies=PROXIES, verify=False, timeout=timeout)
        response.raise_for_status()
        payload = response.json()

        return {
            "scan_count": payload.get("total", 0),
            "top_domains": _top_domains(payload.get("results") or []),
            "link": f"https://urlscan.io/search/#page.domain:{domain}"
        }
    except Exception:
        # Deliberately broad: callers treat None as "no result", so any failure
        # (network, HTTP status, malformed payload) is logged and reported as None.
        logger.exception("urlscan: query failed for %r", domain)
        return None
44 changes: 43 additions & 1 deletion templates/index-demo.html
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ <h1>DEMO - Cyberbro - Observable Analysis - All info is public!!!</h1>
<br>

<div>
<h3 title="Use your mouse to hover over the engine name to see what it does">Select the engines to use (use your own instance to get all engines)</h3>
<h3 title="Use your mouse to hover over the engine name to see what it does">Select the engines to use (use your own instance to get all engines, API limits may apply here)</h3>
</div>
<div>
<label title="Performs a reverse DNS lookup for IP, domain, URL (on your machine)"><input type="checkbox" name="engines" value="reverse_dns" data-supports="default domain ip abuse free_no_key" checked> Reverse DNS </label><br>
Expand All @@ -390,6 +390,7 @@ <h3 title="Use your mouse to hover over the engine name to see what it does">Sel
<label title="Checks Shodan, reversed obtained IP for a given domain / URL, free API key required"><input type="checkbox" name="engines" value="shodan" data-supports="ports ip"> Shodan</label><br>
<label title="Checks Phishtank for domains, URL, free, no API key"><input type="checkbox" name="engines" value="phishtank" data-supports="risk domain url free_no_key"> Phishtank</label><br>
<label title="Checks ThreatFox by Abuse.ch for IP, domains, URL, free, no API key"><input type="checkbox" name="engines" value="threatfox" data-supports="ip domain url free_no_key"> ThreatFox</label><br>
<label title="Checks URLscan for URL, free, no API key"><input type="checkbox" name="engines" value="urlscan" data-supports="domain url free_no_key"> URLscan</label><br>
<label title="Scraps Google search results for all types of observable, free, no API key"><input type="checkbox" name="engines" value="google" data-supports="domain url ip hash free_no_key scraping"> Google</label><br>
<label title="Scraps Github search results for all types of observable, free, no API key"><input type="checkbox" name="engines" value="github" data-supports="domain url ip hash free_no_key scraping"> Github</label><br>
<label title="Checks abuse contact with Abusix for IP, reversed obtained IP for a given domain / URL, free, no API key"><input type="checkbox" name="engines" value="abusix" data-supports="abuse free_no_key"> Abusix</label><br><br>
Expand Down Expand Up @@ -583,6 +584,9 @@ <h1>Cyberbro - Analysis Results</h1>
{% if "ioc_one_pdf" in analysis_results.selected_engines %}
<th>Ioc.One (PDF)</th>
{% endif %}
{% if "urlscan" in analysis_results.selected_engines %}
<th>URLscan</th>
{% endif %}
</tr>
</thead>
<tbody>
Expand Down Expand Up @@ -881,6 +885,25 @@ <h1>Cyberbro - Analysis Results</h1>
{% endif %}
</td>
{% endif %}
{% if "urlscan" in analysis_results.selected_engines %}
<td>
{% if result.urlscan %}
<strong>Scan count: </strong><a href="{{ result.urlscan.link }}" target="_blank">{{ result.urlscan.scan_count }}</a><br>
{% if result.urlscan.scan_count == 0 %}
Not Found
{% else %}
<strong>Top domains: </strong>
<ul>
{% for domain in result.urlscan.top_domains %}
<li>{{ domain.domain }} ({{ domain.count }})</li>
{% endfor %}
</ul>
{% endif %}
{% else %}
Not applicable
{% endif %}
</td>
{% endif %}
</tr>
{% endfor %}
</tbody>
Expand Down Expand Up @@ -1001,6 +1024,25 @@ <h3>Spur.us</h3>
{% endif %}
</div>

<div class="cards-results-container">
{# URLscan summary card: rendered only when the first result has urlscan data. #}
{% if analysis_results.results[0].urlscan %}
    <div class="card">
        <h3>URLscan</h3>
        <p><strong>Scan count: </strong><a href="{{ analysis_results.results[0].urlscan.link }}" target="_blank" rel="noopener noreferrer">{{ analysis_results.results[0].urlscan.scan_count }}</a></p>
        {% if analysis_results.results[0].urlscan.scan_count == 0 %}
            <p>Not Found</p>
        {% else %}
            <p><strong>Top domains: </strong></p>
            <ul>
                {% for domain in analysis_results.results[0].urlscan.top_domains %}
                    <li>{{ domain.domain }} ({{ domain.count }})</li>
                {% endfor %}
            </ul>
        {% endif %}
    {# Close the card outside the if/else so it is closed in both branches. #}
    </div>
{% endif %}
</div>

<div class="cards-results-container">
{% if analysis_results.results[0].mde %}
<div class="card">
Expand Down
46 changes: 44 additions & 2 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ <h3 title="Use your mouse to hover over the engine name to see what it does">Sel
<label title="Checks Shodan, reversed obtained IP for a given domain / URL, free API key required"><input type="checkbox" name="engines" value="shodan" data-supports="ports ip"> Shodan</label><br>
<label title="Checks Phishtank for domains, URL, free, no API key"><input type="checkbox" name="engines" value="phishtank" data-supports="risk domain url free_no_key"> Phishtank</label><br>
<label title="Checks ThreatFox by Abuse.ch for IP, domains, URL, free, no API key"><input type="checkbox" name="engines" value="threatfox" data-supports="ip domain url free_no_key"> ThreatFox</label><br>
<label title="Checks URLscan for URL, free, no API key"><input type="checkbox" name="engines" value="urlscan" data-supports="domain url free_no_key"> URLscan</label><br>
<label title="Scraps Google search results for all types of observable, free, no API key"><input type="checkbox" name="engines" value="google" data-supports="domain url ip hash free_no_key scraping"> Google</label><br>
<label title="Scraps Github search results for all types of observable, free, no API key"><input type="checkbox" name="engines" value="github" data-supports="domain url ip hash free_no_key scraping"> Github</label><br>
<label title="Scraps (can be long) Ioc.One HTML search results for all types of observable, free, no API key"><input type="checkbox" name="engines" value="ioc_one_html" data-supports="domain url ip hash scraping"> Ioc.One (HTML)</label><br>
Expand Down Expand Up @@ -586,6 +587,9 @@ <h1>Cyberbro - Analysis Results</h1>
{% if "ioc_one_pdf" in analysis_results.selected_engines %}
<th>Ioc.One (PDF)</th>
{% endif %}
{% if "urlscan" in analysis_results.selected_engines %}
<th>URLscan</th>
{% endif %}
</tr>
</thead>
<tbody>
Expand All @@ -608,7 +612,7 @@ <h1>Cyberbro - Analysis Results</h1>
high-risk
{% endif %}
">
<td>{{ result.observable[:60] }}{% if result.observable | length > 60 %}...{% endif %}</td>
<td>{{ result.observable }}</td>
<td>{{ result.type }}</td>
{% if "reverse_dns" in analysis_results.selected_engines %}
<td>
Expand Down Expand Up @@ -884,6 +888,25 @@ <h1>Cyberbro - Analysis Results</h1>
{% endif %}
</td>
{% endif %}
{% if "urlscan" in analysis_results.selected_engines %}
<td>
{% if result.urlscan %}
<strong>Scan count: </strong><a href="{{ result.urlscan.link }}" target="_blank">{{ result.urlscan.scan_count }}</a><br>
{% if result.urlscan.scan_count == 0 %}
Not Found
{% else %}
<strong>Top domains: </strong>
<ul>
{% for domain in result.urlscan.top_domains %}
<li>{{ domain.domain }} ({{ domain.count }})</li>
{% endfor %}
</ul>
{% endif %}
{% else %}
Not applicable
{% endif %}
</td>
{% endif %}
</tr>
{% endfor %}
</tbody>
Expand All @@ -893,7 +916,7 @@ <h1>Cyberbro - Analysis Results</h1>
<div class="cards-container">
<div class="cards-results-container">
<div class="card">
<h3 title="{{ analysis_results.results[0].observable }}">{{ analysis_results.results[0].observable[:60] }}{% if analysis_results.results[0].observable | length > 80 %}...{% endif %}</h3>
<h3 title="{{ analysis_results.results[0].observable }}">{{ analysis_results.results[0].observable[:60] }}{% if analysis_results.results[0].observable | length > 80 %}...{% endif %}</h3>
<p>{{ analysis_results.results[0].type }}</p>
</div>
{% if analysis_results.results[0].reverse_dns and analysis_results.results[0].reverse_dns.reverse_dns %}
Expand Down Expand Up @@ -1004,6 +1027,25 @@ <h3>Spur.us</h3>
{% endif %}
</div>

<div class="cards-results-container">
{# URLscan summary card: rendered only when the first result has urlscan data. #}
{% if analysis_results.results[0].urlscan %}
    <div class="card">
        <h3>URLscan</h3>
        <p><strong>Scan count: </strong><a href="{{ analysis_results.results[0].urlscan.link }}" target="_blank" rel="noopener noreferrer">{{ analysis_results.results[0].urlscan.scan_count }}</a></p>
        {% if analysis_results.results[0].urlscan.scan_count == 0 %}
            <p>Not Found</p>
        {% else %}
            <p><strong>Top domains: </strong></p>
            <ul>
                {% for domain in analysis_results.results[0].urlscan.top_domains %}
                    <li>{{ domain.domain }} ({{ domain.count }})</li>
                {% endfor %}
            </ul>
        {% endif %}
    {# Close the card outside the if/else so it is closed in both branches. #}
    </div>
{% endif %}
</div>

<div class="cards-results-container">
{% if analysis_results.results[0].mde %}
<div class="card">
Expand Down
5 changes: 4 additions & 1 deletion utils/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from engines import (
abuseipdb, virustotal, ipinfo, reverse_dns, google_safe_browsing,
microsoft_defender_for_endpoint, spur_us_free, shodan, phishtank, abusix, rdap, threatfox, google, github, ioc_one, ipquery
microsoft_defender_for_endpoint, spur_us_free, shodan, phishtank, abusix, rdap, threatfox, google, github, ioc_one, ipquery, urlscan
)

from models.analysis_result import AnalysisResult
Expand Down Expand Up @@ -89,6 +89,9 @@ def perform_engine_queries(observable, selected_engines, result):
if observable["type"] in ["IPv4", "IPv6"] and is_bogon(observable["value"]):
observable["type"] = "BOGON"

if "urlscan" in selected_engines and observable["type"] in ["URL", "FQDN"]:
result['urlscan'] = urlscan.query_urlscan(observable["value"], observable["type"], PROXIES)

if "ioc_one_html" in selected_engines and observable["type"] in ["MD5", "SHA1", "SHA256", "URL", "FQDN", "IPv4", "IPv6"]:
result['ioc_one_html'] = ioc_one.query_ioc_one_html(observable["value"], PROXIES)

Expand Down
5 changes: 5 additions & 0 deletions utils/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ def prepare_row(result, selected_engines):
row["phishtank_verified"] = phishtank_data.get("verified") if phishtank_data else None
row["phishtank_valid"] = phishtank_data.get("valid") if phishtank_data else None

if "urlscan" in selected_engines:
urlscan_data = result.get("urlscan", {})
row["urlscan_count"] = urlscan_data.get("scan_count") if urlscan_data else None
row["urlscan_top_domains"] = urlscan_data.get("top_domains") if urlscan_data else None

return row

def prepare_data_for_export(analysis_results):
Expand Down
Loading