diff --git a/pyanchor/link_checker.py b/pyanchor/link_checker.py
index 9e0e825..5bcc0e6 100644
--- a/pyanchor/link_checker.py
+++ b/pyanchor/link_checker.py
@@ -12,6 +12,7 @@
 
 import requests
 from bs4 import BeautifulSoup
+from urllib.parse import urljoin
 
 
 class LinkResults:
@@ -42,11 +43,22 @@ def check_link_for_http_scheme(self, href: str) -> str:
 
         if href.startswith(self.base_url):
             return href
         elif href.startswith("/"):
             href = self.base_url + href.lstrip("/")
             return href
-        else:  # This catches any href set to '#'
-            return None  # TODO: Deal with ./ or ../ relative links.
+        elif href.startswith(("./", "../")):
+            # Let urljoin resolve the leading "./" and "../" segments against
+            # the base URL; stripping them by pattern would also alter any
+            # later path segment that happens to match.
+            return urljoin(self.base_url, href)
+        elif href.startswith("#") and "#" not in self.base_url:
+            # A fragment-only href points back into the page itself, so the
+            # URL to check is the base URL, not the fragment text as a path.
+            return self.base_url
+        else:
+            # Any other href is skipped, as is a fragment-only href when
+            # base_url already contains a "#".
+            return None
 
     def find_all_atags(self, url: str):
         """Find all anchor tags on a given URL.
diff --git a/tests/test_cli.py b/tests/test_cli.py
index e619773..ddd95d9 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -16,6 +16,8 @@ def test_exception_on_invalid_url_http_scheme(self):
     def test_successful_result_prints(self):
         results = runner.invoke(app, ["http://127.0.0.1:5000/", "--verbose"])
         assert "[ 200 ] - http://127.0.0.1:5000/about" in results.output
+        assert "[ 200 ] - http://127.0.0.1:5000/rel" in results.output
+        assert "[ 200 ] - http://127.0.0.1:5000/rel2" in results.output
 
     def test_failing_result_prints(self):
         results = runner.invoke(app, ["http://127.0.0.1:5000/"])
diff --git a/tests/test_webapp/app.py b/tests/test_webapp/app.py
index a262397..2970b9b 100644
--- a/tests/test_webapp/app.py
+++ b/tests/test_webapp/app.py
@@ -29,7 +29,17 @@ def about_link_2():
 def contact():
     return render_template("success.html", name="Contact")
 
 
+@app.route("/rel")
+def relative_link():
+    return render_template("success.html", name="Rel")
+
+
+@app.route("/rel2")
+def relative_link2():
+    return render_template("success.html", name="Rel2")
+
+
 @app.route("/login")
 def login():
     return render_template("error.html", status_code=401), 401
diff --git a/tests/test_webapp/templates/index.html b/tests/test_webapp/templates/index.html
index 0345daa..ac5fe80 100644
--- a/tests/test_webapp/templates/index.html
+++ b/tests/test_webapp/templates/index.html
@@ -11,7 +11,8 @@
 About Page
 Contact Page
 Hash Link
-Rel Link
+Rel Link
+Second Rel Link
 Failing Link
 
 