From 142cac9448e73d89a8c2e9fe385c860a82acc216 Mon Sep 17 00:00:00 2001 From: Gabriel Gradinaru Date: Thu, 28 Sep 2017 21:06:01 +0300 Subject: [PATCH] Added port filtering --- proxybroker/api.py | 15 +++++++++++++-- proxybroker/cli.py | 10 ++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/proxybroker/api.py b/proxybroker/api.py index fa432e8..c591bb4 100644 --- a/proxybroker/api.py +++ b/proxybroker/api.py @@ -64,6 +64,7 @@ def __init__(self, queue=None, timeout=8, max_conn=200, max_tries=3, self._server = None self._limit = 0 # not limited self._countries = None + self._ports = None max_concurrent_conn = kwargs.get('max_concurrent_conn') if max_concurrent_conn: @@ -96,21 +97,24 @@ def __init__(self, queue=None, timeout=8, max_conn=200, max_tries=3, except NotImplementedError: pass - async def grab(self, *, countries=None, limit=0): + async def grab(self, *, countries=None, ports=None, limit=0): """Gather proxies from the providers without checking. :param list countries: (optional) List of ISO country codes where should be located proxies + :param list ports: (optional) List of ports indicating + allowed proxy ports :param int limit: (optional) The maximum number of proxies :ref:`Example of usage `. """ self._countries = countries + self._ports = ports self._limit = limit task = asyncio.ensure_future(self._grab(check=False)) self._all_tasks.append(task) - async def find(self, *, types=None, data=None, countries=None, + async def find(self, *, types=None, data=None, countries=None, ports=None, post=False, strict=False, dnsbl=None, limit=0, **kwargs): """Gather and check proxies from providers or from a passed data. @@ -126,6 +130,8 @@ async def find(self, *, types=None, data=None, countries=None, :param list countries: (optional) List of ISO country codes where should be located proxies + :param list ports: + (optional) List of ports indicating allowed proxy ports :param bool post: (optional) Flag indicating use POST instead of GET for requests when checking proxies @@ -159,6 +165,7 @@ async def find(self, *, types=None, data=None, countries=None, real_ext_ip=ip, types=types, post=post, strict=strict, dnsbl=dnsbl, loop=self._loop) self._countries = countries + self._ports = ports self._limit = limit tasks = [asyncio.ensure_future(self._checker.check_judges())] @@ -306,6 +313,10 @@ async def _handle(self, proxy, check=False): if not self._is_unique(proxy) or not self._geo_passed(proxy): return + if self._ports and proxy.port not in self._ports: + proxy.log('Proxy port is not in allowed ports list') + return + if check: await self._push_to_check(proxy) else: diff --git a/proxybroker/cli.py b/proxybroker/cli.py index c7f3cc9..10229b7 100644 --- a/proxybroker/cli.py +++ b/proxybroker/cli.py @@ -168,6 +168,12 @@ def add_grab_args(group): nargs='+', help='List of ISO country codes where should be located proxies') + group.add_argument( + '--ports', '-p', + type=int, + nargs='+', + help='List of ports indicating allowed proxy ports') + def add_serve_args(group): group.add_argument( @@ -332,10 +338,10 @@ def cli(args=sys.argv[1:]): if ns.command == 'find': tasks.append(broker.find( - data=ns.data, types=ns.types, countries=ns.countries, + data=ns.data, types=ns.types, countries=ns.countries, ports=ns.ports, post=ns.post, strict=ns.strict, dnsbl=ns.dnsbl, limit=ns.limit)) elif ns.command == 'grab': - tasks.append(broker.grab(countries=ns.countries, limit=ns.limit)) + tasks.append(broker.grab(countries=ns.countries, ports=ns.ports, limit=ns.limit)) elif ns.command == 'serve': broker.serve( host=ns.host, port=ns.port, limit=ns.limit,