-
Notifications
You must be signed in to change notification settings - Fork 143
/
Copy pathnewsapi_client.py
415 lines (340 loc) · 16.5 KB
/
newsapi_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
from __future__ import unicode_literals
import requests
from newsapi import const
from newsapi.newsapi_auth import NewsApiAuth
from newsapi.newsapi_exception import NewsAPIException
from newsapi.utils import (
is_valid_string, is_valid_string_or_list, stringify_date_param
)
class NewsApiClient(object):
    """The core client object used to fetch data from News API endpoints.

    :param api_key: Your API key, a length-32 UUID string provided for your News API account.
        You must `register <https://newsapi.org/register>`_ for a News API key.
    :type api_key: str

    :param session: An optional :class:`requests.Session` instance from which to execute requests.
        **Note**: If you provide a ``session`` instance, :class:`NewsApiClient` will *not* close the session
        for you.  Remember to call ``session.close()``, or use the session as a context manager, to close
        the socket and free up resources.
    :type session: `requests.Session <https://2.python-requests.org/en/master/user/advanced/#session-objects>`_ or None

    :param proxy: An optional proxy URL.  When set, it is used for both ``http`` and ``https`` requests.
    :type proxy: str or None
    """

    def __init__(self, api_key, session=None, proxy=None):
        self.auth = NewsApiAuth(api_key=api_key)
        # Without a session, fall back to the module-level ``requests`` API.
        self.request_method = requests if session is None else session
        self.proxy = proxy

    def send_request(self, url, payload):
        """Send an authenticated GET request, routing through the configured proxy if any.

        :param url: The endpoint URL to request.
        :param payload: Query-string parameters for the request.
        :type payload: dict
        :return: Whatever ``self.request_method.get`` returns (a response object).
        """
        request_kwargs = {"auth": self.auth, "timeout": 30, "params": payload}
        # Only pass ``proxies`` when a proxy is configured, so custom session
        # objects that do not know the keyword keep working.
        if self.proxy:
            request_kwargs["proxies"] = {"http": self.proxy, "https": self.proxy}
        return self.request_method.get(url, **request_kwargs)

    # ------------------------------------------------------------------
    # Private validation helpers shared by the endpoint methods.
    # ------------------------------------------------------------------

    @staticmethod
    def _checked_query(q):
        """Return ``q`` unchanged if it is a str or list of str, else raise ``TypeError``."""
        if not is_valid_string_or_list(q):
            raise TypeError("keyword/phrase q param should be of type str or list of type str")
        return q

    @staticmethod
    def _checked_str(value, label):
        """Return ``value`` unchanged if it is a valid string, else raise ``TypeError`` naming ``label``."""
        if not is_valid_string(value):
            raise TypeError("{} param should be of type str".format(label))
        return value

    @staticmethod
    def _checked_choice(value, label, allowed, invalid_message):
        """Return ``value`` if it is a string contained in ``allowed``.

        The type check runs before the membership check, so a non-string raises
        ``TypeError`` and an unknown string raises ``ValueError(invalid_message)``.
        """
        if not is_valid_string(value):
            raise TypeError("{} param should be of type str".format(label))
        if value not in allowed:
            raise ValueError(invalid_message)
        return value

    @staticmethod
    def _checked_page_size(page_size):
        """Return ``page_size`` if it is an int in the range 1..100 (inclusive)."""
        # Exact type match on purpose: ``bool`` is an ``int`` subclass and must be rejected.
        if type(page_size) is not int:
            raise TypeError("page_size param should be an int")
        if not 1 <= page_size <= 100:
            raise ValueError("page_size param should be an int between 1 and 100")
        return page_size

    @staticmethod
    def _checked_page(page):
        """Return ``page`` if it is an int greater than 0."""
        # Exact type match on purpose: ``bool`` is an ``int`` subclass and must be rejected.
        if type(page) is not int:
            raise TypeError("page param should be an int")
        if page <= 0:
            raise ValueError("page param should be an int greater than 0")
        return page

    @staticmethod
    def _json_or_raise(response):
        """Return the decoded JSON body, or raise ``NewsAPIException`` for a non-OK status code."""
        if response.status_code != requests.codes.ok:
            raise NewsAPIException(response.json())
        return response.json()

    # ------------------------------------------------------------------
    # Endpoints
    # ------------------------------------------------------------------

    def get_top_headlines(  # noqa: C901
        self, q=None, qintitle=None, sources=None, language="en", country=None, category=None, page_size=None, page=None
    ):
        """Call the `/top-headlines` endpoint.

        Fetch live top and breaking headlines.

        This endpoint provides live top and breaking headlines for a country, specific category in a country,
        single source, or multiple sources. You can also search with keywords.  Articles are sorted by the earliest
        date published first.

        :param q: Keywords or a phrase to search for in the article title and body.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type q: str or list of str or None

        :param qintitle: Keywords or a phrase to search for in the article title only.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type qintitle: str or None

        :param sources: A comma-separated string of identifiers for the news sources or blogs you want headlines from.
            Use :meth:`NewsApiClient.get_sources` to locate these programmatically, or look at the
            `sources index <https://newsapi.org/sources>`_.  **Note**: you can't mix this param with the
            ``country`` or ``category`` params.
        :type sources: str or None

        :param language: The 2-letter ISO-639-1 code of the language you want to get headlines for.
            See :data:`newsapi.const.languages` for the set of allowed values.
            The default for this method is ``"en"`` (English).  **Note**: this parameter is not mentioned in the
            `/top-headlines documentation <https://newsapi.org/docs/endpoints/top-headlines>`_ as of Sep. 2019,
            but *is* supported by the API.
        :type language: str or None

        :param country: The 2-letter ISO 3166-1 code of the country you want to get headlines for.
            See :data:`newsapi.const.countries` for the set of allowed values.
            **Note**: you can't mix this parameter with the ``sources`` param.
        :type country: str or None

        :param category: The category you want to get headlines for.
            See :data:`newsapi.const.categories` for the set of allowed values.
            **Note**: you can't mix this parameter with the ``sources`` param.
        :type category: str or None

        :param page_size: The number of results to return per page (request).
            Must be between 1 and 100; 20 is the default, 100 is the maximum.
        :type page_size: int or None

        :param page: Use this to page through the results if the total results found is
            greater than the page size.  Must be greater than 0.
        :type page: int or None

        :return: JSON response as nested Python dictionary.
        :rtype: dict
        :raises TypeError: If a parameter has the wrong type.
        :raises ValueError: If a parameter value is not allowed, or ``sources`` is mixed with
            ``country``/``category``.
        :raises NewsAPIException: If the HTTP status code of the response is not OK.
        """
        payload = {}

        if q is not None:
            payload["q"] = self._checked_query(q)

        if qintitle is not None:
            payload["qintitle"] = self._checked_str(qintitle, "keyword/phrase qintitle")

        # ``sources`` cannot be combined with ``country``/``category``; fail before sending anything.
        if sources is not None and (country is not None or category is not None):
            raise ValueError("cannot mix country/category param with sources param.")

        if sources is not None:
            payload["sources"] = self._checked_str(sources, "sources")

        if language is not None:
            payload["language"] = self._checked_choice(language, "language", const.languages, "invalid language")

        if country is not None:
            payload["country"] = self._checked_choice(country, "country", const.countries, "invalid country")

        if category is not None:
            payload["category"] = self._checked_choice(category, "category", const.categories, "invalid category")

        if page_size is not None:
            payload["pageSize"] = self._checked_page_size(page_size)

        if page is not None:
            payload["page"] = self._checked_page(page)

        return self._json_or_raise(self.send_request(const.TOP_HEADLINES_URL, payload))

    def get_everything(  # noqa: C901
        self,
        q=None,
        qintitle=None,
        sources=None,
        domains=None,
        exclude_domains=None,
        from_param=None,
        to=None,
        language=None,
        sort_by=None,
        page=None,
        page_size=None,
    ):
        """Call the `/everything` endpoint.

        Search through millions of articles from over 30,000 large and small news sources and blogs.

        :param q: Keywords or a phrase to search for in the article title and body.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type q: str or list of str or None

        :param qintitle: Keywords or a phrase to search for in the article title only.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type qintitle: str or None

        :param sources: A comma-separated string of identifiers for the news sources or blogs you want headlines from.
            Use :meth:`NewsApiClient.get_sources` to locate these programmatically, or look at the
            `sources index <https://newsapi.org/sources>`_.
        :type sources: str or None

        :param domains: A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com)
            to restrict the search to.
        :type domains: str or None

        :param exclude_domains: A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com)
            to remove from the results.
        :type exclude_domains: str or None

        :param from_param: A date and optional time for the oldest article allowed.
            If a str, the format must conform to ISO-8601 specifically as one of either
            ``%Y-%m-%d`` (e.g. *2019-09-07*) or ``%Y-%m-%dT%H:%M:%S`` (e.g. *2019-09-07T13:04:15*).
            An int or float is assumed to represent a Unix timestamp.  All datetime inputs are assumed to be UTC.
        :type from_param: str or datetime.datetime or datetime.date or int or float or None

        :param to: A date and optional time for the newest article allowed.
            If a str, the format must conform to ISO-8601 specifically as one of either
            ``%Y-%m-%d`` (e.g. *2019-09-07*) or ``%Y-%m-%dT%H:%M:%S`` (e.g. *2019-09-07T13:04:15*).
            An int or float is assumed to represent a Unix timestamp.  All datetime inputs are assumed to be UTC.
        :type to: str or datetime.datetime or datetime.date or int or float or None

        :param language: The 2-letter ISO-639-1 code of the language you want to get headlines for.
            See :data:`newsapi.const.languages` for the set of allowed values.
        :type language: str or None

        :param sort_by: The order to sort articles in.
            See :data:`newsapi.const.sort_method` for the set of allowed values.
        :type sort_by: str or None

        :param page: Use this to page through the results if the total results found is
            greater than the page size.  Must be greater than 0.
        :type page: int or None

        :param page_size: The number of results to return per page (request).
            Must be between 1 and 100; 20 is the default, 100 is the maximum.
        :type page_size: int or None

        :return: JSON response as nested Python dictionary.
        :rtype: dict
        :raises TypeError: If a parameter has the wrong type.
        :raises ValueError: If a parameter value is not allowed.
        :raises NewsAPIException: If the HTTP status code of the response is not OK.
        """
        payload = {}

        if q is not None:
            payload["q"] = self._checked_query(q)

        if qintitle is not None:
            payload["qintitle"] = self._checked_str(qintitle, "keyword/phrase qintitle")

        if sources is not None:
            payload["sources"] = self._checked_str(sources, "sources")

        if domains is not None:
            payload["domains"] = self._checked_str(domains, "domains")

        if exclude_domains is not None:
            payload["excludeDomains"] = self._checked_str(exclude_domains, "exclude_domains")

        # Date bounds are normalised to strings by ``stringify_date_param``.
        if from_param is not None:
            payload["from"] = stringify_date_param(from_param)

        if to is not None:
            payload["to"] = stringify_date_param(to)

        if language is not None:
            payload["language"] = self._checked_choice(language, "language", const.languages, "invalid language")

        if sort_by is not None:
            payload["sortBy"] = self._checked_choice(sort_by, "sort_by", const.sort_method, "invalid sort")

        if page_size is not None:
            payload["pageSize"] = self._checked_page_size(page_size)

        if page is not None:
            payload["page"] = self._checked_page(page)

        return self._json_or_raise(self.send_request(const.EVERYTHING_URL, payload))

    def get_sources(self, category=None, language=None, country=None):  # noqa: C901
        """Call the `/sources` endpoint.

        Fetch the subset of news publishers that /top-headlines are available from.

        :param category: Find sources that display news of this category.
            See :data:`newsapi.const.categories` for the set of allowed values.
        :type category: str or None

        :param language: Find sources that display news in a specific language.
            See :data:`newsapi.const.languages` for the set of allowed values.
        :type language: str or None

        :param country: Find sources that display news in a specific country.
            See :data:`newsapi.const.countries` for the set of allowed values.
        :type country: str or None

        :return: JSON response as nested Python dictionary.
        :rtype: dict
        :raises TypeError: If a parameter is not a string.
        :raises ValueError: If a parameter value is not in the corresponding allowed set.
        :raises NewsAPIException: If the HTTP status code of the response is not OK.
        """
        payload = {}

        if language is not None:
            payload["language"] = self._checked_choice(language, "language", const.languages, "invalid language")

        if country is not None:
            payload["country"] = self._checked_choice(country, "country", const.countries, "invalid country")

        if category is not None:
            payload["category"] = self._checked_choice(category, "category", const.categories, "invalid category")

        return self._json_or_raise(self.send_request(const.SOURCES_URL, payload))