Skip to content

Commit

Permalink
implement title search
Browse files Browse the repository at this point in the history
  • Loading branch information
Koushikphy committed Jul 30, 2024
1 parent 435aaeb commit 7ef67be
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 11 deletions.
85 changes: 75 additions & 10 deletions kbib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from datetime import timedelta
from rich import print as rprint
from collections import Counter, defaultdict
from jellyfish import damerau_levenshtein_distance as dld
try:
import pdf2doi
PDF_AVAILABLE = True
Expand All @@ -35,7 +36,7 @@ def error(self, message):



# Base URLs of the web services used by this module.
# NOTE(review): BARE_API is assigned twice in a row; the second (https)
# assignment is the one that takes effect at import time.
BARE_API = "http://api.crossref.org/" # API to get bibtex information
BARE_API = "https://api.crossref.org/" # API to get bibtex information
ABVR_API = "https://abbreviso.toolforge.org/abbreviso/a/" # API to get abbreviated journal name
DOI_API = 'https://doi.org/'  # DOI resolver base URL

Expand All @@ -48,6 +49,7 @@ def error(self, message):
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
" ETA:",
TimeRemainingColumn(),
# transient=True
)


Expand Down Expand Up @@ -193,24 +195,34 @@ def reconfigureBibs(bibs):
def getFullRefList(doi):
# Get bibtex information for all the references
found, tRefs = get_all_ref(doi)

if found:
refDOIs, noDOIs = [], []
for r in tRefs:
if "DOI" in r:
refDOIs.append(r)
else:
noDOIs.append(r)
refDOIs, artDOIs, noDOIs = [], [], []
with progress:
for r in progress.track(tRefs,description='[green bold]Parsing reference list...'):
if "DOI" in r:
refDOIs.append(r)
elif 'article-title' in r:
artDOIs.append(r)
res= check_again(r['article-title'])
if res:
refDOIs.append(res)
else:
noDOIs.append(r)
else:
noDOIs.append(r)

fullRef = []
if len(noDOIs):
rprint(f"[red]DOIs not found for following {len(noDOIs)} references:")
for r in noDOIs:
print(r)

fullRef = []
# for ref in tqdm(refDOIs,desc='Parsing bibtex entries from reference list'):
with progress:

# for ref in tqdm(refDOIs,desc='Parsing bibtex entries from reference list'):
for ref in progress.track(refDOIs,description='[green bold]Parsing bibtex entries from reference list...'):

f, refVal = get_bib(ref['DOI'])
if f:
fullRef.append(refVal)
Expand All @@ -220,6 +232,59 @@ def getFullRefList(doi):



# Browser-like HTTP request headers; passed as ``headers=`` to the session
# request made by the title lookup below.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.7'
    ),
    'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # Fetch-metadata headers.
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'cross-site',
    'Sec-Fetch-User': '?1',
    'content-type': 'application/json',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
    ),
    # Client-hint headers matching the User-Agent above.
    'sec-ch-ua': (
        '"Google Chrome";v="125", "Chromium";v="125", '
        '"Not.A/Brand";v="24"'
    ),
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Linux"',
}


def _same_title(a, b):
    """Return True when the two titles are exactly equal."""
    return a == b


def _title_contained(a, b):
    """Return True when either title is a substring of the other."""
    return a in b or b in a


def _title_nearly_equal(a, b):
    """Return True when the titles differ by less than 10% of the longer one.

    The difference is the Damerau-Levenshtein edit distance divided by the
    length of the longer string.
    """
    longest = max(len(a), len(b))
    # Two empty strings are identical; answering here also avoids dividing
    # by zero below.
    return longest == 0 or (dld(a, b) / longest) < 0.1


# Title matchers, ordered from strictest to most permissive. Each takes two
# strings and returns a bool.
methods = [
    _same_title,
    _title_contained,
    _title_nearly_equal,
]

def check_again(title):
    """Search Crossref by title and return the matching work record, if any.

    Queries the ``works`` endpoint under ``BARE_API`` with
    ``query.bibliographic`` set to ``title`` (5 rows) and compares the first
    title of every returned item against ``title``, case-insensitively, using
    each matcher in ``methods`` in order. All items are tried with one
    matcher before moving on to the next one.

    Parameters
    ----------
    title : str
        Article title taken from a reference entry.

    Returns
    -------
    dict or None
        The first matching item, guaranteed to carry a ``'DOI'`` key, or
        ``None`` when nothing matches.
    """
    query = title.lower()
    if not query.strip():
        # A blank query is a substring of every title and would produce a
        # false match, so there is nothing meaningful to search for.
        return None

    url = "{}works".format(BARE_API)
    params = {
        'rows': '5',
        'query.bibliographic': title,
    }
    r = session.get(url, params=params, headers=headers)
    allItems = r.json()['message']['items']

    # Lower-case each candidate title once. Items with a missing, empty or
    # blank title are dropped: indexing an empty title list would raise and a
    # blank title would match any query through the substring matcher.
    candidates = []
    for it in allItems:
        titles = it.get('title')
        if titles and titles[0].strip():
            candidates.append((titles[0].lower(), it))

    for method in methods:
        for candTitle, it in candidates:
            if not method(candTitle, query):
                continue
            if "DOI" in it:
                return it
            if 'URL' in it:
                # No DOI field: recover it from a doi.org resolver URL
                # (assumes the "<scheme>://[dx.]doi.org/<DOI>" form).
                _, sep, doi = it['URL'].partition('doi.org/')
                if sep and doi:
                    it['DOI'] = doi
                    return it
    return None


# print(title)
# print('-'*10)
# for it in allItems:
# if 'title' in it:
# print(it['title'][0])
# print()



def checkPdf(files):
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
txt = f.read()

setup(name='kbib',
version='0.1.10',
version='0.2.0',
description='A command line tool to get bibtex information from DOIs and PDFs',
long_description=txt,
long_description_content_type='text/markdown',
Expand All @@ -30,6 +30,7 @@
install_requires=[
'bibtexparser>=1.4.0',
'rich>=12.6.0',
'jellyfish>=1.1.0',
'requests_cache'
],
extras_require = {
Expand Down

0 comments on commit 7ef67be

Please sign in to comment.