diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5cfe738 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,64 @@ +# Changelog + +## [v2021.08.28.1](https://github.com/seanap/Audiobooks.bundle/tree/v2021.08.28.1) (2021-08-28) + +[Full Changelog](https://github.com/seanap/Audiobooks.bundle/compare/2021.08.27.1...v2021.08.28.1) + +**Implemented enhancements:** + +- Improve score calculation; Fix crash on single-genre [\#18](https://github.com/seanap/Audiobooks.bundle/pull/18) ([djdembeck](https://github.com/djdembeck)) + +**Fixed bugs:** + +- Handle 'special' releases [\#17](https://github.com/seanap/Audiobooks.bundle/issues/17) +- Improve scoring for auto scan [\#15](https://github.com/seanap/Audiobooks.bundle/issues/15) + +## [2021.08.27.1](https://github.com/seanap/Audiobooks.bundle/tree/2021.08.27.1) (2021-08-27) + +[Full Changelog](https://github.com/seanap/Audiobooks.bundle/compare/v2021.08.24.2...2021.08.27.1) + +**Implemented enhancements:** + +- Use keywords search instead of title search [\#9](https://github.com/seanap/Audiobooks.bundle/issues/9) +- Maintainability/CodeFactor updates [\#13](https://github.com/seanap/Audiobooks.bundle/pull/13) ([djdembeck](https://github.com/djdembeck)) + +**Fixed bugs:** + +- Narrator is used as genre tag [\#11](https://github.com/seanap/Audiobooks.bundle/issues/11) + +## [v2021.08.24.2](https://github.com/seanap/Audiobooks.bundle/tree/v2021.08.24.2) (2021-08-25) + +[Full Changelog](https://github.com/seanap/Audiobooks.bundle/compare/v2021.08.24.1...v2021.08.24.2) + +**Implemented enhancements:** + +- Updates from unending fork; pep8 [\#12](https://github.com/seanap/Audiobooks.bundle/pull/12) ([djdembeck](https://github.com/djdembeck)) + +## [v2021.08.24.1](https://github.com/seanap/Audiobooks.bundle/tree/v2021.08.24.1) (2021-08-25) + +[Full Changelog](https://github.com/seanap/Audiobooks.bundle/compare/v2019.07.29.1...v2021.08.24.1) + +**Implemented enhancements:** + +- Add author and year to search 
results [\#10](https://github.com/seanap/Audiobooks.bundle/pull/10) ([djdembeck](https://github.com/djdembeck)) + +## [v2019.07.29.1](https://github.com/seanap/Audiobooks.bundle/tree/v2019.07.29.1) (2021-05-10) + +[Full Changelog](https://github.com/seanap/Audiobooks.bundle/compare/dfb7a67fe342ef85aa8b8866125bc00570a5e53a...v2019.07.29.1) + +**Implemented enhancements:** + +- Series info [\#1](https://github.com/seanap/Audiobooks.bundle/issues/1) +- Code cleanup/Preparation for Python3 [\#4](https://github.com/seanap/Audiobooks.bundle/pull/4) ([Cabalist](https://github.com/Cabalist)) + +**Closed issues:** + +- No poster [\#2](https://github.com/seanap/Audiobooks.bundle/issues/2) + +**Merged pull requests:** + +- Fix typos [\#3](https://github.com/seanap/Audiobooks.bundle/pull/3) ([gene1wood](https://github.com/gene1wood)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/Contents/Code/__init__.py b/Contents/Code/__init__.py index 5314459..833eb32 100644 --- a/Contents/Code/__init__.py +++ b/Contents/Code/__init__.py @@ -10,11 +10,11 @@ from update_tools import UpdateTool from urls import SiteUrl -VERSION_NO = '2021.08.28.1' +VERSION_NO = '2021.08.29.2' # Delay used when requesting HTML, # may be good to have to prevent being banned from the site -REQUEST_DELAY = 10 +REQUEST_DELAY = 1 # Starting value for score before deductions are taken. INITIAL_SCORE = 100 @@ -23,7 +23,7 @@ # Any score lower than this will be ignored. 
IGNORE_SCORE = 45 -THREAD_MAX = 20 +#THREAD_MAX = 20 # Setup logger log = Logging() @@ -172,11 +172,13 @@ def search(self, results, media, lang, manual): search_helper.pre_search_logging() # Run helper before passing to SearchTool - normalizedName = self.normalize_name(media.album) + normalizedName = self.normalize_name(search_helper.media.album) # Strip title of things like unabridged and spaces search_helper.strip_title(normalizedName) + # Validate author name + search_helper.validate_author_name() # Generate search url - searchUrl = self.create_search_url(ctx, media, search_helper.normalizedName) + searchUrl = self.create_search_url(ctx, search_helper) # Run actual search, and set the variable to it's return result = self.doSearch(ctx, searchUrl) @@ -193,7 +195,7 @@ def search(self, results, media, lang, manual): normalizedName ) - info = self.run_search(search_helper, media, result) + info = self.run_search(search_helper, result) # Output the final results. log.separator(log_level="debug") @@ -249,6 +251,7 @@ def update(self, metadata, media, lang, force=False): html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY) except Exception as e: log.info(e) + # Instantiate update helper update_helper = UpdateTool(force, lang, media, metadata, url) @@ -262,7 +265,6 @@ def update(self, metadata, media, lang, force=False): self.use_copyright_date(update_helper, html) update_helper.date = self.getDateFromString(update_helper.date) - self.handle_series(update_helper, html) # cleanup synopsis @@ -329,20 +331,20 @@ def normalize_name(self, input_name): ) return normalizedName - def create_search_url(self, ctx, media, normalizedName): + def create_search_url(self, ctx, helper): # Make the URL - if media.artist: + if helper.media.artist: searchUrl = ctx['AUD_SEARCH_URL'].format( ( - String.Quote((normalizedName).encode('utf-8'), usePlus=True) + String.Quote((helper.normalizedName).encode('utf-8'), usePlus=True) ), ( - String.Quote((media.artist).encode('utf-8'), usePlus=True) + 
String.Quote((helper.media.artist).encode('utf-8'), usePlus=True) ) ) else: searchUrl = ctx['AUD_KEYWORD_SEARCH_URL'] % ( - String.Quote((normalizedName).encode('utf-8'), usePlus=True) + String.Quote((helper.normalizedName).encode('utf-8'), usePlus=True) ) return searchUrl @@ -350,14 +352,9 @@ def doSearch(self, ctx, url): html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY) found = [] - log.separator(msg='just before new xpath line', log_level="debug") # Set append to the returned array from this function found = self.before_xpath(ctx, found, html) - log.separator(msg='just after new xpath line', log_level="debug") - # Set append to the returned array from this function - found = self.after_xpath(ctx, found, html) - return found def before_xpath(self, ctx, found, html): @@ -378,6 +375,12 @@ def before_xpath(self, ctx, found, html): ) datetext = re.sub(r'[^0-9\-]', '', datetext) date = self.getDateFromString(datetext) + language = self.getStringContentFromXPath( + r, ( + u'div/div/div/div/div/div/span/ul/li' + '[contains (@class,"languageLabel")]/span' + ) + ).split()[1] narrator = self.getStringContentFromXPath( r, ( u'div/div/div/div/div/div/span/ul/li' @@ -403,6 +406,7 @@ def before_xpath(self, ctx, found, html): { 'author': author, 'date': date, + 'language': language, 'narrator': narrator, 'thumb': thumb, 'title': title, @@ -411,55 +415,7 @@ def before_xpath(self, ctx, found, html): ) return found - def after_xpath(self, ctx, found, html): - for r in html.xpath( - '//div[contains (@class, "adbl-search-result")]' - ): - author = self.getStringContentFromXPath( - r, ( - 'div/div/ul/li/' - '/a[contains (@class,"author-profile-link")][1]' - ) - ) - date = self.getDateFromString( - self.getStringContentFromXPath( - r, ( - u'div/div/ul/li[contains (., "{0}")]' - '/span[2]//text()' - ).format( - ctx['REL_DATE'] - ) - ) - ) - murl = self.getAnchorUrlFromXPath( - r, 'div/div/div/div/a[1]' - ) - narrator = self.getStringContentFromXPath( - r, u'div/div/ul/li[contains 
(., "{0}")]//a[1]'.format( - ctx['NAR_BY'] - ) - ) - thumb = self.getImageUrlFromXPath( - r, 'div[contains (@class,"adbl-prod-image-sample-cont")]/a/img' - ) - title = self.getStringContentFromXPath( - r, 'div/div/div/div/a[1]' - ) - log.separator(msg='XPATH SEARCH HIT', log_level="debug") - - found.append( - { - 'author': author, - 'date': date, - 'narrator': narrator, - 'thumb': thumb, - 'title': title, - 'url': murl, - } - ) - return found - - def run_search(self, helper, media, result): + def run_search(self, helper, result): # Walk the found items and gather extended information info = [] @@ -469,13 +425,8 @@ def run_search(self, helper, media, result): if not valid_itemId: continue - title = f['title'] - thumb = f['thumb'] date = f['date'] year = '' - author = f['author'] - narrator = f['narrator'] - if date is not None: year = date.year @@ -483,59 +434,7 @@ def run_search(self, helper, media, result): if helper.check_if_preorder(date): continue - # Score the album name - scorebase1 = media.album - scorebase2 = title.encode('utf-8') - album_score = INITIAL_SCORE - Util.LevenshteinDistance( - scorebase1, scorebase2 - ) - log.debug("Score from album: " + str(album_score)) - - # Score the author name - if media.artist: - scorebase3 = media.artist - scorebase4 = author - author_score = INITIAL_SCORE - Util.LevenshteinDistance( - scorebase3, scorebase4 - ) - log.debug("Score from author: " + str(author_score)) - # Find the difference in score between name and author - score = ( - album_score + author_score - ) - INITIAL_SCORE - else: - score = album_score - - log.info("Result #" + str(i + 1)) - # Log basic metadata - data_to_log = [ - {'ID is': valid_itemId}, - {'Title is': title}, - {'Author is': author}, - {'Narrator is': narrator}, - {'Date is ': str(date)}, - {'Score is': str(score)}, - {'Thumb is': thumb}, - ] - log.metadata(data_to_log, log_level="info") - - if score >= IGNORE_SCORE: - info.append( - { - 'id': valid_itemId, - 'title': title, - 'year': year, 
- 'date': date, - 'score': score, - 'thumb': thumb, - 'artist': author - } - ) - else: - log.info( - '# Score is below ignore boundary (%s)... Skipping!', - IGNORE_SCORE - ) + self.score_result(f, helper, i, info, valid_itemId, year) # Print separators for easy reading if i <= len(result): @@ -544,6 +443,110 @@ def run_search(self, helper, media, result): info = sorted(info, key=lambda inf: inf['score'], reverse=True) return info + def score_result(self, f, helper, i, info, valid_itemId, year): + author = f['author'] + date = f['date'] + language = f['language'] + narrator = f['narrator'] + thumb = f['thumb'] + title = f['title'] + + # Array to hold score points for processing + all_scores = [] + + # Album name score + all_scores.append( + self.score_album(helper, title) + ) + # Author name score + all_scores.append( + self.score_author(author, helper) + ) + # Library language score + all_scores.append( + self.score_language(helper, language) + ) + + # Because builtin sum() isn't available + sum=lambda numberlist:reduce(lambda x,y:x+y,numberlist,0) + # Subtract difference from initial score + score = INITIAL_SCORE - sum(all_scores) + + log.info("Result #" + str(i + 1)) + # Log basic metadata + data_to_log = [ + {'ID is': valid_itemId}, + {'Title is': title}, + {'Author is': author}, + {'Narrator is': narrator}, + {'Date is ': str(date)}, + {'Score is': str(score)}, + {'Thumb is': thumb}, + ] + log.metadata(data_to_log, log_level="info") + + if score >= IGNORE_SCORE: + info.append( + { + 'id': valid_itemId, + 'title': title, + 'year': year, + 'date': date, + 'score': score, + 'thumb': thumb, + 'artist': author + } + ) + else: + log.info( + '# Score is below ignore boundary (%s)... Skipping!', + IGNORE_SCORE + ) + + def score_album(self, helper, title): + """ + Compare the input album similarity to the search result album. 
+ Score is calculated with LevenshteinDistance + """ + scorebase1 = helper.media.album + scorebase2 = title.encode('utf-8') + album_score = Util.LevenshteinDistance( + scorebase1, scorebase2 + ) + log.debug("Score from album: " + str(album_score)) + return album_score + + def score_author(self, author, helper): + """ + Compare the input author similarity to the search result author. + Score is calculated with LevenshteinDistance + """ + if helper.media.artist: + scorebase3 = helper.media.artist + scorebase4 = author + author_score = Util.LevenshteinDistance( + scorebase3, scorebase4 + ) + log.debug("Score from author: " + str(author_score)) + return author_score + + def score_language(self, helper, language): + """ + Compare the library language to search results + and knock off 2 points if they don't match. + """ + lang_dict = { + Locale.Language.English: 'English', + 'de': 'German', + 'fr': 'French', + 'it': 'Italian' + } + + if language != lang_dict[helper.lang]: + log.debug("Book is not library language, deduct 2 points") + return 2 + return 0 + """ Update functions that require PMS imports, thus we cannot 'outsource' them to UpdateTool @@ -659,7 +662,7 @@ def use_copyright_date(self, helper, html): helper.date = re.match(".?(\d{4}).*", cstring).group(1) def handle_series(self, helper, html): - for r in html.xpath('//span[contains(@class, "seriesLabel")]'): + for r in html.xpath('//li[contains(@class, "seriesLabel")]'): helper.series = self.getStringContentFromXPath( r, '//li[contains(@class, "seriesLabel")]//a[1]' ) @@ -667,7 +670,9 @@ def handle_series(self, helper, html): r, '//li[contains(@class, "seriesLabel")]//a[2]' ) - helper.series_def = helper.series2 if helper.series2 else helper.series + helper.series_def = ( + helper.series2 if helper.series2 else helper.series + ) helper.volume = self.getStringContentFromXPath( r, '//li[contains(@class, "seriesLabel")]/text()[2]' @@ -680,7 +685,9 @@ def handle_series(self, helper, html): if helper.volume2 == ",": 
helper.volume2 = "" - helper.volume_def = helper.helper.volume2 if helper.volume2 else helper.volume + helper.volume_def = ( + helper.helper.volume2 if helper.volume2 else helper.volume + ) # fix series when audible 'forgets' the series link… if not helper.series_def: @@ -709,22 +716,30 @@ def compile_metadata(self, helper): if helper.genre_child: helper.metadata.genres.add(helper.genre_child) - self.parse_author_narrator(helper) - + self.add_narrators_to_moods(helper) + self.add_authors_to_moods(helper) self.parse_series(helper) # Other metadata helper.metadata.title = helper.title helper.metadata.title_sort = ' - '.join( - filter(None, [(helper.series_def + helper.volume_def), helper.title]) + filter( + None, [(helper.series_def + helper.volume_def), helper.title] + ) ) helper.metadata.studio = helper.studio helper.metadata.summary = helper.synopsis - if Prefs['cover_options'] == "Use Audible cover": - helper.metadata.posters[1] = Proxy.Media(HTTP.Request(helper.thumb)) + if Prefs['cover_options'] == ( + "Use Audible cover" + ): + helper.metadata.posters[1] = Proxy.Media( + HTTP.Request(helper.thumb) + ) helper.metadata.posters.validate_keys(helper.thumb) - elif Prefs['cover_options'] == "Download cover but don't overwrite existing": + elif Prefs['cover_options'] == ( + "Download cover but don't overwrite existing" + ): helper.metadata.posters[helper.thumb] = Proxy.Media( HTTP.Request(helper.thumb), sort_order=1 ) @@ -741,7 +756,7 @@ def compile_metadata(self, helper): helper.metadata.collections.add(helper.series2) helper.writeInfo() - def parse_author_narrator(self, helper): + def add_narrators_to_moods(self, helper): # Add Narrators to Styles narrators_list = helper.narrator.split(",") narr_contributors_list = [ @@ -757,6 +772,7 @@ def parse_author_narrator(self, helper): ]: helper.metadata.styles.add(narrator.strip()) + def add_authors_to_moods(self, helper): # Add Authors to Moods author_list = helper.author.split(",") author_contributers_list = [ @@ 
-789,15 +805,20 @@ def parse_series(self, helper): if helper.series_def.endswith(checkseries): seriesshort = helper.series_def[:-len(checkseries)] - y = re.match( - "(.*)((: .* " + helper.volume_def[2:] + ": A .* Series)|" - "(((:|,|-) )((" + seriesshort + helper.volume_def + ")|" - "((? current_date: + log.debug("Excluding pre-order book") return True def get_id_from_url(self, item): @@ -107,3 +108,21 @@ def strip_title(self, normalizedName): ) # Give access of this variable to the class self.normalizedName = normalizedName + + def validate_author_name(self): + """ + Checks a list of known bad author names. + If matched, author name is set to None to prevent + it being used in search query. + """ + strings_to_check = [ + "[Unknown Artist]" + ] + for test_name in strings_to_check: + if self.media.artist == test_name: + self.media.artist = None + log.info( + "Artist name seems to be bad, " + "not using it in search." + ) + break diff --git a/Contents/Code/update_tools.py b/Contents/Code/update_tools.py index 3ccbd8f..2d2461d 100644 --- a/Contents/Code/update_tools.py +++ b/Contents/Code/update_tools.py @@ -88,4 +88,4 @@ def writeInfo(self): ] log.metadata_arrs(multi_arr, log_level="info") - log.separator(log_level="info") \ No newline at end of file + log.separator(log_level="info") diff --git a/README.md b/README.md index 2be11ab..0fa1ab6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # SeaNap's Audiobook (Audible) metadata agent +[![CodeFactor](https://www.codefactor.io/repository/github/seanap/audiobooks.bundle/badge)](https://www.codefactor.io/repository/github/seanap/audiobooks.bundle) ## What is this? A Plex Metadata Agent for Audiobooks stored in a music library.