diff --git a/yoink/cli.py b/yoink/cli.py index 54d44e4..74d1547 100644 --- a/yoink/cli.py +++ b/yoink/cli.py @@ -1,5 +1,6 @@ from email.policy import default import os +from subprocess import check_output import sys import click from click_default_group import DefaultGroup @@ -11,6 +12,29 @@ from yoink.comic import Comic queue = [] + +def download_comic(url, path, series): + try: + comic = Comic(url, path=path if path else None) + except ValueError: + click.echo(f'{url} is not supported or is not a valid URL') + return 1 + + click.echo(f'Downloading {comic.title}') + comic.archiver.download() + + click.echo('Building comic archive') + comic.archiver.generate_archive() + + click.echo('Cleaning up') + comic.archiver.cleanup_worktree() + + click.echo('Success') + + if series and comic.next: + download_comic(comic.next, path, series) + + @click.group(cls=DefaultGroup, default='download', default_if_no_args=True) def yoink(): pass @@ -35,25 +59,7 @@ def download(url, path, series): click.echo('url cannot be blank') return 1 - try: - comic = Comic(url, path=path if path else None) - except ValueError: - click.echo(f'{url} is not supported or is not a valid URL') - return 1 - - if series: - comic.generate_series_queue() - - click.echo(f'Downloading {comic.title}') - comic.archiver.download() - - click.echo('Building comic archive') - comic.archiver.generate_archive() - - click.echo('Cleaning up') - comic.archiver.cleanup_worktree() - - click.echo('Success') + download_comic(url, path, series) diff --git a/yoink/comic.py b/yoink/comic.py index cce7679..f9b6d45 100644 --- a/yoink/comic.py +++ b/yoink/comic.py @@ -17,7 +17,7 @@ class Comic(Scrapable): return image.endswith('.jpg' or '.jpeg') - def __get_image_src(self, comic): + def __get_image_src(self, comic) -> str: if comic.attrs: try: return comic.attrs['src'] @@ -27,7 +27,7 @@ class Comic(Scrapable): for image in comic: return image.attrs['src'] - def __parse_soup(self): + def __parse_soup(self) -> list: soup = { 'default': self.soup.find_all('div', class_='separator'), 'no-div': self.soup.find_all('img', attrs={'width': '1000px'}), @@ -43,13 +43,13 @@ class Comic(Scrapable): return comics @property - def filelist(self): + def filelist(self) -> list: comics = self.__parse_soup() return [comic for comic in list(map(self.__get_image_src, comics)) if not comic.endswith(skippable_images)] @property - def title(self): + def title(self) -> str: if 'readallcomics' in self.url: return self.soup.title.string.replace(' | Read All Comics Online For Free', '').replace('…', '').replace('#', '').replace(':', '').strip() elif 'mangadex' in self.url: @@ -58,7 +58,7 @@ class Comic(Scrapable): return 'Uncategorized' @property - def category(self): + def category(self) -> str: data = self.soup.find('a', attrs={'rel': 'category tag'} ) return data.text @@ -71,11 +71,10 @@ class Comic(Scrapable): @property def issue_number(self) -> int: # matches any year in parentheses (xxxx) - # TODO yoink/comic.py:74: DeprecationWarning: invalid escape sequence '\(' - date_reg = re.search("(\([12]\d{3}\))", self.title) + year_regex = re.search("(\([12]\d{3}\))", self.title) try: - return int(self.title[:date_reg.start() - 1][-1]) + return int(self.title[:year_regex.start() - 1][-1]) except TypeError: return 1 except AttributeError: @@ -86,7 +85,7 @@ class Comic(Scrapable): return @property - def next(self): + def next(self) -> str: ''' returns the url of the next comic in the series. returns None if current''' try: return self.soup.find('img', attrs={'title': 'Next Issue'}).parent.attrs['href'] or None @@ -94,7 +93,7 @@ class Comic(Scrapable): return None @property - def prev(self): + def prev(self) -> str: ''' returns the url of the previous comic in the series. returns None if first''' try: return self.soup.find('img', attrs={'title': 'Previous Issue'}).parent.attrs['href'] @@ -102,7 +101,7 @@ class Comic(Scrapable): return None - def can_remove(self, filename): + def can_remove(self, filename : str) -> bool: return not filename.endswith(required_comic_files) @@ -112,10 +111,10 @@ class ComicArchiver: self.worktree = library if library else os.path.join(library_path, f'comics/{self.comic.title}') self.queue = [] - def add(self, link): + def add(self, link : str) -> None: self.queue.append(link) - def download(self): + def download(self) -> None: if not os.path.exists(self.worktree): os.makedirs(self.worktree, mode=0o777) @@ -163,8 +162,8 @@ if __name__ == '__main__': comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/') # all links # comic = Comic('http://readallcomics.com/static-season-one-001-2021/') # no prev link # comic = Comic('http://readallcomics.com/static-season-one-6-2022/') # no next link - comic = Comic('http://readallcomics.com/superman-vs-lobo-4-2022/') - test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/' - print(comic.next) - print(comic.prev) - print(comic.issue_number) \ No newline at end of file + # comic = Comic('http://readallcomics.com/superman-vs-lobo-4-2022/') + # test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/' + # print(comic.next) + # print(comic.prev) + # print(comic.issue_number) \ No newline at end of file diff --git a/yoink/scraper.py b/yoink/scraper.py index 73bc5c8..ea91c6f 100644 --- a/yoink/scraper.py +++ b/yoink/scraper.py @@ -9,7 +9,7 @@ from yoink.common import supported_sites, library_path class Scrapable: - def __init__(self, url) -> None: + def __init__(self, url : str) -> None: self.url = url comic_path = os.path.join(library_path, 'comics') @@ -35,7 +35,7 @@ class Scrapable: def soup(self) -> BeautifulSoup: return BeautifulSoup(self.markup, 'html.parser') - def __check_site_support(self): + def __check_site_support(self) -> None: num_of_sites = len(supported_sites) while num_of_sites > 0: diff --git a/yoink/tests/test_basic.py b/yoink/tests/test_basic.py index f7ae7b0..4ec12c1 100644 --- a/yoink/tests/test_basic.py +++ b/yoink/tests/test_basic.py @@ -14,14 +14,16 @@ class BasicTestCase(unittest.TestCase): def setUp(self): self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/' self.test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/' - self.comic = Comic(self.test_comic_b) + self.comic = Comic(self.test_comic) self.archiver = ComicArchiver(self.comic) self.remove_queue = [] self.expected_title = 'Static Season One 4 (2021)' self.expected_title_b = 'Captain Marvel vs. Rogue (2021 – Part 1)' self.expected_category = 'Static: Season One' self.expected_category_b = 'Captain Marvel vs. Rogue' - self.expected_issue_num = 1 + self.expected_issue_num = 4 + self.expected_next_url = 'http://readallcomics.com/static-season-one-5-2022/' + self.expected_prev_url = 'http://readallcomics.com/static-season-one-003-2021/' def tearDown(self) -> None: @@ -34,10 +36,10 @@ class BasicTestCase(unittest.TestCase): self.assertTrue('!DOCTYPE html' in str(self.comic.markup)) def test_001_comic_has_valid_title(self): - self.assertEqual(self.expected_title_b, self.comic.title) + self.assertEqual(self.expected_title, self.comic.title) def test_002_comic_has_valid_category(self): - self.assertEqual(self.expected_category_b, self.comic.category) + self.assertEqual(self.expected_category, self.comic.category) def test_003_empty_comic_folder(self): self.assertEqual(len(os.listdir(os.path.join(library_path, 'comics'))), 0) @@ -73,4 +75,10 @@ class BasicTestCase(unittest.TestCase): def test_010_valid_issue_number(self): self.assertIsInstance(self.comic.issue_number, int) self.assertEqual(self.comic.issue_number, self.expected_issue_num) + + def test_011_has_next_link(self): + self.assertEqual(self.comic.next, self.expected_next_url) + + def test_012_has_prev_link(self): + self.assertEqual(self.comic.prev, self.expected_prev_url)