From ce1afe0ef8bd75b8db6e99464fb0454118591ee0 Mon Sep 17 00:00:00 2001 From: Bryan Bailey Date: Sun, 20 Mar 2022 19:26:04 -0400 Subject: [PATCH] added issue_num, prev and next properties; fixed issue with jpg file naming --- yoink/cli.py | 39 +++++++++--------- yoink/comic.py | 62 ++++++++++++++++++++++++++-- yoink/common.py | 5 +-- yoink/scraper.py | 7 ++-- yoink/tests/test_basic.py | 20 ++++++--- yoink/torrent.py | 86 --------------------------------------- 6 files changed, 97 insertions(+), 122 deletions(-) delete mode 100644 yoink/torrent.py diff --git a/yoink/cli.py b/yoink/cli.py index 085fbeb..54d44e4 100644 --- a/yoink/cli.py +++ b/yoink/cli.py @@ -24,37 +24,36 @@ def init(): @yoink.command() -@click.option('-c', '--comic', is_flag=True, help='Download a Comic file') -@click.option('-t', '--torrent', is_flag=True, help='Download a Torrent') +# @click.option('-c', '--comic', is_flag=True, help='Download a Comic file') +# @click.option('-t', '--torrent', is_flag=True, help='Download a Torrent') +@click.option('-s', '--series', is_flag=True, help='Download the entire series') @click.option('-p', '--path', help='Change the download path') @click.argument('url') -def download(url, comic, torrent, path): +def download(url, path, series): # Account for whitespace/blank urls if url.strip() == '': click.echo('url cannot be blank') return 1 - if comic: - try: - comic = Comic(url, path=path if path else None) - except ValueError: - click.echo(f'{url} is not supported or is not a valid URL') - return 1 + try: + comic = Comic(url, path=path if path else None) + except ValueError: + click.echo(f'{url} is not supported or is not a valid URL') + return 1 - click.echo(f'Downloading {comic.title}') - comic.archiver.download() + if series: + comic.generate_series_queue() - click.echo('Building comic archive') - comic.archiver.generate_archive() + click.echo(f'Downloading {comic.title}') + comic.archiver.download() - click.echo('Cleaning up') - comic.archiver.cleanup_worktree() - - click.echo('Success') - - if torrent: - click.echo('Opps! It looks like Torrents aren\'t yet fully supported.') + click.echo('Building comic archive') + comic.archiver.generate_archive() + click.echo('Cleaning up') + comic.archiver.cleanup_worktree() + + click.echo('Success') diff --git a/yoink/comic.py b/yoink/comic.py index c896f21..c72c556 100644 --- a/yoink/comic.py +++ b/yoink/comic.py @@ -4,6 +4,7 @@ from yoink.scraper import Scrapable import os import shutil import urllib +import re @@ -61,6 +62,44 @@ class Comic(Scrapable): data = self.soup.find('a', attrs={'rel': 'category tag'} ) return data.text + @property + def series_list(self) -> list: + queue = [] + + return queue + + @property + def issue_number(self) -> int: + date_reg = re.search("(\([12]\d{3}\))", self.title) + + try: + return int(self.title[:date_reg.start() - 1][-1]) + except TypeError: + return 1 + except AttributeError: + return 1 + + @property + def volume(self) -> int: + return + + @property + def next(self): + ''' returns the url of the next comic in the series. returns None if current''' + try: + return self.soup.find('img', attrs={'title': 'Next Issue'}).parent.attrs['href'] or None + except AttributeError: + return None + + @property + def prev(self): + ''' returns the url of the previous comic in the series. returns None if first''' + try: + return self.soup.find('img', attrs={'title': 'Previous Issue'}).parent.attrs['href'] + except AttributeError: + return None + + def can_remove(self, filename): return not filename.endswith(required_comic_files) @@ -69,6 +108,10 @@ class ComicArchiver: def __init__(self, comic : Comic, library=None) -> None: self.comic = comic self.worktree = library if library else os.path.join(library_path, f'comics/{self.comic.title}') + self.queue = [] + + def add(self, link): + self.queue.append(link) def download(self): @@ -78,6 +121,7 @@ class ComicArchiver: opener = urllib.request.build_opener() opener.addheaders = [('User-Agent', 'Mozilla/5.0')] urllib.request.install_opener(opener) + print('\n') for index,url in enumerate(self.comic.filelist): @@ -88,8 +132,12 @@ class ComicArchiver: else: page_number = str(index).zfill(3) file_extension = url.split('/')[-1].split('.')[1] + + if len(file_extension) > 3: + file_extension = 'jpg' + formatted_file = f'{self.comic.title} - {page_number}.{file_extension}' - print(formatted_file, end='\r') + print(formatted_file, end='\r',) urllib.request.urlretrieve(url, filename=os.path.join(self.worktree, formatted_file)) print() @@ -99,7 +147,7 @@ class ComicArchiver: if os.path.exists(os.path.join(self.worktree, f'{self.comic.title}{archive_format}')): return - output = shutil.make_archive(os.path.join(self.worktree, self.comic.title), 'zip', self.worktree, self.worktree) + output = shutil.make_archive(self.comic.title, 'zip', self.worktree, self.worktree) os.rename(output, os.path.join(self.worktree, f'{self.comic.title}{archive_format}')) @@ -109,5 +157,11 @@ class ComicArchiver: os.remove(os.path.join(self.worktree, image)) if __name__ == '__main__': - comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/') - print(comic.category) \ No newline at end of file + comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/') # all links + # comic = Comic('http://readallcomics.com/static-season-one-001-2021/') # no prev link + # comic = Comic('http://readallcomics.com/static-season-one-6-2022/') # no next link + comic = Comic('http://readallcomics.com/superman-vs-lobo-4-2022/') + test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/' + print(comic.next) + print(comic.prev) + print(comic.issue_number) \ No newline at end of file diff --git a/yoink/common.py b/yoink/common.py index 146b7eb..59d13bb 100644 --- a/yoink/common.py +++ b/yoink/common.py @@ -1,9 +1,7 @@ -import pathlib - - import pathlib # TODO replace os path with pathlib import os +from enum import Enum, auto @@ -12,6 +10,5 @@ config_path = os.path.abspath(os.path.join(os.environ.get('HOME'), '.config/yoin library_path = os.path.abspath(os.path.join(os.environ.get('HOME'), 'yoink/library')) required_comic_files = ('.cbr', '.cbz', '000.jpg', '001.jpg') skippable_images = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png', 'navbar.svg') -torrent_concurrent_download_limit = 1 supported_sites = ['readallcomics.com', 'tpb.party', 'dragonballsupermanga.net', 'mangadex.tv'] headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'} diff --git a/yoink/scraper.py b/yoink/scraper.py index de060b2..73bc5c8 100644 --- a/yoink/scraper.py +++ b/yoink/scraper.py @@ -1,7 +1,9 @@ -import os import requests from bs4 import BeautifulSoup +import os +from enum import Enum, auto + from yoink.common import supported_sites, library_path @@ -10,11 +12,10 @@ class Scrapable: def __init__(self, url) -> None: self.url = url comic_path = os.path.join(library_path, 'comics') + if not os.path.exists(comic_path): os.makedirs(comic_path) - - self.__check_site_support() diff --git a/yoink/tests/test_basic.py b/yoink/tests/test_basic.py index e236a01..f7ae7b0 100644 --- a/yoink/tests/test_basic.py +++ b/yoink/tests/test_basic.py @@ -4,7 +4,7 @@ import os import unittest from shutil import rmtree -from yoink.common import app_root, library_path, config_path, skippable_images, supported_sites, required_comic_files, torrent_concurrent_download_limit, headers +from yoink.common import app_root, library_path, config_path, skippable_images, supported_sites, required_comic_files from yoink.comic import Comic, ComicArchiver from yoink.scraper import Scrapable @@ -13,9 +13,15 @@ from yoink.scraper import Scrapable class BasicTestCase(unittest.TestCase): def setUp(self): self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/' - self.comic = Comic(self.test_comic) + self.test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/' + self.comic = Comic(self.test_comic_b) self.archiver = ComicArchiver(self.comic) self.remove_queue = [] + self.expected_title = 'Static Season One 4 (2021)' + self.expected_title_b = 'Captain Marvel vs. Rogue (2021 – Part 1)' + self.expected_category = 'Static: Season One' + self.expected_category_b = 'Captain Marvel vs. Rogue' + self.expected_issue_num = 1 def tearDown(self) -> None: @@ -28,10 +34,10 @@ class BasicTestCase(unittest.TestCase): self.assertTrue('!DOCTYPE html' in str(self.comic.markup)) def test_001_comic_has_valid_title(self): - self.assertEqual('Static Season One 4 (2021)', self.comic.title) + self.assertEqual(self.expected_title_b, self.comic.title) def test_002_comic_has_valid_category(self): - self.assertEqual('Static: Season One', self.comic.category) + self.assertEqual(self.expected_category_b, self.comic.category) def test_003_empty_comic_folder(self): self.assertEqual(len(os.listdir(os.path.join(library_path, 'comics'))), 0) @@ -47,7 +53,7 @@ class BasicTestCase(unittest.TestCase): def test_006_folder_cleaned_after_archive_generation(self): self.archiver.cleanup_worktree() - self.assertAlmostEqual(len(os.listdir(os.path.join(library_path, f'comics/{self.comic.title}'))), 3) + self.assertLessEqual(len(os.listdir(os.path.join(library_path, f'comics/{self.comic.title}'))), 3) def test_007_comic_instance_has_archiver(self): self.assertIsInstance(self.comic.archiver, ComicArchiver) @@ -63,4 +69,8 @@ class BasicTestCase(unittest.TestCase): self.assertTrue('Unsupported' in str(condition.exception)) self.remove_queue.append(os.path.join(library_path, f'comics/{self.comic.title}')) + + def test_010_valid_issue_number(self): + self.assertIsInstance(self.comic.issue_number, int) + self.assertEqual(self.comic.issue_number, self.expected_issue_num) diff --git a/yoink/torrent.py b/yoink/torrent.py deleted file mode 100644 index 6ad05ab..0000000 --- a/yoink/torrent.py +++ /dev/null @@ -1,86 +0,0 @@ -from bs4 import BeautifulSoup -import requests - -import os - -from yoink.common import library_path, config_path, app_root, headers -from yoink.scraper import Scrapable - - - -stopped_state = ('pausedUP', 'stalledUP', 'uploading', 'seeding') - - - - - - -class TorrentDownloader: - def __init__(self) -> None: - self.limit = 1 - self.queue = [] - self.download_path = self.set_path(os.path.join(library_path, 'downloads')) - - @classmethod - def create_torrent(cls, url): - return Torrent(url) - - # @classmethod - # def get_torrent(cls, name): - # return [torrent for torrent in new_downloader.torrents() if name == torrent['name']][0] - - @classmethod - def quick_download(cls, url): - if not isinstance(url, str): - raise TypeError('URL string expected') - - if not url.startswith('magnet'): - markup = requests.get(url, headers=headers).content - soup = BeautifulSoup(markup, 'html.parser') - magnet_link = soup.find('a', attrs={'title': 'Get this torrent'}.attrs['href']) - - - - - def set_path(self, path): - if path.strip() == '': raise ValueError('Path cannot be an empty string') - - if not os.path.exists(path): - os.makedirs(path) - - return path - - def empty_queue(self): - self.queue = [] - - def add(self, torrent): - if not isinstance(torrent, Torrent): - raise TypeError('Not a valid torrent') - - self.queue.append(torrent) - - # TODO separate download method into new thread - def download(self): - while len(self.queue) > 0: - for torrent in self.queue: - if not isinstance(torrent, Torrent): - raise TypeError('Not a valid torrent') - - print(torrent.magnet_link) - - - -downloader = TorrentDownloader() - - - -class Torrent(Scrapable): - def __init__(self, url) -> None: - super().__init__(url) - - - @property - def name(self) -> str: return self.soup.find('div', attrs={'id': 'title'}) - - @property - def magnet_link(self) -> str: return self.soup.find('a', attrs={'title': 'Get this torrent'}).attrs['href']