Added -s/--series flag for downloading the following comics in a series, starting from the original URL
This commit is contained in:
44
yoink/cli.py
44
yoink/cli.py
@@ -1,5 +1,6 @@
|
|||||||
from email.policy import default
|
from email.policy import default
|
||||||
import os
|
import os
|
||||||
|
from subprocess import check_output
|
||||||
import sys
|
import sys
|
||||||
import click
|
import click
|
||||||
from click_default_group import DefaultGroup
|
from click_default_group import DefaultGroup
|
||||||
@@ -11,6 +12,29 @@ from yoink.comic import Comic
|
|||||||
|
|
||||||
queue = []
|
queue = []
|
||||||
|
|
||||||
|
|
||||||
|
def download_comic(url, path, series):
    """Download the comic at ``url`` and, optionally, the issues after it.

    Each comic is downloaded, packed into an archive and its work tree is
    cleaned up before the next one is started. Progress is reported with
    ``click.echo``.

    Args:
        url: Address of the first comic to download.
        path: Destination handed to ``Comic`` as ``path``; any falsy value
            is passed on as ``None``.
        series: When truthy, keep following each comic's ``next`` link
            until a comic has none.

    Returns:
        ``1`` as soon as ``Comic`` rejects a URL by raising ``ValueError``
        (for the first comic or a later one in the series), otherwise
        ``None``.
    """
    # URLs already processed, so a "next" link that points back to an
    # earlier issue ends the run instead of looping forever.
    visited = set()

    # A loop rather than recursion: a long series cannot hit the recursion
    # limit and only one Comic is referenced at a time.
    while True:
        visited.add(url)

        try:
            comic = Comic(url, path=path if path else None)
        except ValueError:
            click.echo(f'{url} is not supported or is not a valid URL')
            return 1

        click.echo(f'Downloading {comic.title}')
        comic.archiver.download()

        click.echo('Building comic archive')
        comic.archiver.generate_archive()

        click.echo('Cleaning up')
        comic.archiver.cleanup_worktree()

        click.echo('Success')

        # Read ``next`` a single time and reuse the value.
        next_url = comic.next if series else None
        if not next_url or next_url in visited:
            return None

        url = next_url
|
||||||
|
|
||||||
|
|
||||||
@click.group(cls=DefaultGroup, default='download', default_if_no_args=True)
|
@click.group(cls=DefaultGroup, default='download', default_if_no_args=True)
|
||||||
def yoink():
|
def yoink():
|
||||||
pass
|
pass
|
||||||
@@ -35,25 +59,7 @@ def download(url, path, series):
|
|||||||
click.echo('url cannot be blank')
|
click.echo('url cannot be blank')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
try:
|
download_comic(url, path, series)
|
||||||
comic = Comic(url, path=path if path else None)
|
|
||||||
except ValueError:
|
|
||||||
click.echo(f'{url} is not supported or is not a valid URL')
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if series:
|
|
||||||
comic.generate_series_queue()
|
|
||||||
|
|
||||||
click.echo(f'Downloading {comic.title}')
|
|
||||||
comic.archiver.download()
|
|
||||||
|
|
||||||
click.echo('Building comic archive')
|
|
||||||
comic.archiver.generate_archive()
|
|
||||||
|
|
||||||
click.echo('Cleaning up')
|
|
||||||
comic.archiver.cleanup_worktree()
|
|
||||||
|
|
||||||
click.echo('Success')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ class Comic(Scrapable):
|
|||||||
return image.endswith('.jpg' or '.jpeg')
|
return image.endswith('.jpg' or '.jpeg')
|
||||||
|
|
||||||
|
|
||||||
def __get_image_src(self, comic):
|
def __get_image_src(self, comic) -> str:
|
||||||
if comic.attrs:
|
if comic.attrs:
|
||||||
try:
|
try:
|
||||||
return comic.attrs['src']
|
return comic.attrs['src']
|
||||||
@@ -27,7 +27,7 @@ class Comic(Scrapable):
|
|||||||
for image in comic:
|
for image in comic:
|
||||||
return image.attrs['src']
|
return image.attrs['src']
|
||||||
|
|
||||||
def __parse_soup(self):
|
def __parse_soup(self) -> list:
|
||||||
soup = {
|
soup = {
|
||||||
'default': self.soup.find_all('div', class_='separator'),
|
'default': self.soup.find_all('div', class_='separator'),
|
||||||
'no-div': self.soup.find_all('img', attrs={'width': '1000px'}),
|
'no-div': self.soup.find_all('img', attrs={'width': '1000px'}),
|
||||||
@@ -43,13 +43,13 @@ class Comic(Scrapable):
|
|||||||
return comics
|
return comics
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def filelist(self):
|
def filelist(self) -> list:
|
||||||
comics = self.__parse_soup()
|
comics = self.__parse_soup()
|
||||||
return [comic for comic in list(map(self.__get_image_src, comics)) if not comic.endswith(skippable_images)]
|
return [comic for comic in list(map(self.__get_image_src, comics)) if not comic.endswith(skippable_images)]
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def title(self):
|
def title(self) -> str:
|
||||||
if 'readallcomics' in self.url:
|
if 'readallcomics' in self.url:
|
||||||
return self.soup.title.string.replace(' | Read All Comics Online For Free', '').replace('…', '').replace('#', '').replace(':', '').strip()
|
return self.soup.title.string.replace(' | Read All Comics Online For Free', '').replace('…', '').replace('#', '').replace(':', '').strip()
|
||||||
elif 'mangadex' in self.url:
|
elif 'mangadex' in self.url:
|
||||||
@@ -58,7 +58,7 @@ class Comic(Scrapable):
|
|||||||
return 'Uncategorized'
|
return 'Uncategorized'
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def category(self):
|
def category(self) -> str:
|
||||||
data = self.soup.find('a', attrs={'rel': 'category tag'} )
|
data = self.soup.find('a', attrs={'rel': 'category tag'} )
|
||||||
return data.text
|
return data.text
|
||||||
|
|
||||||
@@ -71,11 +71,10 @@ class Comic(Scrapable):
|
|||||||
@property
|
@property
|
||||||
def issue_number(self) -> int:
|
def issue_number(self) -> int:
|
||||||
# matches any year in parentheses (xxxx)
|
# matches any year in parentheses (xxxx)
|
||||||
# TODO yoink/comic.py:74: DeprecationWarning: invalid escape sequence '\('
|
year_regex = re.search("(\([12]\d{3}\))", self.title)
|
||||||
date_reg = re.search("(\([12]\d{3}\))", self.title)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return int(self.title[:date_reg.start() - 1][-1])
|
return int(self.title[:year_regex.start() - 1][-1])
|
||||||
except TypeError:
|
except TypeError:
|
||||||
return 1
|
return 1
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
@@ -86,7 +85,7 @@ class Comic(Scrapable):
|
|||||||
return
|
return
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def next(self):
|
def next(self) -> str:
|
||||||
''' returns the url of the next comic in the series. returns None if current'''
|
''' returns the url of the next comic in the series. returns None if current'''
|
||||||
try:
|
try:
|
||||||
return self.soup.find('img', attrs={'title': 'Next Issue'}).parent.attrs['href'] or None
|
return self.soup.find('img', attrs={'title': 'Next Issue'}).parent.attrs['href'] or None
|
||||||
@@ -94,7 +93,7 @@ class Comic(Scrapable):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def prev(self):
|
def prev(self) -> str:
|
||||||
''' returns the url of the previous comic in the series. returns None if first'''
|
''' returns the url of the previous comic in the series. returns None if first'''
|
||||||
try:
|
try:
|
||||||
return self.soup.find('img', attrs={'title': 'Previous Issue'}).parent.attrs['href']
|
return self.soup.find('img', attrs={'title': 'Previous Issue'}).parent.attrs['href']
|
||||||
@@ -102,7 +101,7 @@ class Comic(Scrapable):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def can_remove(self, filename):
|
def can_remove(self, filename : str) -> bool:
|
||||||
return not filename.endswith(required_comic_files)
|
return not filename.endswith(required_comic_files)
|
||||||
|
|
||||||
|
|
||||||
@@ -112,10 +111,10 @@ class ComicArchiver:
|
|||||||
self.worktree = library if library else os.path.join(library_path, f'comics/{self.comic.title}')
|
self.worktree = library if library else os.path.join(library_path, f'comics/{self.comic.title}')
|
||||||
self.queue = []
|
self.queue = []
|
||||||
|
|
||||||
def add(self, link):
|
def add(self, link : str) -> None:
|
||||||
self.queue.append(link)
|
self.queue.append(link)
|
||||||
|
|
||||||
def download(self):
|
def download(self) -> None:
|
||||||
|
|
||||||
if not os.path.exists(self.worktree):
|
if not os.path.exists(self.worktree):
|
||||||
os.makedirs(self.worktree, mode=0o777)
|
os.makedirs(self.worktree, mode=0o777)
|
||||||
@@ -163,8 +162,8 @@ if __name__ == '__main__':
|
|||||||
comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/') # all links
|
comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/') # all links
|
||||||
# comic = Comic('http://readallcomics.com/static-season-one-001-2021/') # no prev link
|
# comic = Comic('http://readallcomics.com/static-season-one-001-2021/') # no prev link
|
||||||
# comic = Comic('http://readallcomics.com/static-season-one-6-2022/') # no next link
|
# comic = Comic('http://readallcomics.com/static-season-one-6-2022/') # no next link
|
||||||
comic = Comic('http://readallcomics.com/superman-vs-lobo-4-2022/')
|
# comic = Comic('http://readallcomics.com/superman-vs-lobo-4-2022/')
|
||||||
test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
|
# test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
|
||||||
print(comic.next)
|
# print(comic.next)
|
||||||
print(comic.prev)
|
# print(comic.prev)
|
||||||
print(comic.issue_number)
|
# print(comic.issue_number)
|
||||||
@@ -9,7 +9,7 @@ from yoink.common import supported_sites, library_path
|
|||||||
|
|
||||||
|
|
||||||
class Scrapable:
|
class Scrapable:
|
||||||
def __init__(self, url) -> None:
|
def __init__(self, url : str) -> None:
|
||||||
self.url = url
|
self.url = url
|
||||||
comic_path = os.path.join(library_path, 'comics')
|
comic_path = os.path.join(library_path, 'comics')
|
||||||
|
|
||||||
@@ -35,7 +35,7 @@ class Scrapable:
|
|||||||
def soup(self) -> BeautifulSoup: return BeautifulSoup(self.markup, 'html.parser')
|
def soup(self) -> BeautifulSoup: return BeautifulSoup(self.markup, 'html.parser')
|
||||||
|
|
||||||
|
|
||||||
def __check_site_support(self):
|
def __check_site_support(self) -> None:
|
||||||
num_of_sites = len(supported_sites)
|
num_of_sites = len(supported_sites)
|
||||||
|
|
||||||
while num_of_sites > 0:
|
while num_of_sites > 0:
|
||||||
|
|||||||
@@ -14,14 +14,16 @@ class BasicTestCase(unittest.TestCase):
|
|||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
|
self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
|
||||||
self.test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
|
self.test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
|
||||||
self.comic = Comic(self.test_comic_b)
|
self.comic = Comic(self.test_comic)
|
||||||
self.archiver = ComicArchiver(self.comic)
|
self.archiver = ComicArchiver(self.comic)
|
||||||
self.remove_queue = []
|
self.remove_queue = []
|
||||||
self.expected_title = 'Static Season One 4 (2021)'
|
self.expected_title = 'Static Season One 4 (2021)'
|
||||||
self.expected_title_b = 'Captain Marvel vs. Rogue (2021 – Part 1)'
|
self.expected_title_b = 'Captain Marvel vs. Rogue (2021 – Part 1)'
|
||||||
self.expected_category = 'Static: Season One'
|
self.expected_category = 'Static: Season One'
|
||||||
self.expected_category_b = 'Captain Marvel vs. Rogue'
|
self.expected_category_b = 'Captain Marvel vs. Rogue'
|
||||||
self.expected_issue_num = 1
|
self.expected_issue_num = 4
|
||||||
|
self.expected_next_url = 'http://readallcomics.com/static-season-one-5-2022/'
|
||||||
|
self.expected_prev_url = 'http://readallcomics.com/static-season-one-003-2021/'
|
||||||
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
def tearDown(self) -> None:
|
||||||
@@ -34,10 +36,10 @@ class BasicTestCase(unittest.TestCase):
|
|||||||
self.assertTrue('!DOCTYPE html' in str(self.comic.markup))
|
self.assertTrue('!DOCTYPE html' in str(self.comic.markup))
|
||||||
|
|
||||||
def test_001_comic_has_valid_title(self):
|
def test_001_comic_has_valid_title(self):
|
||||||
self.assertEqual(self.expected_title_b, self.comic.title)
|
self.assertEqual(self.expected_title, self.comic.title)
|
||||||
|
|
||||||
def test_002_comic_has_valid_category(self):
|
def test_002_comic_has_valid_category(self):
|
||||||
self.assertEqual(self.expected_category_b, self.comic.category)
|
self.assertEqual(self.expected_category, self.comic.category)
|
||||||
|
|
||||||
def test_003_empty_comic_folder(self):
|
def test_003_empty_comic_folder(self):
|
||||||
self.assertEqual(len(os.listdir(os.path.join(library_path, 'comics'))), 0)
|
self.assertEqual(len(os.listdir(os.path.join(library_path, 'comics'))), 0)
|
||||||
@@ -74,3 +76,9 @@ class BasicTestCase(unittest.TestCase):
|
|||||||
self.assertIsInstance(self.comic.issue_number, int)
|
self.assertIsInstance(self.comic.issue_number, int)
|
||||||
self.assertEqual(self.comic.issue_number, self.expected_issue_num)
|
self.assertEqual(self.comic.issue_number, self.expected_issue_num)
|
||||||
|
|
||||||
|
def test_011_has_next_link(self):
|
||||||
|
self.assertEqual(self.comic.next, self.expected_next_url)
|
||||||
|
|
||||||
|
def test_012_has_prev_link(self):
|
||||||
|
self.assertEqual(self.comic.prev, self.expected_prev_url)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user