added issue_number, prev and next properties; fixed issue with jpg file naming
This commit is contained in:
15
yoink/cli.py
15
yoink/cli.py
@@ -24,23 +24,26 @@ def init():
|
|||||||
|
|
||||||
|
|
||||||
@yoink.command()
|
@yoink.command()
|
||||||
@click.option('-c', '--comic', is_flag=True, help='Download a Comic file')
|
# @click.option('-c', '--comic', is_flag=True, help='Download a Comic file')
|
||||||
@click.option('-t', '--torrent', is_flag=True, help='Download a Torrent')
|
# @click.option('-t', '--torrent', is_flag=True, help='Download a Torrent')
|
||||||
|
@click.option('-s', '--series', is_flag=True, help='Download the entire series')
|
||||||
@click.option('-p', '--path', help='Change the download path')
|
@click.option('-p', '--path', help='Change the download path')
|
||||||
@click.argument('url')
|
@click.argument('url')
|
||||||
def download(url, comic, torrent, path):
|
def download(url, path, series):
|
||||||
# Account for whitespace/blank urls
|
# Account for whitespace/blank urls
|
||||||
if url.strip() == '':
|
if url.strip() == '':
|
||||||
click.echo('url cannot be blank')
|
click.echo('url cannot be blank')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if comic:
|
|
||||||
try:
|
try:
|
||||||
comic = Comic(url, path=path if path else None)
|
comic = Comic(url, path=path if path else None)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
click.echo(f'{url} is not supported or is not a valid URL')
|
click.echo(f'{url} is not supported or is not a valid URL')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
if series:
|
||||||
|
comic.generate_series_queue()
|
||||||
|
|
||||||
click.echo(f'Downloading {comic.title}')
|
click.echo(f'Downloading {comic.title}')
|
||||||
comic.archiver.download()
|
comic.archiver.download()
|
||||||
|
|
||||||
@@ -52,10 +55,6 @@ def download(url, comic, torrent, path):
|
|||||||
|
|
||||||
click.echo('Success')
|
click.echo('Success')
|
||||||
|
|
||||||
if torrent:
|
|
||||||
click.echo('Opps! It looks like Torrents aren\'t yet fully supported.')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__=='__main__':
|
if __name__=='__main__':
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from yoink.scraper import Scrapable
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import urllib
|
import urllib
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -61,6 +62,44 @@ class Comic(Scrapable):
|
|||||||
data = self.soup.find('a', attrs={'rel': 'category tag'} )
|
data = self.soup.find('a', attrs={'rel': 'category tag'} )
|
||||||
return data.text
|
return data.text
|
||||||
|
|
||||||
|
@property
def series_list(self) -> list:
    """Placeholder for the series queue; currently always a new empty list."""
    return list()
|
||||||
|
|
||||||
|
@property
def issue_number(self) -> int:
    """Issue number parsed from ``self.title``, defaulting to 1.

    Looks for a run of digits directly before a parenthesised four-digit
    year, e.g. ``'Static Season One 4 (2021)'`` gives ``4``. Titles that do
    not contain that pattern (no ``(YYYY)`` suffix, or no number in front of
    it) yield ``1``.
    """
    # Capture the whole digit run so multi-digit issues ("012") are not
    # truncated to their last character, and so a non-numeric word in front
    # of the year falls back to the default instead of raising ValueError.
    found = re.search(r"(\d+)\s*\([12]\d{3}\)", self.title)
    return int(found.group(1)) if found else 1
|
||||||
|
|
||||||
|
@property
def volume(self) -> int:
    """Not implemented yet: always evaluates to ``None``."""
    return None
|
||||||
|
|
||||||
|
@property
def next(self):
    """URL of the next comic in the series, or ``None`` when there is none.

    The link is taken from the ``href`` of the element that wraps the
    ``<img title="Next Issue">`` tag in ``self.soup``.
    """
    try:
        link = self.soup.find('img', attrs={'title': 'Next Issue'}).parent
        # .get() instead of ['href']: a wrapper without an href used to
        # raise KeyError, which the AttributeError handler did not cover.
        return link.attrs.get('href') or None
    except AttributeError:
        # find() returned None (no such image) or the image has no parent.
        return None
|
||||||
|
|
||||||
|
@property
def prev(self):
    """URL of the previous comic in the series, or ``None`` when there is none.

    The link is taken from the ``href`` of the element that wraps the
    ``<img title="Previous Issue">`` tag in ``self.soup``.
    """
    try:
        link = self.soup.find('img', attrs={'title': 'Previous Issue'}).parent
        # .get() avoids the uncaught KeyError for a wrapper without href;
        # "or None" maps an empty href to None, as the ``next`` property does.
        return link.attrs.get('href') or None
    except AttributeError:
        # find() returned None (no such image) or the image has no parent.
        return None
|
||||||
|
|
||||||
|
|
||||||
def can_remove(self, filename):
|
def can_remove(self, filename):
|
||||||
return not filename.endswith(required_comic_files)
|
return not filename.endswith(required_comic_files)
|
||||||
|
|
||||||
@@ -69,6 +108,10 @@ class ComicArchiver:
|
|||||||
def __init__(self, comic : Comic, library=None) -> None:
|
def __init__(self, comic : Comic, library=None) -> None:
|
||||||
self.comic = comic
|
self.comic = comic
|
||||||
self.worktree = library if library else os.path.join(library_path, f'comics/{self.comic.title}')
|
self.worktree = library if library else os.path.join(library_path, f'comics/{self.comic.title}')
|
||||||
|
self.queue = []
|
||||||
|
|
||||||
|
def add(self, link):
    """Append ``link`` to the end of this archiver's ``queue``."""
    pending = self.queue
    pending.append(link)
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
|
|
||||||
@@ -78,6 +121,7 @@ class ComicArchiver:
|
|||||||
opener = urllib.request.build_opener()
|
opener = urllib.request.build_opener()
|
||||||
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
|
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
|
||||||
urllib.request.install_opener(opener)
|
urllib.request.install_opener(opener)
|
||||||
|
print('\n')
|
||||||
|
|
||||||
for index,url in enumerate(self.comic.filelist):
|
for index,url in enumerate(self.comic.filelist):
|
||||||
|
|
||||||
@@ -88,8 +132,12 @@ class ComicArchiver:
|
|||||||
else:
|
else:
|
||||||
page_number = str(index).zfill(3)
|
page_number = str(index).zfill(3)
|
||||||
file_extension = url.split('/')[-1].split('.')[1]
|
file_extension = url.split('/')[-1].split('.')[1]
|
||||||
|
|
||||||
|
if len(file_extension) > 3:
|
||||||
|
file_extension = 'jpg'
|
||||||
|
|
||||||
formatted_file = f'{self.comic.title} - {page_number}.{file_extension}'
|
formatted_file = f'{self.comic.title} - {page_number}.{file_extension}'
|
||||||
print(formatted_file, end='\r')
|
print(formatted_file, end='\r',)
|
||||||
urllib.request.urlretrieve(url, filename=os.path.join(self.worktree, formatted_file))
|
urllib.request.urlretrieve(url, filename=os.path.join(self.worktree, formatted_file))
|
||||||
print()
|
print()
|
||||||
|
|
||||||
@@ -99,7 +147,7 @@ class ComicArchiver:
|
|||||||
if os.path.exists(os.path.join(self.worktree, f'{self.comic.title}{archive_format}')):
|
if os.path.exists(os.path.join(self.worktree, f'{self.comic.title}{archive_format}')):
|
||||||
return
|
return
|
||||||
|
|
||||||
output = shutil.make_archive(os.path.join(self.worktree, self.comic.title), 'zip', self.worktree, self.worktree)
|
output = shutil.make_archive(self.comic.title, 'zip', self.worktree, self.worktree)
|
||||||
os.rename(output, os.path.join(self.worktree, f'{self.comic.title}{archive_format}'))
|
os.rename(output, os.path.join(self.worktree, f'{self.comic.title}{archive_format}'))
|
||||||
|
|
||||||
|
|
||||||
@@ -109,5 +157,11 @@ class ComicArchiver:
|
|||||||
os.remove(os.path.join(self.worktree, image))
|
os.remove(os.path.join(self.worktree, image))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/')
|
comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/') # all links
|
||||||
print(comic.category)
|
# comic = Comic('http://readallcomics.com/static-season-one-001-2021/') # no prev link
|
||||||
|
# comic = Comic('http://readallcomics.com/static-season-one-6-2022/') # no next link
|
||||||
|
comic = Comic('http://readallcomics.com/superman-vs-lobo-4-2022/')
|
||||||
|
test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
|
||||||
|
print(comic.next)
|
||||||
|
print(comic.prev)
|
||||||
|
print(comic.issue_number)
|
||||||
@@ -1,9 +1,7 @@
|
|||||||
import pathlib
|
|
||||||
|
|
||||||
|
|
||||||
import pathlib
|
import pathlib
|
||||||
# TODO replace os path with pathlib
|
# TODO replace os path with pathlib
|
||||||
import os
|
import os
|
||||||
|
from enum import Enum, auto
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -12,6 +10,5 @@ config_path = os.path.abspath(os.path.join(os.environ.get('HOME'), '.config/yoin
|
|||||||
library_path = os.path.abspath(os.path.join(os.environ.get('HOME'), 'yoink/library'))
|
library_path = os.path.abspath(os.path.join(os.environ.get('HOME'), 'yoink/library'))
|
||||||
required_comic_files = ('.cbr', '.cbz', '000.jpg', '001.jpg')
|
required_comic_files = ('.cbr', '.cbz', '000.jpg', '001.jpg')
|
||||||
skippable_images = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png', 'navbar.svg')
|
skippable_images = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png', 'navbar.svg')
|
||||||
torrent_concurrent_download_limit = 1
|
|
||||||
supported_sites = ['readallcomics.com', 'tpb.party', 'dragonballsupermanga.net', 'mangadex.tv']
|
supported_sites = ['readallcomics.com', 'tpb.party', 'dragonballsupermanga.net', 'mangadex.tv']
|
||||||
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
|
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
import os
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import os
|
||||||
|
from enum import Enum, auto
|
||||||
|
|
||||||
from yoink.common import supported_sites, library_path
|
from yoink.common import supported_sites, library_path
|
||||||
|
|
||||||
|
|
||||||
@@ -10,11 +12,10 @@ class Scrapable:
|
|||||||
def __init__(self, url) -> None:
|
def __init__(self, url) -> None:
|
||||||
self.url = url
|
self.url = url
|
||||||
comic_path = os.path.join(library_path, 'comics')
|
comic_path = os.path.join(library_path, 'comics')
|
||||||
|
|
||||||
if not os.path.exists(comic_path):
|
if not os.path.exists(comic_path):
|
||||||
os.makedirs(comic_path)
|
os.makedirs(comic_path)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
self.__check_site_support()
|
self.__check_site_support()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import os
|
|||||||
import unittest
|
import unittest
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
|
|
||||||
from yoink.common import app_root, library_path, config_path, skippable_images, supported_sites, required_comic_files, torrent_concurrent_download_limit, headers
|
from yoink.common import app_root, library_path, config_path, skippable_images, supported_sites, required_comic_files
|
||||||
from yoink.comic import Comic, ComicArchiver
|
from yoink.comic import Comic, ComicArchiver
|
||||||
from yoink.scraper import Scrapable
|
from yoink.scraper import Scrapable
|
||||||
|
|
||||||
@@ -13,9 +13,15 @@ from yoink.scraper import Scrapable
|
|||||||
class BasicTestCase(unittest.TestCase):
|
class BasicTestCase(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
|
self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
|
||||||
self.comic = Comic(self.test_comic)
|
self.test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
|
||||||
|
self.comic = Comic(self.test_comic_b)
|
||||||
self.archiver = ComicArchiver(self.comic)
|
self.archiver = ComicArchiver(self.comic)
|
||||||
self.remove_queue = []
|
self.remove_queue = []
|
||||||
|
self.expected_title = 'Static Season One 4 (2021)'
|
||||||
|
self.expected_title_b = 'Captain Marvel vs. Rogue (2021 – Part 1)'
|
||||||
|
self.expected_category = 'Static: Season One'
|
||||||
|
self.expected_category_b = 'Captain Marvel vs. Rogue'
|
||||||
|
self.expected_issue_num = 1
|
||||||
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
def tearDown(self) -> None:
|
||||||
@@ -28,10 +34,10 @@ class BasicTestCase(unittest.TestCase):
|
|||||||
self.assertTrue('!DOCTYPE html' in str(self.comic.markup))
|
self.assertTrue('!DOCTYPE html' in str(self.comic.markup))
|
||||||
|
|
||||||
def test_001_comic_has_valid_title(self):
|
def test_001_comic_has_valid_title(self):
|
||||||
self.assertEqual('Static Season One 4 (2021)', self.comic.title)
|
self.assertEqual(self.expected_title_b, self.comic.title)
|
||||||
|
|
||||||
def test_002_comic_has_valid_category(self):
|
def test_002_comic_has_valid_category(self):
|
||||||
self.assertEqual('Static: Season One', self.comic.category)
|
self.assertEqual(self.expected_category_b, self.comic.category)
|
||||||
|
|
||||||
def test_003_empty_comic_folder(self):
|
def test_003_empty_comic_folder(self):
|
||||||
self.assertEqual(len(os.listdir(os.path.join(library_path, 'comics'))), 0)
|
self.assertEqual(len(os.listdir(os.path.join(library_path, 'comics'))), 0)
|
||||||
@@ -47,7 +53,7 @@ class BasicTestCase(unittest.TestCase):
|
|||||||
|
|
||||||
def test_006_folder_cleaned_after_archive_generation(self):
|
def test_006_folder_cleaned_after_archive_generation(self):
|
||||||
self.archiver.cleanup_worktree()
|
self.archiver.cleanup_worktree()
|
||||||
self.assertAlmostEqual(len(os.listdir(os.path.join(library_path, f'comics/{self.comic.title}'))), 3)
|
self.assertLessEqual(len(os.listdir(os.path.join(library_path, f'comics/{self.comic.title}'))), 3)
|
||||||
|
|
||||||
def test_007_comic_instance_has_archiver(self):
|
def test_007_comic_instance_has_archiver(self):
|
||||||
self.assertIsInstance(self.comic.archiver, ComicArchiver)
|
self.assertIsInstance(self.comic.archiver, ComicArchiver)
|
||||||
@@ -64,3 +70,7 @@ class BasicTestCase(unittest.TestCase):
|
|||||||
|
|
||||||
self.remove_queue.append(os.path.join(library_path, f'comics/{self.comic.title}'))
|
self.remove_queue.append(os.path.join(library_path, f'comics/{self.comic.title}'))
|
||||||
|
|
||||||
|
def test_010_valid_issue_number(self):
    """The comic's issue number is an int equal to the expected fixture value."""
    issue = self.comic.issue_number
    self.assertIsInstance(issue, int)
    self.assertEqual(issue, self.expected_issue_num)
|
||||||
|
|
||||||
|
|||||||
@@ -1,86 +0,0 @@
|
|||||||
from bs4 import BeautifulSoup
|
|
||||||
import requests
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from yoink.common import library_path, config_path, app_root, headers
|
|
||||||
from yoink.scraper import Scrapable
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
stopped_state = ('pausedUP', 'stalledUP', 'uploading', 'seeding')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class TorrentDownloader:
    """Holds a queue of ``Torrent`` objects and helpers for magnet links.

    ``__init__`` sets ``limit`` (fixed at 1), ``queue`` (an empty list) and
    ``download_path`` (``<library_path>/downloads``, created if missing).
    """

    def __init__(self) -> None:
        self.limit = 1
        self.queue = []
        self.download_path = self.set_path(os.path.join(library_path, 'downloads'))

    @classmethod
    def create_torrent(cls, url):
        """Return a new ``Torrent`` built from ``url``."""
        return Torrent(url)

    # @classmethod
    # def get_torrent(cls, name):
    #     return [torrent for torrent in new_downloader.torrents() if name == torrent['name']][0]

    @classmethod
    def quick_download(cls, url):
        """Resolve ``url`` to a magnet link and return it.

        A ``url`` that already starts with ``'magnet'`` is returned as-is.
        Any other URL is fetched and the ``href`` of its
        ``<a title="Get this torrent">`` anchor is returned. No download is
        started here; the link is returned so the lookup is not discarded.

        Raises:
            TypeError: if ``url`` is not a string.
            ValueError: if the fetched page has no matching anchor.
        """
        if not isinstance(url, str):
            raise TypeError('URL string expected')

        if url.startswith('magnet'):
            return url

        markup = requests.get(url, headers=headers).content
        soup = BeautifulSoup(markup, 'html.parser')
        # The original applied .attrs['href'] to the attrs dict passed to
        # find(), which always raised AttributeError; read it from the tag.
        anchor = soup.find('a', attrs={'title': 'Get this torrent'})
        if anchor is None:
            raise ValueError(f'No magnet link found at {url}')
        return anchor.attrs['href']

    def set_path(self, path):
        """Ensure the directory ``path`` exists and return ``path``.

        This does not assign ``download_path``; the caller stores the result.

        Raises:
            ValueError: if ``path`` is empty or only whitespace.
        """
        if path.strip() == '':
            raise ValueError('Path cannot be an empty string')

        if not os.path.exists(path):
            # exist_ok tolerates the directory appearing between the check
            # above and this call.
            os.makedirs(path, exist_ok=True)

        return path

    def empty_queue(self):
        """Replace the queue with a new empty list."""
        self.queue = []

    def add(self, torrent):
        """Append ``torrent`` to the queue.

        Raises:
            TypeError: if ``torrent`` is not a ``Torrent`` instance.
        """
        if not isinstance(torrent, Torrent):
            raise TypeError('Not a valid torrent')

        self.queue.append(torrent)

    # TODO separate download method into new thread
    def download(self):
        """Print the magnet link of each queued torrent, draining the queue.

        Raises:
            TypeError: if a queued item is not a ``Torrent``; that item and
                everything after it stay in the queue.
        """
        # Entries are removed once handled; the original never shrank the
        # queue, so its ``while`` loop could not terminate.
        while self.queue:
            torrent = self.queue[0]
            if not isinstance(torrent, Torrent):
                raise TypeError('Not a valid torrent')

            print(torrent.magnet_link)
            self.queue.pop(0)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
downloader = TorrentDownloader()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Torrent(Scrapable):
    """A scrapable torrent page exposing its title element and magnet link."""

    def __init__(self, url) -> None:
        super().__init__(url)

    @property
    def name(self) -> str:
        """The ``<div id="title">`` element found in ``self.soup``."""
        # NOTE(review): this returns whatever find() yields, not its text,
        # despite the ``str`` annotation - confirm what callers expect.
        title_div = self.soup.find('div', attrs={'id': 'title'})
        return title_div

    @property
    def magnet_link(self) -> str:
        """The ``href`` of the page's ``<a title="Get this torrent">`` anchor."""
        anchor = self.soup.find('a', attrs={'title': 'Get this torrent'})
        return anchor.attrs['href']
|
|
||||||
Reference in New Issue
Block a user