Added issue_num, prev, and next properties; fixed an issue with JPG file naming

This commit is contained in:
Bryan Bailey
2022-03-20 19:26:04 -04:00
parent ca1df871a4
commit ce1afe0ef8
6 changed files with 97 additions and 122 deletions

View File

@@ -24,37 +24,36 @@ def init():
@yoink.command()
@click.option('-c', '--comic', is_flag=True, help='Download a Comic file')
@click.option('-t', '--torrent', is_flag=True, help='Download a Torrent')
# @click.option('-c', '--comic', is_flag=True, help='Download a Comic file')
# @click.option('-t', '--torrent', is_flag=True, help='Download a Torrent')
@click.option('-s', '--series', is_flag=True, help='Download the entire series')
@click.option('-p', '--path', help='Change the download path')
@click.argument('url')
def download(url, comic, torrent, path):
def download(url, path, series):
# Account for whitespace/blank urls
if url.strip() == '':
click.echo('url cannot be blank')
return 1
if comic:
try:
comic = Comic(url, path=path if path else None)
except ValueError:
click.echo(f'{url} is not supported or is not a valid URL')
return 1
try:
comic = Comic(url, path=path if path else None)
except ValueError:
click.echo(f'{url} is not supported or is not a valid URL')
return 1
click.echo(f'Downloading {comic.title}')
comic.archiver.download()
if series:
comic.generate_series_queue()
click.echo('Building comic archive')
comic.archiver.generate_archive()
click.echo(f'Downloading {comic.title}')
comic.archiver.download()
click.echo('Cleaning up')
comic.archiver.cleanup_worktree()
click.echo('Building comic archive')
comic.archiver.generate_archive()
click.echo('Success')
if torrent:
click.echo('Opps! It looks like Torrents aren\'t yet fully supported.')
click.echo('Cleaning up')
comic.archiver.cleanup_worktree()
click.echo('Success')

View File

@@ -4,6 +4,7 @@ from yoink.scraper import Scrapable
import os
import shutil
import urllib
import re
@@ -61,6 +62,44 @@ class Comic(Scrapable):
data = self.soup.find('a', attrs={'rel': 'category tag'} )
return data.text
@property
def series_list(self) -> list:
    """Queue of issue URLs making up this comic's series.

    NOTE(review): currently a stub -- always returns a fresh empty list;
    presumably to be populated by generate_series_queue. Confirm intent.
    """
    issues = []
    return issues
@property
def issue_number(self) -> int:
    """Parse the issue number out of the comic title.

    Titles look like 'Static Season One 4 (2021)': the issue number is
    the whitespace-separated token immediately before the '(year)'.

    Returns:
        The parsed issue number, or 1 when the title has no '(year)'
        suffix or no numeric token before it (e.g. '... (2021 Part 1)'
        titles, where the year regex does not match at all).
    """
    # Raw string for the regex; matches a parenthesized 4-digit year.
    date_match = re.search(r"(\([12]\d{3}\))", self.title)
    try:
        # BUG FIX: the original took only the LAST character of the
        # prefix, so issue 12 parsed as 2. Take the whole token
        # preceding the year instead.
        return int(self.title[:date_match.start()].split()[-1])
    except (TypeError, AttributeError):
        # No '(year)' found in the title -- default to issue 1.
        return 1
    except (ValueError, IndexError):
        # Token before the year is missing or not numeric.
        return 1
@property
def volume(self) -> int:
    """Volume number of the comic.

    NOTE(review): not implemented yet -- always returns None despite
    the int annotation.
    """
    return None
@property
def next(self):
    """URL of the next comic in the series, or None when this is the
    latest issue (page has no 'Next Issue' nav image, or its link has
    an empty href)."""
    try:
        nav_img = self.soup.find('img', attrs={'title': 'Next Issue'})
        href = nav_img.parent.attrs['href']
        return href or None
    except AttributeError:
        # find() returned None -- no next-issue navigation on the page.
        return None
@property
def prev(self):
    """URL of the previous comic in the series, or None when this is
    the first issue (page has no 'Previous Issue' nav image)."""
    try:
        nav_img = self.soup.find('img', attrs={'title': 'Previous Issue'})
        return nav_img.parent.attrs['href']
    except AttributeError:
        # find() returned None -- no previous-issue navigation present.
        return None
def can_remove(self, filename):
    """Return True when *filename* is safe to delete during cleanup,
    i.e. it does not end with any of the suffixes the finished archive
    must keep (module-level required_comic_files tuple)."""
    is_required = filename.endswith(required_comic_files)
    return not is_required
@@ -69,6 +108,10 @@ class ComicArchiver:
def __init__(self, comic : Comic, library=None) -> None:
    """Bind the archiver to *comic*.

    Files are staged in *library* when given, otherwise in the default
    worktree <library_path>/comics/<comic title>.
    """
    self.comic = comic
    # Links queued for download (populated via add()).
    self.queue = []
    default_tree = os.path.join(library_path, f'comics/{self.comic.title}')
    self.worktree = library if library else default_tree
def add(self, link):
    """Append *link* to this archiver's download queue."""
    self.queue.append(link)
def download(self):
@@ -78,6 +121,7 @@ class ComicArchiver:
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
print('\n')
for index,url in enumerate(self.comic.filelist):
@@ -88,8 +132,12 @@ class ComicArchiver:
else:
page_number = str(index).zfill(3)
file_extension = url.split('/')[-1].split('.')[1]
if len(file_extension) > 3:
file_extension = 'jpg'
formatted_file = f'{self.comic.title} - {page_number}.{file_extension}'
print(formatted_file, end='\r')
print(formatted_file, end='\r',)
urllib.request.urlretrieve(url, filename=os.path.join(self.worktree, formatted_file))
print()
@@ -99,7 +147,7 @@ class ComicArchiver:
if os.path.exists(os.path.join(self.worktree, f'{self.comic.title}{archive_format}')):
return
output = shutil.make_archive(os.path.join(self.worktree, self.comic.title), 'zip', self.worktree, self.worktree)
output = shutil.make_archive(self.comic.title, 'zip', self.worktree, self.worktree)
os.rename(output, os.path.join(self.worktree, f'{self.comic.title}{archive_format}'))
@@ -109,5 +157,11 @@ class ComicArchiver:
os.remove(os.path.join(self.worktree, image))
if __name__ == '__main__':
comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/')
print(comic.category)
comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/') # all links
# comic = Comic('http://readallcomics.com/static-season-one-001-2021/') # no prev link
# comic = Comic('http://readallcomics.com/static-season-one-6-2022/') # no next link
comic = Comic('http://readallcomics.com/superman-vs-lobo-4-2022/')
test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
print(comic.next)
print(comic.prev)
print(comic.issue_number)

View File

@@ -1,9 +1,7 @@
import pathlib
import pathlib
# TODO replace os path with pathlib
import os
from enum import Enum, auto
@@ -12,6 +10,5 @@ config_path = os.path.abspath(os.path.join(os.environ.get('HOME'), '.config/yoin
library_path = os.path.abspath(os.path.join(os.environ.get('HOME'), 'yoink/library'))
required_comic_files = ('.cbr', '.cbz', '000.jpg', '001.jpg')
skippable_images = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png', 'navbar.svg')
torrent_concurrent_download_limit = 1
supported_sites = ['readallcomics.com', 'tpb.party', 'dragonballsupermanga.net', 'mangadex.tv']
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}

View File

@@ -1,7 +1,9 @@
import os
import requests
from bs4 import BeautifulSoup
import os
from enum import Enum, auto
from yoink.common import supported_sites, library_path
@@ -10,11 +12,10 @@ class Scrapable:
def __init__(self, url) -> None:
    # Base scraper setup: remember the target URL, make sure the shared
    # comics directory exists on disk, then validate that the URL's site
    # is one we support (name-mangled private check defined elsewhere in
    # this class -- raises on unsupported sites, presumably ValueError;
    # TODO confirm against __check_site_support).
    self.url = url
    comic_path = os.path.join(library_path, 'comics')
    if not os.path.exists(comic_path):
        # First run: create <library_path>/comics so downloads have a home.
        os.makedirs(comic_path)
    self.__check_site_support()

View File

@@ -4,7 +4,7 @@ import os
import unittest
from shutil import rmtree
from yoink.common import app_root, library_path, config_path, skippable_images, supported_sites, required_comic_files, torrent_concurrent_download_limit, headers
from yoink.common import app_root, library_path, config_path, skippable_images, supported_sites, required_comic_files
from yoink.comic import Comic, ComicArchiver
from yoink.scraper import Scrapable
@@ -13,9 +13,15 @@ from yoink.scraper import Scrapable
class BasicTestCase(unittest.TestCase):
def setUp(self):
self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
self.comic = Comic(self.test_comic)
self.test_comic_b = 'http://readallcomics.com/captain-marvel-vs-rogue-2021-part-1/'
self.comic = Comic(self.test_comic_b)
self.archiver = ComicArchiver(self.comic)
self.remove_queue = []
self.expected_title = 'Static Season One 4 (2021)'
self.expected_title_b = 'Captain Marvel vs. Rogue (2021 Part 1)'
self.expected_category = 'Static: Season One'
self.expected_category_b = 'Captain Marvel vs. Rogue'
self.expected_issue_num = 1
def tearDown(self) -> None:
@@ -28,10 +34,10 @@ class BasicTestCase(unittest.TestCase):
self.assertTrue('!DOCTYPE html' in str(self.comic.markup))
def test_001_comic_has_valid_title(self):
self.assertEqual('Static Season One 4 (2021)', self.comic.title)
self.assertEqual(self.expected_title_b, self.comic.title)
def test_002_comic_has_valid_category(self):
self.assertEqual('Static: Season One', self.comic.category)
self.assertEqual(self.expected_category_b, self.comic.category)
def test_003_empty_comic_folder(self):
self.assertEqual(len(os.listdir(os.path.join(library_path, 'comics'))), 0)
@@ -47,7 +53,7 @@ class BasicTestCase(unittest.TestCase):
def test_006_folder_cleaned_after_archive_generation(self):
self.archiver.cleanup_worktree()
self.assertAlmostEqual(len(os.listdir(os.path.join(library_path, f'comics/{self.comic.title}'))), 3)
self.assertLessEqual(len(os.listdir(os.path.join(library_path, f'comics/{self.comic.title}'))), 3)
def test_007_comic_instance_has_archiver(self):
self.assertIsInstance(self.comic.archiver, ComicArchiver)
@@ -64,3 +70,7 @@ class BasicTestCase(unittest.TestCase):
self.remove_queue.append(os.path.join(library_path, f'comics/{self.comic.title}'))
def test_010_valid_issue_number(self):
self.assertIsInstance(self.comic.issue_number, int)
self.assertEqual(self.comic.issue_number, self.expected_issue_num)

View File

@@ -1,86 +0,0 @@
from bs4 import BeautifulSoup
import requests
import os
from yoink.common import library_path, config_path, app_root, headers
from yoink.scraper import Scrapable
stopped_state = ('pausedUP', 'stalledUP', 'uploading', 'seeding')
class TorrentDownloader:
    """Manages a queue of Torrent objects and their download lifecycle.

    Downloads are staged under <library_path>/downloads, created on
    first use by set_path().
    """

    def __init__(self) -> None:
        self.limit = 1   # max concurrent downloads (not yet enforced)
        self.queue = []  # pending Torrent instances
        self.download_path = self.set_path(os.path.join(library_path, 'downloads'))

    @classmethod
    def create_torrent(cls, url):
        """Factory helper: build a Torrent from *url*."""
        return Torrent(url)

    # @classmethod
    # def get_torrent(cls, name):
    #     return [torrent for torrent in new_downloader.torrents() if name == torrent['name']][0]

    @classmethod
    def quick_download(cls, url):
        """Resolve *url* to a magnet link without queueing it.

        A URL already starting with 'magnet' is returned unchanged;
        otherwise the page is fetched and its magnet link extracted.

        Raises:
            TypeError: when *url* is not a string.
        """
        if not isinstance(url, str):
            raise TypeError('URL string expected')
        if not url.startswith('magnet'):
            markup = requests.get(url, headers=headers).content
            soup = BeautifulSoup(markup, 'html.parser')
            # BUG FIX: the closing paren was misplaced so .attrs['href']
            # was applied to the attrs dict literal (AttributeError) and
            # the result was discarded; apply it to the found <a> tag
            # and return the magnet link.
            magnet_link = soup.find('a', attrs={'title': 'Get this torrent'}).attrs['href']
            return magnet_link
        return url

    def set_path(self, path):
        """Ensure *path* exists on disk and return it.

        Raises:
            ValueError: when *path* is blank/whitespace.
        """
        if path.strip() == '':
            raise ValueError('Path cannot be an empty string')
        if not os.path.exists(path):
            os.makedirs(path)
        return path

    def empty_queue(self):
        """Drop every queued torrent."""
        self.queue = []

    def add(self, torrent):
        """Queue a Torrent for download; reject anything else.

        Raises:
            TypeError: when *torrent* is not a Torrent instance.
        """
        if not isinstance(torrent, Torrent):
            raise TypeError('Not a valid torrent')
        self.queue.append(torrent)

    # TODO separate download method into new thread
    def download(self):
        """Drain the queue, printing each torrent's magnet link.

        BUG FIX: the original looped `while len(self.queue) > 0` without
        ever removing items, so a non-empty queue spun forever; each
        torrent is now popped as it is processed.
        """
        while self.queue:
            torrent = self.queue.pop(0)
            if not isinstance(torrent, Torrent):
                raise TypeError('Not a valid torrent')
            print(torrent.magnet_link)
downloader = TorrentDownloader()
class Torrent(Scrapable):
    """A scraped torrent page exposing its title element and magnet link."""

    def __init__(self, url) -> None:
        super().__init__(url)

    @property
    def name(self) -> str:
        # NOTE(review): returns the bs4 <div> tag itself, not its text,
        # despite the str annotation -- confirm callers expect the tag.
        return self.soup.find('div', attrs={'id': 'title'})

    @property
    def magnet_link(self) -> str:
        # href of the page's 'Get this torrent' anchor.
        anchor = self.soup.find('a', attrs={'title': 'Get this torrent'})
        return anchor.attrs['href']