From 8cdb80315ad725e19323d8c45dddcf1e5d2e243f Mon Sep 17 00:00:00 2001
From: Bryan Bailey
Date: Wed, 9 Mar 2022 00:39:40 -0500
Subject: [PATCH] passing unittests TODO fix comic downloads

---
Review notes (this section is not part of the commit):
- requirements.txt: removed the duplicated pins.
- test.sh: added a bash shebang, quoted $output, rm -f on cleanup.
- yoink/__init__.py: dropped the self-referential star import.
- yoink/bounty.py, yoink/provider.py: resolve the home directory with
  os.path.expanduser instead of os.environ.get('HOME').
- yoink/cli.py: a blank url now exits with status 1.
- yoink/provider.py: PirateBay.magnet returns its value; download() has a
  download_path, creates the target folder and numbers files by position;
  image lookup no longer raises on container elements; "www." hosts resolve.
- yoink/tests/test_basic.py: compare the message against str(exception).
- Hunk sizes and the diffstat were recomputed; the blob ids on the "index"
  lines were not.

 .gitignore                |   3 +
 Dockerfile.dev            |   1 +
 README.md                 |   1 +
 requirements.txt          |  11 +++
 setup.cfg                 |   0
 setup.py                  |   0
 test.sh                   |  70 ++++++++++++++++
 yoink/__init__.py         |   1 +
 yoink/bounty.py           |  73 +++++++++++++++++
 yoink/cli.py              |  25 ++++++
 yoink/provider.py         | 167 +++++++++++++++++++++++++++++++++++++++
 yoink/tests/__init__.py   |   0
 yoink/tests/test_basic.py |  42 ++++++++++
 13 files changed, 394 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Dockerfile.dev
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 setup.cfg
 create mode 100644 setup.py
 create mode 100644 test.sh
 create mode 100644 yoink/__init__.py
 create mode 100644 yoink/bounty.py
 create mode 100644 yoink/cli.py
 create mode 100644 yoink/provider.py
 create mode 100644 yoink/tests/__init__.py
 create mode 100644 yoink/tests/test_basic.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd30579
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+env
+__pycache__
+.coverage
\ No newline at end of file
diff --git a/Dockerfile.dev b/Dockerfile.dev
new file mode 100644
index 0000000..c3c78df
--- /dev/null
+++ b/Dockerfile.dev
@@ -0,0 +1 @@
+FROM alpine
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..93d3813
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# yoink
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..46fa72c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+beautifulsoup4==4.10.0
+bs4==0.0.1
+certifi==2021.10.8
+charset-normalizer==2.0.12
+click==8.0.4
+coverage==6.3.2
+idna==3.3
+qbittorrent==0.1.6
+requests==2.27.1
+soupsieve==2.3.1
+urllib3==1.26.8
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..e69de29
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e69de29
diff --git a/test.sh b/test.sh
new file mode 100644
index 0000000..d5c8a52
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# : ${DIALOG_OK=0}
+# : ${DIALOG_CANCEL=1}
+# : ${DIALOG_HELP=2}
+# : ${DIALOG_EXTRA=3}
+# : ${DIALOG_ITEM_HELP=4}
+# : ${DIALOG_ESC=255}
+
+# lipsum="Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
+# tmp_file=$(tempfile 2>/dev/null) || tmp_file=/tmp/test$$
+# trap "rm -f $tmp_file" 0 1 2 5 15
+
+# dialog --title "Testing" --clear --inputbox "${lipsum}" 16 51 2> $tmp_file
+
+# return_value=$?
+
+# case $return_value in
+#     $DIALOG_OK)
+#         echo "Result: $(cat $tmp_file)";;
+#     $DIALOG_CANCEL)
+#         echo "Cancel pressed.";;
+#     $DIALOG_HELP)
+#         echo "Help pressed.";;
+#     $DIALOG_EXTRA)
+#         echo "Extra button pressed.";;
+#     $DIALOG_ITEM_HELP)
+#         echo "Item-help button pressed.";;
+#     $DIALOG_ESC)
+#         if test -s $tmp_file ; then
+#             cat $tmp_file
+#         else
+#             echo "ESC pressed."
+#         fi
+#         ;;
+# esac
+
+# dialog --begin 5 70 --backtitle "Shirak v0.1.0" --title "Info" --clear --msgbox 'Greetings, mortal...' 16 56 2> /dev/null
+
+output="/tmp/shit.txt"
+: >"$output"
+
+function hello() {
+    local name=${@-"Stranger"}
+
+    dialog --backtitle "Shirak v0.1.0 | test" --title "Greetings" --clear --msgbox "Greetings, ${name}..." 10 41
+}
+
+
+trap 'rm -f "$output"; exit' SIGHUP SIGINT SIGTERM
+
+
+dialog --title "Input your name" --backtitle "Shirak v0.1.0 | test" --inputbox "Enter your name " 8 60 2>"$output"
+
+response=$?
+
+name=$(<"$output")
+
+case $response in
+    0)
+        hello ${name}
+        ;;
+    1)
+        echo "Cancel pressed."
+        ;;
+    255)
+        echo "[esc] pressed."
+esac
+
+
+rm -f "$output"
diff --git a/yoink/__init__.py b/yoink/__init__.py
new file mode 100644
index 0000000..555c5ba
--- /dev/null
+++ b/yoink/__init__.py
@@ -0,0 +1 @@
+"""yoink: fetch comics from supported provider sites."""
diff --git a/yoink/bounty.py b/yoink/bounty.py
new file mode 100644
index 0000000..ca658c4
--- /dev/null
+++ b/yoink/bounty.py
@@ -0,0 +1,73 @@
+import os
+from qbittorrent import Client
+from yoink.provider import PirateBay, Provider, ReadAllComics
+
+
+class Downloader:
+    """Holds the qBittorrent client, the download queue and working paths."""
+
+    def __init__(self) -> None:
+        # NOTE(review): connects and logs in at construction time with
+        # hard-coded local credentials; consider making them configurable.
+        self.qb = Client('http://127.0.0.1:8080')
+        self.qb.login('admin', 'adminadmin')
+        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
+        self.limit = 1
+        self.queue = []
+        # expanduser('~') instead of os.environ.get('HOME'): the latter is
+        # None when HOME is unset, which makes os.path.join raise TypeError.
+        home = os.path.expanduser('~')
+        self.config_path = self.set_path(os.path.abspath(os.path.join(home, '.config/yoink')))
+        self.root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+        self.download_path = self.set_path(os.path.join(home, 'yoink/downloads'))
+
+    def __download_torrent(self, magnetlink):
+        pass
+
+    def set_path(self, path):
+        """Create ``path`` if it does not exist and return it.
+
+        Raises ValueError when ``path`` is empty or whitespace only.
+        """
+        if path.strip() == '':
+            raise ValueError('Path cannot be an empty string')
+
+        # exist_ok avoids the check-then-create race of os.path.exists().
+        os.makedirs(path, exist_ok=True)
+
+        return path
+
+    def empty_queue(self):
+        self.queue = []
+
+    def add(self, item):
+        self.queue.append(item)
+
+    def download(self, file):
+        if isinstance(file, ReadAllComics):
+            pass
+        elif isinstance(file, PirateBay):
+            pass
+        else:
+            raise TypeError('Downloads from this site are not yet supported')
+
+
+class Bounty:
+    """Pairs the provider resolved from ``url`` with a Downloader."""
+
+    def __init__(self, url):
+        self.provider = Provider(site=url)
+        self.downloader = Downloader()
+
+    def plunder(self, *args, **kwargs):
+        if isinstance(self.provider, ReadAllComics):
+            pass
+        else:
+            raise TypeError(f'{self.provider} is not a valid provider')
+
+
+if __name__ == '__main__':
+    item = Bounty('http://readallcomics.com/static-season-one-4-2021/')
+    # downloader = Downloader()
+    # print(downloader.download_path)
+    item.provider.download()
diff --git a/yoink/cli.py b/yoink/cli.py
new file mode 100644
index 0000000..4c16ca9
--- /dev/null
+++ b/yoink/cli.py
@@ -0,0 +1,25 @@
+import click
+
+
+@click.group()
+def yoink():
+    """Command-line entry point for yoink."""
+    pass
+
+
+@yoink.command()
+@click.argument('url')
+def download(url):
+    """Validate URL and announce the download (downloading is not wired up yet)."""
+    # Account for whitespace/blank urls
+    if url.strip() == '':
+        click.echo('url cannot be blank', err=True)
+        # A value returned from a command is ignored when click runs the
+        # group in its default standalone mode; exit non-zero explicitly.
+        raise click.exceptions.Exit(1)
+
+    click.echo('Downloading')
+
+
+if __name__ == '__main__':
+    yoink()
diff --git a/yoink/provider.py b/yoink/provider.py
new file mode 100644
index 0000000..5b7d895
--- /dev/null
+++ b/yoink/provider.py
@@ -0,0 +1,167 @@
+import os
+import requests
+import urllib.request
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse
+
+
+root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+# expanduser('~') instead of os.environ.get('HOME'), which is None when HOME
+# is unset and would make abspath() raise at import time.
+config_dir = os.path.abspath(os.path.expanduser('~'))
+
+
+class Downloadable(object):
+    """Base class for a remote page that can be fetched and parsed."""
+
+    stopped_state = ('pausedUP', 'stalledUP', 'uploading', 'seeding')
+
+    def __init__(self, uri) -> None:
+        self.uri = uri
+        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
+        # download() reads this; same location Downloader uses in bounty.py.
+        self.download_path = os.path.join(config_dir, 'yoink/downloads')
+
+    @property
+    def markup(self):
+        # NOTE: issues a new HTTP request on every access.
+        return requests.get(self.uri)
+
+    @property
+    def soup(self):
+        return BeautifulSoup(self.markup.content, 'html.parser')
+
+    def download(self):
+        pass
+
+
+class PirateBay(Downloadable):
+    @property
+    def magnet(self):
+        """Return the href of the element titled 'Get this torrent'."""
+        # The original computed this value but never returned it.
+        return self.soup.find('', attrs={'title': 'Get this torrent'}).attrs['href']
+
+
+class ReadAllComics(Downloadable):
+    """A comic page whose image URLs are collected into ``filelist``."""
+
+    def __init__(self, uri) -> None:
+        super().__init__(uri)
+        self.filelist = self.__get_comic_filelist()
+
+    @classmethod
+    def get_frontpage_links(cls):
+        markup = requests.get('http://www.readallcomics.com')
+        soup = BeautifulSoup(markup.content, 'html.parser')
+        posts = soup.find_all('div', class_='type-post')
+        links = []
+
+        for post in posts:
+            links.append({
+                'title': post.find('h2').text,
+                'image': post.find('img', height='250').attrs['src'],
+                'uri': post.find('a', class_='font-link').attrs['href']
+            })
+
+        return links
+
+    @property
+    def title(self):
+        return self.soup.title.string.replace(' | Read All Comics Online For Free', '').replace('…', '').replace('#', '').replace(':', '').strip()
+
+    @property
+    def category(self):
+        data = self.soup.find('a', attrs={'rel': 'category tag'})
+        return data.text
+
+    def __can_remove(self, filename):
+        ignore = ('.cbr', '.cbz', '000.jpg', '001.jpg')
+        return not filename.endswith(ignore)
+
+    def __get_image_src(self, comic):
+        """Return the image URL for ``comic``, or None when it has none."""
+        src = comic.attrs.get('src')
+        if src:
+            return src
+
+        # A container such as div.separator has attributes (``class``) but no
+        # ``src``; indexing attrs['src'] on it raised KeyError. Use the first
+        # <img> found inside it instead.
+        image = comic.find('img')
+        return image.attrs.get('src') if image is not None else None
+
+    def __parse_soup(self):
+        """Return the first non-empty candidate element list, or []."""
+        # ``soup`` re-fetches the page on every access, so read it once.
+        page = self.soup
+        cases = {
+            'default': page.find_all('div', class_='separator'),
+            'no-div': page.find_all('img', attrs={'width': '1000px'}),
+            'excaliber': page.find_all('img')
+        }
+
+        for comics in cases.values():
+            if len(comics) > 0:
+                return comics
+
+        # Previously fell through and returned None, which broke map().
+        return []
+
+    def __get_comic_filelist(self):
+        comics = self.__parse_soup()
+        sources = map(self.__get_image_src, comics)
+        # Drop elements that yielded no URL so download() only sees strings.
+        return [src for src in sources if src]
+
+    def download(self):
+        """Download every image in ``filelist`` into ``download_path/<title>``."""
+        skippable_files = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png')
+
+        # ``title`` re-fetches the page on every access, so read it once.
+        title = self.title
+        target_dir = os.path.join(self.download_path, title)
+        os.makedirs(target_dir, exist_ok=True)
+
+        # The opener depends only on the headers: install it once, not per URL.
+        opener = urllib.request.build_opener()
+        opener.addheaders = [('User-agent', self.headers['user-agent'])]
+        urllib.request.install_opener(opener)
+
+        for index, url in enumerate(self.filelist):
+            if url.endswith(skippable_files):
+                continue
+
+            if not url.endswith('.jpg'):
+                # Was str(url.index(url)), which is always '0', so every such
+                # file was saved under the same name.
+                filename = f'{title}{str(index).zfill(3)}.jpg'
+            else:
+                # rsplit keeps working when the file name contains extra dots.
+                page_number, file_extension = url.split('/')[-1].rsplit('.', 1)
+                filename = f'{title}{page_number.zfill(3)}.{file_extension}'
+
+            urllib.request.urlretrieve(url, filename=os.path.join(target_dir, filename))
+
+
+def Provider(site='http://readallcomics.com'):
+    """Return the provider instance that handles ``site``.
+
+    Raises ValueError when no provider is registered for the site's domain.
+    """
+    providers = {
+        'readallcomics': ReadAllComics
+    }
+
+    domain = urlparse(site)
+
+    # Ignore a leading "www." so http://www.readallcomics.com resolves too.
+    host = domain.netloc
+    if host.startswith('www.'):
+        host = host[len('www.'):]
+    name = host.split('.')[0]
+
+    if name not in providers:
+        raise ValueError('Downloads for this site are not yet supported')
+
+    return providers[name](uri=site)
diff --git a/yoink/tests/__init__.py b/yoink/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/yoink/tests/test_basic.py b/yoink/tests/test_basic.py
new file mode 100644
index 0000000..eb632da
--- /dev/null
+++ b/yoink/tests/test_basic.py
@@ -0,0 +1,42 @@
+import os
+import unittest
+from bs4 import BeautifulSoup
+from yoink.bounty import Bounty, Downloader
+from yoink.provider import Provider, ReadAllComics
+
+
+
+class BasicTestCase(unittest.TestCase):
+    def setUp(self):
+        self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
+        self.item = Bounty(self.test_comic)
+
+    def test_000_provider_generates_or_fails_correctly(self):
+        # ensure valid comic link returns correct factory
+        self.assertTrue(isinstance(self.item.provider, ReadAllComics))
+
+        # ensure invalid comic link raises ValueError stating lack of support
+        def busted():
+            return Bounty('http://viz.com')
+
+        with self.assertRaises(ValueError) as context:
+            busted()
+
+        self.assertIn('Downloads for this site are not yet supported', str(context.exception))
+
+
+    def test_001_provider_markup_returns_200(self):
+        self.assertEqual(self.item.provider.markup.status_code, 200)
+
+
+    def test_002_provider_soup_object_exists(self):
+        self.assertTrue(isinstance(self.item.provider.soup, BeautifulSoup))
+
+
+    def test_003_downloader_object_exists(self):
+        self.assertTrue(isinstance(self.item.downloader, Downloader))
+
+    def test_004_downloader_paths_exist(self):
+        self.assertTrue(os.path.exists(self.item.downloader.root_path))
+        self.assertTrue(os.path.exists(self.item.downloader.config_path))
+