passing unittests TODO fix comic downloads

This commit is contained in:
Bryan Bailey
2022-03-09 00:39:40 -05:00
commit 8cdb80315a
13 changed files with 351 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
env
__pycache__
.coverage

1
Dockerfile.dev Normal file
View File

@@ -0,0 +1 @@
FROM alpine

1
README.md Normal file
View File

@@ -0,0 +1 @@
# yoink

21
requirements.txt Normal file
View File

@@ -0,0 +1,21 @@
beautifulsoup4==4.10.0
bs4==0.0.1
certifi==2021.10.8
charset-normalizer==2.0.12
click==8.0.4
coverage==6.3.2
idna==3.3
qbittorrent==0.1.6
requests==2.27.1
soupsieve==2.3.1
urllib3==1.26.8

0
setup.cfg Normal file
View File

0
setup.py Normal file
View File

69
test.sh Normal file
View File

@@ -0,0 +1,69 @@
# : ${DIALOG_OK=0}
# : ${DIALOG_CANCEL=1}
# : ${DIALOG_HELP=2}
# : ${DIALOG_EXTRA=3}
# : ${DIALOG_ITEM_HELP=4}
# : ${DIALOG_ESC=255}
# lipsum="Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
# tmp_file=$(tempfile 2>/dev/null) || tmp_file=/tmp/test$$
# trap "rm -f $tmp_file" 0 1 2 5 15
# dialog --title "Testing" --clear --inputbox "${lipsum}" 16 51 2> $tmp_file
# return_value=$?
# case $return_value in
# $DIALOG_OK)
# echo "Result: $(cat $tmp_file)";;
# $DIALOG_CANCEL)
# echo "Cancel pressed.";;
# $DIALOG_HELP)
# echo "Help pressed.";;
# $DIALOG_EXTRA)
# echo "Extra button pressed.";;
# $DIALOG_ITEM_HELP)
# echo "Item-help button pressed.";;
# $DIALOG_ESC)
# if test -s $tmp_file ; then
# cat $tmp_file
# else
# echo "ESC pressed."
# fi
# ;;
# esac
# dialog --begin 5 70 --backtitle "Shirak v0.1.0" --title "Info" --clear --msgbox 'Greetings, mortal...' 16 56 2> /dev/null
# Interactive demo: ask for a name with dialog(1) and greet the user.

# Scratch file that receives dialog's answer (the inputbox result is read
# from dialog's stderr below). mktemp creates a private, unpredictable file
# instead of a fixed name in the world-writable /tmp directory.
output=$(mktemp "${TMPDIR:-/tmp}/shirak.XXXXXX") || exit 1

# Remove the scratch file however the script ends. On a signal we simply
# exit, which in turn fires the EXIT trap.
trap 'rm -f "$output"' EXIT
trap 'exit' SIGHUP SIGINT SIGTERM

# hello [NAME...]
# Show a greeting message box. All arguments are joined into a single name;
# with no argument, or an empty one, the user is greeted as "Stranger".
function hello() {
    local name="${*:-Stranger}"
    dialog --backtitle "Shirak v0.1.0 | test" --title "Greetings" --clear --msgbox "Greetings, ${name}..." 10 41
}

dialog --title "Input your name" --backtitle "Shirak v0.1.0 | test" --inputbox "Enter your name " 8 60 2>"$output"
response=$?
name=$(<"$output")

# The script distinguishes three dialog exit statuses: 0, 1 and 255.
case $response in
    0)
        # Quoted so that spaces and glob characters typed by the user are
        # passed through literally instead of being expanded by the shell.
        hello "$name"
        ;;
    1)
        echo "Cancel pressed."
        ;;
    255)
        echo "[esc] pressed."
        ;;
esac

1
yoink/__init__.py Normal file
View File

@@ -0,0 +1 @@
from yoink import *

66
yoink/bounty.py Normal file
View File

@@ -0,0 +1,66 @@
import os
from qbittorrent import Client
from yoink.provider import PirateBay, Provider, ReadAllComics
class Downloader:
    """Hold the qBittorrent client, a download queue and the local paths.

    Creating an instance connects to the qBittorrent client at
    ``http://127.0.0.1:8080``, logs in, and makes sure the configuration
    and download directories exist.
    """

    def __init__(self) -> None:
        self.qb = Client('http://127.0.0.1:8080')
        self.qb.login('admin', 'adminadmin')
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
        self.limit = 1
        self.queue = []

        # HOME is not guaranteed to be set (e.g. stripped-down service
        # environments); os.path.join(None, ...) would raise TypeError, so
        # fall back to the platform's notion of the home directory.
        home = os.environ.get('HOME') or os.path.expanduser('~')

        self.config_path = self.set_path(os.path.abspath(os.path.join(home, '.config/yoink')))
        self.root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
        self.download_path = self.set_path(os.path.join(home, 'yoink/downloads'))

    def __download_torrent(self, magnetlink):
        """Placeholder for torrent downloads; not implemented yet."""
        pass

    def set_path(self, path):
        """Create *path* (and any missing parents) if needed and return it.

        Raises:
            ValueError: if *path* is empty or only whitespace.
        """
        if path.strip() == '':
            raise ValueError('Path cannot be an empty string')

        if not os.path.exists(path):
            # exist_ok guards against another process creating the
            # directory between the check above and this call.
            os.makedirs(path, exist_ok=True)

        return path

    def empty_queue(self):
        """Discard everything currently queued."""
        self.queue = []

    def add(self, item):
        """Append *item* to the end of the queue."""
        self.queue.append(item)

    def download(self, file):
        """Dispatch on the provider type of *file*.

        Both recognised provider types are placeholders for now.

        Raises:
            TypeError: if *file* is neither a ``ReadAllComics`` nor a
                ``PirateBay`` instance.
        """
        if isinstance(file, ReadAllComics):
            pass
        elif isinstance(file, PirateBay):
            pass
        else:
            raise TypeError('Downloads from this site are not yet supported')
class Bounty:
    """Pair the provider resolved for a URL with a ``Downloader``."""

    def __init__(self, url):
        # The provider is resolved first, so an unsupported URL fails
        # before a Downloader is ever constructed.
        self.provider = Provider(url)
        self.downloader = Downloader()

    def plunder(self, *args, **kwargs):
        """Validate the provider type; no further action is taken yet.

        Raises:
            TypeError: if the provider is not a ``ReadAllComics`` instance.
        """
        if not isinstance(self.provider, ReadAllComics):
            raise TypeError(f'{self.provider} is not a valid provider')
        return None
if __name__ == '__main__':
    # Manual run: resolve one known comic URL and call its provider's
    # download() directly.
    bounty = Bounty('http://readallcomics.com/static-season-one-4-2021/')
    bounty.provider.download()

21
yoink/cli.py Normal file
View File

@@ -0,0 +1,21 @@
import click
@click.group()
def yoink():
    # Root command group; sub-commands are registered on it with
    # ``@yoink.command()``. Kept as a comment rather than a docstring so
    # the generated help text stays exactly as it was.
    return None
@yoink.command()
@click.argument('url')
def download(url):
    # Validates the URL argument and prints a status line; no download is
    # triggered from here yet.

    # Account for whitespace/blank urls. A value returned from a click
    # command is not used as the process exit status, so exit explicitly
    # with a non-zero status instead of `return 1`.
    if not url.strip():
        click.echo('url cannot be blank')
        raise SystemExit(1)

    click.echo('Downloading')
# Allow running this module directly as a script.
if __name__ == '__main__':
    yoink()

126
yoink/provider.py Normal file
View File

@@ -0,0 +1,126 @@
import os
import requests
import urllib
from bs4 import BeautifulSoup
from urllib.parse import urlparse
# Parent of the directory that contains this module.
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# The user's home directory. HOME may be unset, in which case
# os.path.abspath(None) would raise at import time; fall back to
# os.path.expanduser('~') instead.
config_dir = os.path.abspath(os.environ.get('HOME') or os.path.expanduser('~'))
class Downloadable(object):
    """Base class for a remote page that is fetched and parsed on demand.

    The HTTP response and the parsed document are each obtained once per
    instance and then reused, so repeated access to ``markup`` or ``soup``
    does not download the page again.
    """

    # NOTE(review): these look like torrent-client state names; nothing in
    # this module reads them -- confirm where they are consumed.
    stopped_state = ('pausedUP', 'stalledUP', 'uploading', 'seeding')

    def __init__(self, uri) -> None:
        self.uri = uri
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
        # Per-instance caches filled lazily by the properties below.
        self._markup = None
        self._soup = None

    @property
    def markup(self):
        """Return the HTTP response for ``self.uri``, fetched on first use."""
        # getattr keeps this working for instances built without __init__.
        if getattr(self, '_markup', None) is None:
            # Send the browser-like user agent this class already defines.
            self._markup = requests.get(self.uri, headers=self.headers)
        return self._markup

    @property
    def soup(self):
        """Return the response body parsed with ``html.parser``."""
        if getattr(self, '_soup', None) is None:
            self._soup = BeautifulSoup(self.markup.content, 'html.parser')
        return self._soup

    def download(self):
        """Do nothing; subclasses override this to fetch their content."""
        pass
class PirateBay(Downloadable):
    """A page that exposes a 'Get this torrent' link."""

    @property
    def magnet(self):
        """Return the ``href`` of the element titled 'Get this torrent'.

        Raises:
            AttributeError: if the page contains no such element.
        """
        # The lookup result was previously computed and then dropped, so
        # the property always evaluated to None; return it.
        return self.soup.find('', attrs={'title': 'Get this torrent'}).attrs['href']
class ReadAllComics(Downloadable):
    """A single comic post on readallcomics.com.

    On construction the page is parsed and the image URLs found on it are
    stored in ``filelist``; ``download()`` saves them under
    ``<download_path>/<title>/``.
    """

    # Images belonging to the site layout rather than to the comic itself.
    __SKIPPABLE_FILES = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png')

    def __init__(self, uri, download_path=None) -> None:
        """Parse the post at *uri*.

        Args:
            uri: address of the comic post.
            download_path: directory under which comics are stored.
                Defaults to ``<home>/yoink/downloads``.
        """
        super().__init__(uri)
        if download_path is None:
            # HOME may be unset; fall back to expanduser in that case.
            home = os.environ.get('HOME') or os.path.expanduser('~')
            download_path = os.path.join(home, 'yoink/downloads')
        # download() reads this attribute; it was previously never set.
        self.download_path = download_path
        self.filelist = self.__get_comic_filelist()

    @classmethod
    def get_frontpage_links(cls):
        """Return one ``{'title', 'image', 'uri'}`` dict per front-page post."""
        response = requests.get('http://www.readallcomics.com')
        page = BeautifulSoup(response.content, 'html.parser')
        return [
            {
                'title': post.find('h2').text,
                'image': post.find('img', height='250').attrs['src'],
                'uri': post.find('a', class_='font-link').attrs['href'],
            }
            for post in page.find_all('div', class_='type-post')
        ]

    @property
    def title(self):
        """Return the page title without the site suffix, '#' and ':'."""
        raw = self.soup.title.string.replace(' | Read All Comics Online For Free', '')
        # NOTE(review): this chain used to include ``.replace('', '')``,
        # which is a no-op. If a special character was meant to be removed
        # there, add it back explicitly.
        return raw.replace('#', '').replace(':', '').strip()

    @property
    def category(self):
        """Return the text of the first link marked ``rel="category tag"``."""
        data = self.soup.find('a', attrs={'rel': 'category tag'})
        return data.text

    def __can_remove(self, filename):
        """Return True unless *filename* ends with one of the kept suffixes."""
        ignore = ('.cbr', '.cbz', '000.jpg', '001.jpg')
        return not filename.endswith(ignore)

    def __get_image_src(self, comic):
        """Return the image URL for a matched element, or None.

        *comic* is either an ``<img>`` tag itself or a container element;
        for a container the first ``<img>`` found inside it is used.
        """
        src = comic.attrs.get('src')
        if src:
            return src
        # Containers carry attributes such as ``class`` but no ``src``;
        # indexing attrs['src'] on them raised KeyError.
        image = comic.find('img')
        if image is None:
            return None
        return image.attrs.get('src')

    def __parse_soup(self):
        """Return the first non-empty set of candidate elements.

        The lookups are tried in order, most specific first; an empty list
        is returned when none of them matches anything.
        """
        page = self.soup
        lookups = (
            lambda: page.find_all('div', class_='separator'),       # 'default'
            lambda: page.find_all('img', attrs={'width': '1000px'}),  # 'no-div'
            lambda: page.find_all('img'),                           # 'excaliber'
        )
        for lookup in lookups:
            found = lookup()
            if found:
                return found
        return []

    def __get_comic_filelist(self):
        """Return the image URLs on the page, dropping elements without one."""
        sources = (self.__get_image_src(element) for element in self.__parse_soup())
        return [src for src in sources if src]

    @staticmethod
    def __page_filename(title, url, index):
        """Build the local file name for the image at *url*.

        ``.jpg`` URLs keep their own name, left-padded with zeros to three
        characters. Anything else is numbered by its position in the file
        list and saved with a ``.jpg`` extension.
        """
        if url.endswith('.jpg'):
            stem, extension = os.path.splitext(url.split('/')[-1])
            return f'{title}{stem.zfill(3)}{extension}'
        # ``url.index(url)`` was always 0, so every such page was written
        # to the same file; number by list position instead.
        return f'{title}{str(index).zfill(3)}.jpg'

    def download(self):
        """Save every comic image in ``filelist`` to the download folder."""
        # ``import urllib`` alone does not guarantee the submodule is loaded.
        import urllib.request

        if not self.filelist:
            return

        # Resolve the title once instead of on every iteration.
        title = self.title
        target_dir = os.path.join(self.download_path, title)
        os.makedirs(target_dir, exist_ok=True)

        # The opener only needs to be installed once, not per file.
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', self.headers['user-agent'])]
        urllib.request.install_opener(opener)

        for index, url in enumerate(self.filelist):
            if url.endswith(self.__SKIPPABLE_FILES):
                continue
            filename = self.__page_filename(title, url, index)
            urllib.request.urlretrieve(url, filename=os.path.join(target_dir, filename))
def Provider(site='http://readallcomics.com'):
    """Return a provider instance for the site that hosts *site*.

    The provider is chosen by the first label of the URL's host name,
    ignoring case and a leading ``www.``.

    Args:
        site: full URL (including scheme) of the page to handle.

    Raises:
        ValueError: if no provider is registered for the host.
    """
    providers = {
        'readallcomics': ReadAllComics
    }

    # ``hostname`` is already lower-cased and free of port/credentials;
    # it is None when the URL has no network location.
    host = (urlparse(site).hostname or '').lower()
    # 'www.readallcomics.com' must resolve like 'readallcomics.com'.
    if host.startswith('www.'):
        host = host[len('www.'):]
    name = host.split('.')[0]

    if name not in providers:
        raise ValueError('Downloads for this site are not yet supported')

    return providers[name](uri=site)

0
yoink/tests/__init__.py Normal file
View File

42
yoink/tests/test_basic.py Normal file
View File

@@ -0,0 +1,42 @@
import os
import unittest
from bs4 import BeautifulSoup
from yoink.bounty import Bounty, Downloader
from yoink.provider import Provider, ReadAllComics
class BasicTestCase(unittest.TestCase):
    """Smoke tests for ``Bounty``, its provider and its downloader.

    ``setUp`` builds a ``Bounty`` for a live readallcomics.com URL, so the
    tests depend on whatever constructing a ``Bounty`` requires.
    """

    def setUp(self):
        self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
        self.item = Bounty(self.test_comic)

    def test_000_provider_generates_or_fails_correctly(self):
        # ensure valid comic link returns correct factory
        self.assertIsInstance(self.item.provider, ReadAllComics)

        # ensure invalid comic link raises ValueError stating lack of support
        with self.assertRaises(ValueError) as context:
            Bounty('http://viz.com')

        # Checked after the ``with`` block: statements placed after the
        # raising call inside it never run. Compare against the message
        # text, since ``in`` on an exception object raises TypeError.
        self.assertIn('Downloads for this site are not yet supported', str(context.exception))

    def test_001_provider_markup_returns_200(self):
        self.assertEqual(self.item.provider.markup.status_code, 200)

    def test_002_provider_soup_object_exists(self):
        self.assertIsInstance(self.item.provider.soup, BeautifulSoup)

    def test_003_downloader_object_exists(self):
        self.assertIsInstance(self.item.downloader, Downloader)

    def test_004_downloader_paths_exist(self):
        self.assertTrue(os.path.exists(self.item.downloader.root_path))
        self.assertTrue(os.path.exists(self.item.downloader.config_path))
        self.assertTrue(os.path.exists(self.item.downloader.download_path))