From 91ff7e0b807519daf1f55ac874a70220f042f037 Mon Sep 17 00:00:00 2001
From: Bryan Bailey <bcbailey84@gail.com>
Date: Mon, 14 Mar 2022 23:07:50 -0400
Subject: [PATCH] Error handling for http errors; added gitlab-ci

---
 .gitlab-ci.yml            |  8 ++++++++
 README.md                 |  2 +-
 results.xml               |  1 +
 yoink/cli.py              |  6 +++++-
 yoink/comic.py            | 34 +++++++++++++++++++++-------------
 yoink/common.py           |  4 ++--
 yoink/scraper.py          | 19 ++++++++++---------
 yoink/tests/test_basic.py | 14 +++++++++++++-
 8 files changed, 61 insertions(+), 27 deletions(-)
 create mode 100644 .gitlab-ci.yml
 create mode 100644 results.xml
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..69cdc8e
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,8 @@
+pytest:
+  stage: test
+  script:
+    - pytest --junitxml report.xml yoink/tests/test_basic.py
+  artifacts:
+    when: always
+    reports:
+      junit: report.xml
\ No newline at end of file
diff --git a/README.md b/README.md
index 4428dfa..26292bf 100644
--- a/README.md
+++ b/README.md
@@ -2,4 +2,4 @@
 
 [![wakatime](https://wakatime.com/badge/gitlab/Rigil-Kent/yoink.svg)](https://wakatime.com/badge/gitlab/Rigil-Kent/yoink)
 
-Yoink! is a multisite media download tool. It scrapes comics from readallcomics.com compressing them into a .cbr archive and grabs magnet links from tpb.party
\ No newline at end of file
+Yoink! is a multisite media download tool. It scrapes comics from online comic aggregator sites like readallcomics.com, compressing them into a .cbr archive, and grabs magnet links from tpb.party
\ No newline at end of file
diff --git a/results.xml b/results.xml
new file mode 100644
index 0000000..5181bf7
--- /dev/null
+++ b/results.xml
@@ -0,0 +1 @@
+<?xml version="1.0" encoding="utf-8"?><testsuites><testsuite name="pytest" errors="0" failures="0" skipped="0" tests="10" time="34.081" timestamp="2022-03-14T23:05:57.364590" hostname="DESKTOP-SE506CG"><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_000_comic_generates_valid_markup" time="1.243" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_001_comic_has_valid_title" time="0.998" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_002_comic_has_valid_category" time="1.250" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_003_empty_comic_folder" time="0.591" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_004_comic_folder_created_and_populated" time="22.773" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_005_comic_archive_generated" time="3.357" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_006_folder_cleaned_after_archive_generation" time="1.079" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_007_comic_instance_has_archiver" time="0.587" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_008_comic_is_subclass_scrapable" time="0.959" /><testcase classname="yoink.tests.test_basic.BasicTestCase" name="test_009_invalid_comic_link" time="1.050" /></testsuite></testsuites>
\ No newline at end of file
diff --git a/yoink/cli.py b/yoink/cli.py
index f0a8275..085fbeb 100644
--- a/yoink/cli.py
+++ b/yoink/cli.py
@@ -40,16 +40,20 @@ def download(url, comic, torrent, path):
         except ValueError:
             click.echo(f'{url} is not supported or is not a valid URL')
             return 1
+
         click.echo(f'Downloading {comic.title}')
         comic.archiver.download()
+
         click.echo('Building comic archive')
         comic.archiver.generate_archive()
+
         click.echo('Cleaning up')
         comic.archiver.cleanup_worktree()
+        
         click.echo('Success')
     
     if torrent:
-        click.echo('Downloading a torrent')
+        click.echo('Oops! It looks like Torrents aren\'t yet fully supported.')
 
     
 
diff --git a/yoink/comic.py b/yoink/comic.py
index 0323e68..61ed807 100644
--- a/yoink/comic.py
+++ b/yoink/comic.py
@@ -1,5 +1,3 @@
-from click import format_filename
-from soupsieve import select
 from yoink.common import required_comic_files, skippable_images, library_path
 from yoink.scraper import Scrapable
 
@@ -14,10 +12,16 @@ class Comic(Scrapable):
         super().__init__(url)
         self.archiver = ComicArchiver(self, library=path)
 
+    def __is_supported_image(self, image):
+        return image.endswith(('.jpg', '.jpeg'))  # endswith needs a tuple; ('.jpg' or '.jpeg') is just '.jpg'
+
 
     def __get_image_src(self, comic):
         if comic.attrs:
-            return comic.attrs['src']
+            try:
+                return comic.attrs['src']
+            except KeyError:
+                return comic['data-src']
 
         for image in comic:
             return image.attrs['src']
@@ -27,7 +31,8 @@ class Comic(Scrapable):
             'default': self.soup.find_all('div', class_='separator'),
             'no-div': self.soup.find_all('img', attrs={'width': '1000px'}),
             'excaliber': self.soup.find_all('img'),
-            'dbsuper': self.soup.findAll('meta', attrs={'property': 'twitter:image'})
+            'dbsuper': self.soup.find_all('meta', attrs={'property': 'twitter:image'}),
+            'mangadex': self.soup.find_all('img', attrs={'draggable': 'false'})
         }
 
         for case in soup.keys():
@@ -39,11 +44,18 @@ class Comic(Scrapable):
     @property
     def filelist(self):
         comics = self.__parse_soup()
+        # keep only image sources that do not end with one of the skippable_images suffixes
         return [comic for comic in list(map(self.__get_image_src, comics)) if not comic.endswith(skippable_images)]
 
 
     @property
-    def title(self): return self.soup.title.string.replace(' | Read All Comics Online For Free', '').replace('…', '').replace('#', '').replace(':', '').strip()
+    def title(self):
+        if 'readallcomics' in self.url:
+            return self.soup.title.string.replace(' | Read All Comics Online For Free', '').replace('…', '').replace('#', '').replace(':', '').strip()
+        elif 'mangadex' in self.url:
+            return self.soup.find('meta', property='og:title').attrs['content'].replace(' - Mangadex', '').replace('Read ', '')
+        else:
+            return 'Uncategorized'
 
     @property
     def category(self):
@@ -75,9 +87,11 @@ class ComicArchiver:
                 print(formatted_file, end='\r')
                 urllib.request.urlretrieve(url, filename=formatted_file)
             else:
-                page_number = url.split('/')[-1].split('.')[0].zfill(3)
+                page_number = str(index).zfill(3)
                 file_extension = url.split('/')[-1].split('.')[1]
-                urllib.request.urlretrieve(url, filename=os.path.join(self.worktree, f'{self.comic.title}{page_number}.{file_extension}'))
+                formatted_file = f'{self.comic.title} - {page_number}.{file_extension}'
+                print(formatted_file, end='\r')
+                urllib.request.urlretrieve(url, filename=os.path.join(self.worktree, formatted_file))
         print()
 
     def generate_archive(self, archive_format='.cbr'):
@@ -95,10 +109,4 @@ class ComicArchiver:
 
 if __name__ == '__main__':
     comic = Comic('http://www.readallcomics.com/static-season-one-4-2021/')
-    # # print(comic.filelist)
-    # # print(len(comic.filelist))
-    # archiver = ComicArchiver(comic)
-    # archiver.download()
-    # archiver.generate_archive()
-    # archiver.cleanup_worktree()
     print(comic.category)
\ No newline at end of file
diff --git a/yoink/common.py b/yoink/common.py
index 508fefe..146b7eb 100644
--- a/yoink/common.py
+++ b/yoink/common.py
@@ -11,7 +11,7 @@ app_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 config_path = os.path.abspath(os.path.join(os.environ.get('HOME'), '.config/yoink'))
 library_path = os.path.abspath(os.path.join(os.environ.get('HOME'), 'yoink/library'))
 required_comic_files = ('.cbr', '.cbz', '000.jpg', '001.jpg')
-skippable_images = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png')
+skippable_images = ('logo-1.png', 'logo.png', 'report.png', 'request.png', 'prev.png', 'Next.png', 'Donate.png', '11.png', 'navbar.svg')
 torrent_concurrent_download_limit = 1
-supported_sites = ['readallcomics.com', 'tpb.party', 'dragonballsupermanga.net']
+supported_sites = ['readallcomics.com', 'tpb.party', 'dragonballsupermanga.net', 'mangadex.tv']
 headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
diff --git a/yoink/scraper.py b/yoink/scraper.py
index 593b435..5d26c92 100644
--- a/yoink/scraper.py
+++ b/yoink/scraper.py
@@ -1,7 +1,6 @@
 import requests
 from bs4 import BeautifulSoup
 
-
 from yoink.common import supported_sites
 
 
@@ -12,17 +11,19 @@ class Scrapable:
 
         
         self.__check_site_support()
-        # for link in supported_sites:
-        #     if link in self.url:
-        #         return
-        #     else:
-        #         raise ValueError('Unsupported site')
-        # if not any(url in link for link in supported_sites):
-        #     raise ValueError('Unsupported site')
 
 
     @property
-    def markup(self) -> str: return requests.get(self.url).content
+    def markup(self) -> bytes:
+        try:
+            # requests does not raise on 4xx/5xx responses by default; raise_for_status() turns them into HTTPError
+            req = requests.get(self.url)
+            req.raise_for_status()
+            return req.content
+        except requests.exceptions.HTTPError as e:
+            # exit with the error text, e.g. "404 Client Error: Not Found for url: <url>"
+            raise SystemExit(e)
+
 
     @property
     def soup(self) -> BeautifulSoup: return BeautifulSoup(self.markup, 'html.parser')
diff --git a/yoink/tests/test_basic.py b/yoink/tests/test_basic.py
index fb41e5e..e236a01 100644
--- a/yoink/tests/test_basic.py
+++ b/yoink/tests/test_basic.py
@@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
 
 import os
 import unittest
+from shutil import rmtree
 
 from yoink.common import app_root, library_path, config_path, skippable_images, supported_sites, required_comic_files, torrent_concurrent_download_limit, headers
 from yoink.comic import Comic, ComicArchiver
@@ -14,6 +15,14 @@ class BasicTestCase(unittest.TestCase):
         self.test_comic = 'http://readallcomics.com/static-season-one-4-2021/'
         self.comic = Comic(self.test_comic)
         self.archiver = ComicArchiver(self.comic)
+        self.remove_queue = []
+
+        
+    def tearDown(self) -> None:
+        for folder in self.remove_queue:
+            rmtree(folder, ignore_errors=True)  # a queued folder may never have been created
+        
+
 
     def test_000_comic_generates_valid_markup(self):
         self.assertTrue('!DOCTYPE html' in str(self.comic.markup))
@@ -51,4 +60,7 @@ class BasicTestCase(unittest.TestCase):
         with self.assertRaises(ValueError) as condition:
             comic = Comic('https://viz.com')
 
-        self.assertTrue('Unsupported' in str(condition.exception))
\ No newline at end of file
+        self.assertTrue('Unsupported' in str(condition.exception))
+
+        self.remove_queue.append(os.path.join(library_path, f'comics/{self.comic.title}'))
+