48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
import os
|
|
import urllib.request
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def get_soup_obj(url, headers):
|
|
req = urllib.request.Request(url, headers=headers)
|
|
raw = urllib.request.urlopen(req)
|
|
|
|
return BeautifulSoup(raw, 'html.parser')
|
|
|
|
|
|
def get_title(soup):
|
|
''' Grab the title element string from a Beautiful Soup object
|
|
removing the last 45 garbage characters'''
|
|
comic_title = soup.title.string.replace('Read All Comics Online For Free', '').replace('…', '').replace('|', '')
|
|
return comic_title.strip()
|
|
|
|
|
|
def get_img_links(soup):
|
|
images = []
|
|
for link in soup.findAll('a'):
|
|
if not link.has_attr('href'):
|
|
img = link.img['src']
|
|
images.append(img)
|
|
|
|
return images
|
|
|
|
def download_img_links(links, soup, folder, title=None):
|
|
# test to see if the folder exists
|
|
if title is None:
|
|
print('No title specified\ncreating from reqest')
|
|
title = get_title(soup)
|
|
# if it doesn't, create it
|
|
if not os.path.exists(os.path.join(folder, title)):
|
|
os.makedirs(os.path.join(folder, title), mode=0o777)
|
|
print('Folder {} created...'.format(os.path.join(folder, title)))
|
|
# retrieve files from server
|
|
for link in links:
|
|
opener = urllib.request.build_opener()
|
|
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
|
|
urllib.request.install_opener(opener)
|
|
print('Downloading file {} from {}'.format((link[-6:] if link[-11:-4] != '%2Bcopy' else (link[-13:-11] + link[-4:]) ), title))
|
|
urllib.request.urlretrieve(link, filename=os.path.join(folder + '/' + title, title + ' 0' + (link[-6:] if link[-11:-4] != '%2Bcopy' else (link[-13:-11] + link[-4:]) )))
|
|
|
|
print("Download Complete!")
|
|
|