yoink-go initial commit
This commit is contained in:
108
comic/archive.go
Normal file
108
comic/archive.go
Normal file
@@ -0,0 +1,108 @@
|
||||
package comic
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ArchiveError is the error type returned when building a comic archive
// fails.
type ArchiveError struct {
	// Message is the human-readable description returned by Error.
	Message string
	// Code is a numeric status attached to the failure.
	Code int
}

// Error implements the error interface by reporting the stored Message.
func (e ArchiveError) Error() string {
	msg := e.Message
	return msg
}
|
||||
|
||||
func (c *Comic) Archive() error {
|
||||
|
||||
outputPath := filepath.Join(c.LibraryPath, c.Title, c.Title+".cbz")
|
||||
err := os.MkdirAll(filepath.Dir(outputPath), os.ModePerm)
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
Message: "error creating directory",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
zipFile, err := os.Create(outputPath)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer zipFile.Close()
|
||||
|
||||
zwriter := zip.NewWriter(zipFile)
|
||||
defer zwriter.Close()
|
||||
|
||||
sourcePath := filepath.Join(c.LibraryPath, c.Title)
|
||||
|
||||
err = filepath.Walk(
|
||||
filepath.Dir(sourcePath),
|
||||
func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
Message: "error walking archive",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
if ext != ".jpg" && ext != ".jpeg" && ext != ".png" {
|
||||
return nil
|
||||
}
|
||||
|
||||
relPath, err := filepath.Rel(sourcePath, path)
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
Message: "error walking archive",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
Message: "error walking archive",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
zipEntry, err := zwriter.Create(relPath)
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
Message: "error walking archive",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
_, err = io.Copy(zipEntry, file)
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
Message: "error walking archive",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
Message: "error writing files to archive",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("Created archive\n: %s", outputPath)
|
||||
return nil
|
||||
}
|
||||
31
comic/cleanup.go
Normal file
31
comic/cleanup.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package comic
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func (c *Comic) Cleanup() error {
|
||||
filepath.Walk(
|
||||
filepath.Join(c.LibraryPath, c.Title),
|
||||
func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, ext := range []string{".jpg", ".jpeg", ".png"} {
|
||||
edited := strings.Replace(info.Name(), c.Title, "", 1)
|
||||
edited = strings.Trim(edited, " ")
|
||||
|
||||
if !strings.HasPrefix(strings.ToLower(edited), "001") && strings.HasSuffix(info.Name(), ext) {
|
||||
return os.Remove(path)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return nil
|
||||
}
|
||||
77
comic/comic.go
Normal file
77
comic/comic.go
Normal file
@@ -0,0 +1,77 @@
|
||||
package comic
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// var debugUrl = "https://readallcomics.com/ultraman-x-avengers-001-2024/"
|
||||
|
||||
type Comic struct {
|
||||
URL string
|
||||
Title string
|
||||
Markup *goquery.Document
|
||||
Filelist []string
|
||||
Next *Comic
|
||||
Prev *Comic
|
||||
LibraryPath string
|
||||
}
|
||||
|
||||
func extractTitleFromMarkup(c Comic) string {
|
||||
yearFormat := `^(.*?)\s+\(\d{4}(?:\s+.+)?\)`
|
||||
selection := c.Markup.Find("title")
|
||||
|
||||
if selection.Length() == 0 {
|
||||
return "Untitled"
|
||||
}
|
||||
|
||||
content := selection.First().Text()
|
||||
regex := regexp.MustCompile(yearFormat)
|
||||
matches := regex.FindStringSubmatch(content)
|
||||
|
||||
if len(matches) != 2 {
|
||||
return "Untitled"
|
||||
}
|
||||
|
||||
return strings.ReplaceAll(matches[1], ":", "")
|
||||
}
|
||||
|
||||
func NewComic(
|
||||
url string, libraryPath string,
|
||||
imageChannel chan []string,
|
||||
markupChannel chan *goquery.Document,
|
||||
) *Comic {
|
||||
c := &Comic{
|
||||
URL: url,
|
||||
LibraryPath: libraryPath,
|
||||
}
|
||||
|
||||
go Markup(c.URL, markupChannel)
|
||||
|
||||
markup := <-markupChannel
|
||||
c.Markup = markup
|
||||
c.Title = extractTitleFromMarkup(*c)
|
||||
|
||||
go ParseImageLinks(markup, imageChannel)
|
||||
links := <-imageChannel
|
||||
|
||||
c.Filelist = links
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Comic) Cover() (imageFilepath string, err error) {
|
||||
for _, image := range c.Filelist {
|
||||
if strings.HasSuffix(image, "000.jpg") || strings.HasSuffix(image, "001.jpg") {
|
||||
image, err := filepath.Abs(image)
|
||||
if err != nil {
|
||||
return image, ImageParseError{Message: err.Error(), Code: 1}
|
||||
}
|
||||
return image, nil
|
||||
}
|
||||
}
|
||||
return "", ImageParseError{Message: "No cover found", Code: 1}
|
||||
}
|
||||
254
comic/download.go
Normal file
254
comic/download.go
Normal file
@@ -0,0 +1,254 @@
|
||||
package comic
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
|
||||
)
|
||||
|
||||
// func _downloadFile(wg *sync.WaitGroup, url string, page int, c *Comic) error {
|
||||
// defer wg.Done()
|
||||
|
||||
// pageNumber := fmt.Sprintf("%03d", page)
|
||||
// formattedImagePath := fmt.Sprintf("%s %s.jpg", c.Title, pageNumber)
|
||||
// imageFilepath, _ := filepath.Abs(filepath.Join(c.LibraryPath, c.Title, formattedImagePath))
|
||||
|
||||
// if err := os.MkdirAll(
|
||||
// filepath.Dir(imageFilepath),
|
||||
// os.ModePerm,
|
||||
// ); err != nil {
|
||||
// return ComicDownloadError{
|
||||
// Message: "error creating directory",
|
||||
// Code: 1,
|
||||
// }
|
||||
// }
|
||||
|
||||
// // get image data
|
||||
// res, err := handleRequest(url)
|
||||
// if err != nil {
|
||||
// return ComicDownloadError{
|
||||
// Message: "invalid request",
|
||||
// Code: 1,
|
||||
// }
|
||||
// }
|
||||
// defer res.Body.Close()
|
||||
|
||||
// var fileChannel = make(chan *os.File)
|
||||
|
||||
// go func() error {
|
||||
// imageFile, err := os.Create(imageFilepath)
|
||||
// if err != nil {
|
||||
// return ComicDownloadError{
|
||||
// Message: "error creating image file",
|
||||
// Code: 1,
|
||||
// }
|
||||
// }
|
||||
// defer imageFile.Close()
|
||||
|
||||
// fileChannel <- imageFile
|
||||
|
||||
// return nil
|
||||
// }()
|
||||
|
||||
// println("Downloading", imageFilepath)
|
||||
|
||||
// go func(
|
||||
// fc chan *os.File,
|
||||
// res *http.Response,
|
||||
// ) error {
|
||||
// buffer := make([]byte, 64*1024)
|
||||
|
||||
// defer close(fileChannel)
|
||||
|
||||
// // write image data
|
||||
// _, err := io.CopyBuffer(<-fc, res.Body, buffer)
|
||||
// if err != nil {
|
||||
// return ComicDownloadError{
|
||||
// Message: "Unable to save file contents",
|
||||
// Code: 1,
|
||||
// }
|
||||
// }
|
||||
|
||||
// return nil
|
||||
// }(fileChannel, res)
|
||||
|
||||
// return nil
|
||||
// }
|
||||
|
||||
func downloadFile(url string, page int, c *Comic) error {
|
||||
pageNumber := fmt.Sprintf("%03d", page)
|
||||
formattedImagePath := fmt.Sprintf("%s %s.jpg", c.Title, pageNumber)
|
||||
imageFilepath, _ := filepath.Abs(filepath.Join(c.LibraryPath, c.Title, formattedImagePath))
|
||||
|
||||
if err := os.MkdirAll(
|
||||
filepath.Dir(imageFilepath),
|
||||
os.ModePerm,
|
||||
); err != nil {
|
||||
return ComicDownloadError{
|
||||
Message: "error creating directory",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
res, err := handleRequest(url)
|
||||
if err != nil {
|
||||
return ComicDownloadError{
|
||||
Message: "invalid request",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
imageFile, err := os.Create(imageFilepath)
|
||||
if err != nil {
|
||||
return ComicDownloadError{
|
||||
Message: "error creating image file",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
defer imageFile.Close()
|
||||
|
||||
written, err := io.Copy(imageFile, res.Body)
|
||||
if err != nil {
|
||||
return ComicDownloadError{
|
||||
Message: "Unable to save file contents",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
if written == 0 {
|
||||
return ComicDownloadError{
|
||||
Message: "Unable to save file contents",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func handleRequest(url string) (*http.Response, error) {
|
||||
// adjust timeout and keep-alive to avoid connection timeout
|
||||
transport := &http.Transport{
|
||||
DisableKeepAlives: false,
|
||||
MaxIdleConnsPerHost: 32,
|
||||
}
|
||||
|
||||
// add cloudflare bypass
|
||||
cfTransport := cloudflarebp.AddCloudFlareByPass(transport)
|
||||
|
||||
// prevents cloudflarebp from occasionally returning the wrong type
|
||||
if converted, ok := cfTransport.(*http.Transport); ok {
|
||||
transport = converted
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: time.Second * 30,
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
// mimic generic browser
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, ComicDownloadError{
|
||||
Message: "invalid request",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36")
|
||||
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, ComicDownloadError{
|
||||
Message: "invalid request",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return nil, ComicDownloadError{
|
||||
Message: "bad response",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *Comic) Download(concurrency int) []error {
|
||||
// var wg sync.WaitGroup
|
||||
// wg.Add(len(c.Filelist))
|
||||
|
||||
// for i, link := range c.Filelist {
|
||||
// go downloadFile(link, i+1, c)
|
||||
// }
|
||||
|
||||
// wg.Wait()
|
||||
// return nil
|
||||
jobs := make(chan Download)
|
||||
results := make(chan error)
|
||||
|
||||
for worker := 1; worker <= concurrency; worker++ {
|
||||
go workerPool(jobs, results)
|
||||
}
|
||||
|
||||
for i, url := range c.Filelist {
|
||||
jobs <- Download{
|
||||
URL: url,
|
||||
Page: i + 1,
|
||||
Comic: c,
|
||||
}
|
||||
}
|
||||
|
||||
var errors []error
|
||||
for i := 0; i < len(c.Filelist); i++ {
|
||||
err := <-results
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
}
|
||||
}
|
||||
return errors
|
||||
}
|
||||
|
||||
type Download struct {
|
||||
URL string
|
||||
Page int
|
||||
Comic *Comic
|
||||
}
|
||||
|
||||
func workerPool(jobs <-chan Download, results chan<- error) {
|
||||
for job := range jobs {
|
||||
results <- downloadFile(job.URL, job.Page, job.Comic)
|
||||
}
|
||||
}
|
||||
|
||||
func DownloadComicImages(c *Comic, concurrency int) []error {
|
||||
jobs := make(chan Download)
|
||||
results := make(chan error)
|
||||
|
||||
for worker := 1; worker <= concurrency; worker++ {
|
||||
go workerPool(jobs, results)
|
||||
}
|
||||
|
||||
for i, url := range c.Filelist {
|
||||
jobs <- Download{
|
||||
URL: url,
|
||||
Page: i + 1,
|
||||
Comic: c,
|
||||
}
|
||||
}
|
||||
|
||||
var errors []error
|
||||
for i := 0; i < len(c.Filelist); i++ {
|
||||
err := <-results
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
}
|
||||
}
|
||||
return errors
|
||||
}
|
||||
19
comic/error.go
Normal file
19
comic/error.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package comic
|
||||
|
||||
// ImageParseError reports a failure while locating images in a comic's
// markup or file list.
type ImageParseError struct {
	// Message is the human-readable description returned by Error.
	Message string
	// Code is a numeric status attached to the failure.
	Code int
}

// Error implements the error interface by reporting the stored Message.
func (e ImageParseError) Error() string {
	msg := e.Message
	return msg
}

// ComicDownloadError reports a failure while requesting or saving a comic
// page.
type ComicDownloadError struct {
	// Message is the human-readable description returned by Error.
	Message string
	// Code is a numeric status attached to the failure.
	Code int
}

// Error implements the error interface by reporting the stored Message.
func (e ComicDownloadError) Error() string {
	msg := e.Message
	return msg
}
|
||||
52
comic/parser.go
Normal file
52
comic/parser.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package comic
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
func Markup(url string, c chan *goquery.Document) *goquery.Document {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return &goquery.Document{}
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return &goquery.Document{}
|
||||
}
|
||||
|
||||
content, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return &goquery.Document{}
|
||||
}
|
||||
|
||||
markup, err := goquery.NewDocumentFromReader(strings.NewReader(string(content)))
|
||||
if err != nil {
|
||||
return &goquery.Document{}
|
||||
}
|
||||
|
||||
c <- markup
|
||||
return markup
|
||||
}
|
||||
|
||||
func ParseImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
|
||||
var links []string
|
||||
markup.Find("img").Each(func(_ int, image *goquery.Selection) {
|
||||
link, _ := image.Attr("src")
|
||||
if !strings.Contains(link, "logo") && (strings.Contains(link, "bp.blogspot.com") || strings.Contains(link, "blogger.googleusercontent") || strings.Contains(link, "covers")) {
|
||||
links = append(links, link)
|
||||
}
|
||||
})
|
||||
|
||||
c <- links
|
||||
|
||||
if len(links) > 0 {
|
||||
return links, nil
|
||||
}
|
||||
|
||||
return links, ImageParseError{Message: "No images found", Code: 1}
|
||||
}
|
||||
Reference in New Issue
Block a user