yoink-go initial commit

This commit is contained in:
Bryan Bailey
2024-08-26 22:49:26 -04:00
commit e8bd6e4179
11 changed files with 708 additions and 0 deletions

108
comic/archive.go Normal file
View File

@@ -0,0 +1,108 @@
package comic
import (
"archive/zip"
"io"
"log"
"os"
"path/filepath"
"strings"
)
// ArchiveError describes a failure encountered while building a .cbz archive.
type ArchiveError struct {
	Message string // human-readable description of what went wrong
	Code    int    // numeric error code (always 1 in current callers)
}

// Error satisfies the error interface by returning the stored message.
func (a ArchiveError) Error() string {
	return a.Message
}
// Archive bundles every page image in the comic's own directory into a single
// <Title>.cbz (zip) file written inside that same directory.
func (c *Comic) Archive() error {
	outputPath := filepath.Join(c.LibraryPath, c.Title, c.Title+".cbz")
	if err := os.MkdirAll(filepath.Dir(outputPath), os.ModePerm); err != nil {
		return ArchiveError{
			Message: "error creating directory: " + err.Error(),
			Code:    1,
		}
	}
	zipFile, err := os.Create(outputPath)
	if err != nil {
		return err
	}
	defer zipFile.Close()
	zwriter := zip.NewWriter(zipFile)
	sourcePath := filepath.Join(c.LibraryPath, c.Title)
	// Walk the comic's own directory. The previous code walked
	// filepath.Dir(sourcePath) — the whole library — which swept every
	// other comic into this archive under "../Other Title/..." entries.
	err = filepath.Walk(
		sourcePath,
		func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return ArchiveError{
					Message: "error walking archive: " + err.Error(),
					Code:    1,
				}
			}
			if info.IsDir() {
				return nil
			}
			// Only page images belong in the .cbz; this also skips the
			// archive file itself, which lives in the same directory.
			ext := strings.ToLower(filepath.Ext(path))
			if ext != ".jpg" && ext != ".jpeg" && ext != ".png" {
				return nil
			}
			relPath, err := filepath.Rel(sourcePath, path)
			if err != nil {
				return ArchiveError{
					Message: "error walking archive: " + err.Error(),
					Code:    1,
				}
			}
			file, err := os.Open(path)
			if err != nil {
				return ArchiveError{
					Message: "error walking archive: " + err.Error(),
					Code:    1,
				}
			}
			defer file.Close()
			zipEntry, err := zwriter.Create(relPath)
			if err != nil {
				return ArchiveError{
					Message: "error walking archive: " + err.Error(),
					Code:    1,
				}
			}
			if _, err := io.Copy(zipEntry, file); err != nil {
				return ArchiveError{
					Message: "error walking archive: " + err.Error(),
					Code:    1,
				}
			}
			return nil
		},
	)
	if err != nil {
		zwriter.Close()
		return ArchiveError{
			Message: "error writing files to archive: " + err.Error(),
			Code:    1,
		}
	}
	// Close explicitly so a failed flush of the zip central directory is
	// reported instead of being swallowed by a deferred Close.
	if err := zwriter.Close(); err != nil {
		return ArchiveError{
			Message: "error writing files to archive: " + err.Error(),
			Code:    1,
		}
	}
	log.Printf("Created archive: %s", outputPath)
	return nil
}

31
comic/cleanup.go Normal file
View File

@@ -0,0 +1,31 @@
package comic
import (
"os"
"path/filepath"
"strings"
)
// Cleanup deletes every downloaded page image in the comic's directory except
// the cover page (the file whose name, after stripping the title prefix,
// starts with "001"). It returns the first error hit while walking/removing.
func (c *Comic) Cleanup() error {
	// Propagate the Walk error — the previous code discarded it and
	// unconditionally returned nil, hiding every removal failure.
	return filepath.Walk(
		filepath.Join(c.LibraryPath, c.Title),
		func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}
			if info.IsDir() {
				return nil
			}
			// Strip the title prefix so only the page number remains,
			// e.g. "Title 003.jpg" -> "003.jpg". Loop-invariant, so
			// computed once rather than once per extension.
			stripped := strings.Trim(strings.Replace(info.Name(), c.Title, "", 1), " ")
			if strings.HasPrefix(strings.ToLower(stripped), "001") {
				return nil // keep the cover page
			}
			for _, ext := range []string{".jpg", ".jpeg", ".png"} {
				if strings.HasSuffix(info.Name(), ext) {
					return os.Remove(path)
				}
			}
			return nil
		})
}

77
comic/comic.go Normal file
View File

@@ -0,0 +1,77 @@
package comic
import (
"path/filepath"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
)
// var debugUrl = "https://readallcomics.com/ultraman-x-avengers-001-2024/"
// Comic models one comic issue scraped from the web: where it came from, its
// parsed page markup, the image URLs to download, and where files are stored.
type Comic struct {
	URL         string            // source page the comic was scraped from
	Title       string            // title extracted from the page's <title> tag
	Markup      *goquery.Document // parsed HTML of the source page
	Filelist    []string          // image URLs discovered in the markup
	Next        *Comic            // next issue, if linked — not set anywhere in this view, TODO confirm
	Prev        *Comic            // previous issue, if linked — not set anywhere in this view, TODO confirm
	LibraryPath string            // root directory downloads are written under
}
// titleYearRegex captures everything before a parenthesized 4-digit year,
// e.g. "Ultraman X Avengers 001 (2024)" -> "Ultraman X Avengers 001".
// Compiled once at package scope instead of on every call.
var titleYearRegex = regexp.MustCompile(`^(.*?)\s+\(\d{4}(?:\s+.+)?\)`)

// extractTitleFromMarkup derives the comic title from the page's <title>
// element, stripping any ':' characters (they are invalid in some filesystems'
// paths). Returns "Untitled" when no <title> exists or the pattern fails.
func extractTitleFromMarkup(c Comic) string {
	selection := c.Markup.Find("title")
	if selection.Length() == 0 {
		return "Untitled"
	}
	matches := titleYearRegex.FindStringSubmatch(selection.First().Text())
	if len(matches) != 2 {
		return "Untitled"
	}
	return strings.ReplaceAll(matches[1], ":", "")
}
// NewComic builds a Comic from a source URL: it fetches the page markup over
// markupChannel, derives the title from it, then collects image links over
// imageChannel.
//
// NOTE(review): each goroutine launch is immediately followed by a blocking
// receive, so there is no real concurrency here. Also, Markup returns early
// on HTTP/parse errors WITHOUT sending on its channel, which would leave the
// receive below blocked forever — verify against parser.go.
func NewComic(
	url string, libraryPath string,
	imageChannel chan []string,
	markupChannel chan *goquery.Document,
) *Comic {
	c := &Comic{
		URL:         url,
		LibraryPath: libraryPath,
	}
	// Fetch and parse the page HTML.
	go Markup(c.URL, markupChannel)
	markup := <-markupChannel
	c.Markup = markup
	c.Title = extractTitleFromMarkup(*c)
	// Extract candidate image URLs from the fetched markup.
	go ParseImageLinks(markup, imageChannel)
	links := <-imageChannel
	c.Filelist = links
	return c
}
// Cover returns the absolute path of the first entry in Filelist ending in
// "000.jpg" or "001.jpg", or an ImageParseError when no cover candidate
// exists or the path cannot be made absolute.
func (c *Comic) Cover() (imageFilepath string, err error) {
	for _, candidate := range c.Filelist {
		isCover := strings.HasSuffix(candidate, "000.jpg") || strings.HasSuffix(candidate, "001.jpg")
		if !isCover {
			continue
		}
		abs, absErr := filepath.Abs(candidate)
		if absErr != nil {
			return abs, ImageParseError{Message: absErr.Error(), Code: 1}
		}
		return abs, nil
	}
	return "", ImageParseError{Message: "No cover found", Code: 1}
}

254
comic/download.go Normal file
View File

@@ -0,0 +1,254 @@
package comic
import (
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"time"
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
)
// func _downloadFile(wg *sync.WaitGroup, url string, page int, c *Comic) error {
// defer wg.Done()
// pageNumber := fmt.Sprintf("%03d", page)
// formattedImagePath := fmt.Sprintf("%s %s.jpg", c.Title, pageNumber)
// imageFilepath, _ := filepath.Abs(filepath.Join(c.LibraryPath, c.Title, formattedImagePath))
// if err := os.MkdirAll(
// filepath.Dir(imageFilepath),
// os.ModePerm,
// ); err != nil {
// return ComicDownloadError{
// Message: "error creating directory",
// Code: 1,
// }
// }
// // get image data
// res, err := handleRequest(url)
// if err != nil {
// return ComicDownloadError{
// Message: "invalid request",
// Code: 1,
// }
// }
// defer res.Body.Close()
// var fileChannel = make(chan *os.File)
// go func() error {
// imageFile, err := os.Create(imageFilepath)
// if err != nil {
// return ComicDownloadError{
// Message: "error creating image file",
// Code: 1,
// }
// }
// defer imageFile.Close()
// fileChannel <- imageFile
// return nil
// }()
// println("Downloading", imageFilepath)
// go func(
// fc chan *os.File,
// res *http.Response,
// ) error {
// buffer := make([]byte, 64*1024)
// defer close(fileChannel)
// // write image data
// _, err := io.CopyBuffer(<-fc, res.Body, buffer)
// if err != nil {
// return ComicDownloadError{
// Message: "Unable to save file contents",
// Code: 1,
// }
// }
// return nil
// }(fileChannel, res)
// return nil
// }
// downloadFile fetches url and writes it to
// <LibraryPath>/<Title>/<Title> NNN.jpg, where NNN is the zero-padded page
// number. An empty response body is treated as a failed download.
func downloadFile(url string, page int, c *Comic) error {
	pageNumber := fmt.Sprintf("%03d", page)
	formattedImagePath := fmt.Sprintf("%s %s.jpg", c.Title, pageNumber)
	// Check the Abs error instead of discarding it with `_`.
	imageFilepath, err := filepath.Abs(filepath.Join(c.LibraryPath, c.Title, formattedImagePath))
	if err != nil {
		return ComicDownloadError{
			Message: "error resolving image path: " + err.Error(),
			Code:    1,
		}
	}
	if err := os.MkdirAll(
		filepath.Dir(imageFilepath),
		os.ModePerm,
	); err != nil {
		return ComicDownloadError{
			Message: "error creating directory: " + err.Error(),
			Code:    1,
		}
	}
	res, err := handleRequest(url)
	if err != nil {
		return ComicDownloadError{
			Message: "invalid request: " + err.Error(),
			Code:    1,
		}
	}
	defer res.Body.Close()
	imageFile, err := os.Create(imageFilepath)
	if err != nil {
		return ComicDownloadError{
			Message: "error creating image file: " + err.Error(),
			Code:    1,
		}
	}
	defer imageFile.Close()
	written, err := io.Copy(imageFile, res.Body)
	if err != nil {
		return ComicDownloadError{
			Message: "Unable to save file contents: " + err.Error(),
			Code:    1,
		}
	}
	if written == 0 {
		// An empty body means the server returned nothing usable.
		return ComicDownloadError{
			Message: "Unable to save file contents",
			Code:    1,
		}
	}
	return nil
}
// handleRequest performs a GET against url with a browser-like User-Agent and
// a Cloudflare-bypassing transport, returning the response only when the
// status is 200 OK. Callers own res.Body and must close it.
func handleRequest(url string) (*http.Response, error) {
	// adjust timeout and keep-alive to avoid connection timeout
	transport := &http.Transport{
		DisableKeepAlives:   false,
		MaxIdleConnsPerHost: 32,
	}
	// AddCloudFlareByPass returns an http.RoundTripper, which is exactly what
	// http.Client.Transport accepts — use it directly. The previous type
	// assertion back to *http.Transport silently DROPPED the bypass whenever
	// the assertion failed.
	client := &http.Client{
		Timeout:   time.Second * 30,
		Transport: cloudflarebp.AddCloudFlareByPass(transport),
	}
	// mimic generic browser
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, ComicDownloadError{
			Message: "invalid request: " + err.Error(),
			Code:    1,
		}
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36")
	res, err := client.Do(req)
	if err != nil {
		return nil, ComicDownloadError{
			Message: "invalid request: " + err.Error(),
			Code:    1,
		}
	}
	if res.StatusCode != http.StatusOK {
		// Drain and close the body so the keep-alive connection can be
		// reused; best-effort, so the copy error is deliberately ignored.
		_, _ = io.Copy(io.Discard, res.Body)
		res.Body.Close()
		return nil, ComicDownloadError{
			Message: "bad response: " + res.Status,
			Code:    1,
		}
	}
	return res, nil
}
// Download fetches every image in c.Filelist using a pool of `concurrency`
// workers and returns the non-nil errors encountered (nil slice on success).
func (c *Comic) Download(concurrency int) []error {
	jobs := make(chan Download)
	results := make(chan error)
	for worker := 1; worker <= concurrency; worker++ {
		go workerPool(jobs, results)
	}
	// Queue jobs from a separate goroutine. The previous inline loop
	// deadlocked whenever len(Filelist) > concurrency: all workers block
	// sending on the unbuffered `results` channel while the main goroutine
	// is still trying to send jobs that nobody can receive.
	go func() {
		for i, url := range c.Filelist {
			jobs <- Download{
				URL:   url,
				Page:  i + 1,
				Comic: c,
			}
		}
		// Closing jobs lets each worker's range loop exit instead of
		// leaking one goroutine per worker.
		close(jobs)
	}()
	var errors []error
	for i := 0; i < len(c.Filelist); i++ {
		if err := <-results; err != nil {
			errors = append(errors, err)
		}
	}
	return errors
}
// Download is one unit of work for the download worker pool: a single image
// URL plus the page number and owning comic used to name the output file.
type Download struct {
	URL   string // image URL to fetch
	Page  int    // 1-based page number, used for the zero-padded filename
	Comic *Comic // comic the page belongs to (supplies Title and LibraryPath)
}
// workerPool consumes Download jobs until the jobs channel is closed,
// pushing the outcome of each download onto results.
func workerPool(jobs <-chan Download, results chan<- error) {
	for {
		job, ok := <-jobs
		if !ok {
			return
		}
		results <- downloadFile(job.URL, job.Page, job.Comic)
	}
}
// DownloadComicImages fetches every image in c.Filelist with a pool of
// `concurrency` workers and returns the non-nil errors encountered.
//
// NOTE(review): this duplicates (*Comic).Download — consider consolidating.
func DownloadComicImages(c *Comic, concurrency int) []error {
	jobs := make(chan Download)
	results := make(chan error)
	for worker := 1; worker <= concurrency; worker++ {
		go workerPool(jobs, results)
	}
	// Queue jobs from a separate goroutine. Queuing inline deadlocks once
	// every worker is blocked sending on the unbuffered `results` channel
	// while this function still has jobs left to send.
	go func() {
		for i, url := range c.Filelist {
			jobs <- Download{
				URL:   url,
				Page:  i + 1,
				Comic: c,
			}
		}
		// Let workers' range loops terminate instead of leaking.
		close(jobs)
	}()
	var errors []error
	for i := 0; i < len(c.Filelist); i++ {
		if err := <-results; err != nil {
			errors = append(errors, err)
		}
	}
	return errors
}

19
comic/error.go Normal file
View File

@@ -0,0 +1,19 @@
package comic
// ImageParseError reports a failure while locating or parsing comic images.
type ImageParseError struct {
	Message string // human-readable description of the failure
	Code    int    // numeric error code (always 1 in current callers)
}

// Error satisfies the error interface by returning the stored message.
func (i ImageParseError) Error() string {
	return i.Message
}

// ComicDownloadError reports a failure while downloading comic files.
type ComicDownloadError struct {
	Message string // human-readable description of the failure
	Code    int    // numeric error code (always 1 in current callers)
}

// Error satisfies the error interface by returning the stored message.
func (c ComicDownloadError) Error() string {
	return c.Message
}

52
comic/parser.go Normal file
View File

@@ -0,0 +1,52 @@
package comic
import (
"io"
"net/http"
"strings"
"github.com/PuerkitoBio/goquery"
)
// Markup fetches url, parses it into a goquery document, sends the document
// on c, and also returns it for synchronous callers.
//
// On any failure an EMPTY document is both sent and returned: the previous
// version returned without sending, which permanently blocked goroutine
// callers (e.g. NewComic) waiting on the channel.
func Markup(url string, c chan *goquery.Document) *goquery.Document {
	// fail delivers the same empty document to both the channel and the
	// direct return value so no receiver is left waiting.
	fail := func() *goquery.Document {
		empty := &goquery.Document{}
		c <- empty
		return empty
	}
	res, err := http.Get(url)
	if err != nil {
		return fail()
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fail()
	}
	content, err := io.ReadAll(res.Body)
	if err != nil {
		return fail()
	}
	markup, err := goquery.NewDocumentFromReader(strings.NewReader(string(content)))
	if err != nil {
		return fail()
	}
	c <- markup
	return markup
}
// ParseImageLinks collects candidate comic-page image URLs from markup,
// sends the collected slice on c, and returns it. An ImageParseError is
// returned when no matching images were found.
func ParseImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
	var links []string
	// isPageImage filters out site chrome: anything containing "logo" is
	// rejected, and only known comic-image hosts are accepted.
	isPageImage := func(src string) bool {
		if strings.Contains(src, "logo") {
			return false
		}
		return strings.Contains(src, "bp.blogspot.com") ||
			strings.Contains(src, "blogger.googleusercontent") ||
			strings.Contains(src, "covers")
	}
	markup.Find("img").Each(func(_ int, image *goquery.Selection) {
		if src, _ := image.Attr("src"); isPageImage(src) {
			links = append(links, src)
		}
	})
	c <- links
	if len(links) == 0 {
		return links, ImageParseError{Message: "No images found", Code: 1}
	}
	return links, nil
}