Compare commits
8 Commits
9cd4af9bb6
...
feat/batca
| Author | SHA1 | Date | |
|---|---|---|---|
| d2c715e973 | |||
| 9cb26f27ec | |||
| 855e97f72f | |||
| ca891fc6c0 | |||
| 9ec1301317 | |||
| dcb41deea9 | |||
| a7c3b632a5 | |||
| d53af6b84f |
30
Makefile
30
Makefile
@@ -1,9 +1,10 @@
|
|||||||
BIN := yoink
|
BIN := yoink
|
||||||
BUILD_DIR := build
|
BUILD_DIR := build
|
||||||
REGISTRY := git.brizzle.dev/bryan/yoink-go
|
REGISTRY := git.brizzle.dev/bryan/yoink-go
|
||||||
VERSION := $(shell git describe --tags --always --dirty)
|
VERSION ?= $(shell git describe --tags --always --dirty)
|
||||||
|
NOTES ?= ""
|
||||||
|
|
||||||
.PHONY: all windows linux darwin clean docker-build docker-push
|
.PHONY: all windows linux darwin clean docker-build docker-push tag gitea-release release
|
||||||
|
|
||||||
all: windows linux darwin
|
all: windows linux darwin
|
||||||
|
|
||||||
@@ -28,5 +29,30 @@ docker-push: docker-build
|
|||||||
podman push $(REGISTRY):$(VERSION)
|
podman push $(REGISTRY):$(VERSION)
|
||||||
podman push $(REGISTRY):latest
|
podman push $(REGISTRY):latest
|
||||||
|
|
||||||
|
tag:
|
||||||
|
@if [ -z "$(VERSION)" ]; then echo "Usage: make tag VERSION=1.2.0"; exit 1; fi
|
||||||
|
git tag $(VERSION)
|
||||||
|
git tag -f latest
|
||||||
|
git push origin $(VERSION)
|
||||||
|
git push origin -f latest
|
||||||
|
|
||||||
|
gitea-release:
|
||||||
|
tea release create \
|
||||||
|
--tag $(VERSION) \
|
||||||
|
--title "$(VERSION)" \
|
||||||
|
$(if $(NOTES),--note $(NOTES),) \
|
||||||
|
--asset $(BUILD_DIR)/$(BIN)-windows-amd64.exe \
|
||||||
|
--asset $(BUILD_DIR)/$(BIN)-linux-amd64 \
|
||||||
|
--asset $(BUILD_DIR)/$(BIN)-linux-arm64 \
|
||||||
|
--asset $(BUILD_DIR)/$(BIN)-darwin-amd64 \
|
||||||
|
--asset $(BUILD_DIR)/$(BIN)-darwin-arm64
|
||||||
|
|
||||||
|
release:
|
||||||
|
@if [ -z "$(VERSION)" ]; then echo "Usage: make release VERSION=1.3.0 NOTES='...'"; exit 1; fi
|
||||||
|
$(MAKE) tag VERSION=$(VERSION)
|
||||||
|
$(MAKE) clean all
|
||||||
|
$(MAKE) gitea-release VERSION=$(VERSION) NOTES=$(NOTES)
|
||||||
|
$(MAKE) docker-push VERSION=$(VERSION)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf $(BUILD_DIR)
|
rm -rf $(BUILD_DIR)
|
||||||
|
|||||||
@@ -99,6 +99,8 @@ The web UI is then available at `http://localhost:8080`.
|
|||||||
|
|
||||||
#### Packaging local images
|
#### Packaging local images
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
Click the upload icon (↑) in the header to open the packaging panel. Enter a title, then either:
|
Click the upload icon (↑) in the header to open the packaging panel. Enter a title, then either:
|
||||||
|
|
||||||
- **Drag and drop** a folder or image files onto the drop zone
|
- **Drag and drop** a folder or image files onto the drop zone
|
||||||
|
|||||||
BIN
Screenshot_02.png
Normal file
BIN
Screenshot_02.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 270 KiB |
@@ -40,14 +40,14 @@ var cli = &cobra.Command{
|
|||||||
fmt.Println(comic.Title)
|
fmt.Println(comic.Title)
|
||||||
|
|
||||||
err := comic.Download(len(comic.Filelist))
|
err := comic.Download(len(comic.Filelist))
|
||||||
for e := range err {
|
for _, e := range err {
|
||||||
fmt.Println(e)
|
fmt.Println(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
comic.Archive()
|
comic.Archive()
|
||||||
comic.Cleanup()
|
comic.Cleanup()
|
||||||
},
|
},
|
||||||
Version: "1.1.0",
|
Version: "1.2.1",
|
||||||
}
|
}
|
||||||
|
|
||||||
func Execute() error {
|
func Execute() error {
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ func (c *Comic) Archive() error {
|
|||||||
sourcePath := filepath.Join(c.LibraryPath, c.Title)
|
sourcePath := filepath.Join(c.LibraryPath, c.Title)
|
||||||
|
|
||||||
err = filepath.Walk(
|
err = filepath.Walk(
|
||||||
filepath.Dir(sourcePath),
|
sourcePath,
|
||||||
func(path string, info os.FileInfo, err error) error {
|
func(path string, info os.FileInfo, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ArchiveError{
|
return ArchiveError{
|
||||||
|
|||||||
110
comic/archive_test.go
Normal file
110
comic/archive_test.go
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
package comic
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestArchiveError(t *testing.T) {
|
||||||
|
err := ArchiveError{Message: "archive failed", Code: 1}
|
||||||
|
if err.Error() != "archive failed" {
|
||||||
|
t.Errorf("Error() = %q, want %q", err.Error(), "archive failed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestArchive(t *testing.T) {
|
||||||
|
t.Run("creates cbz with image files", func(t *testing.T) {
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
title := "TestComic"
|
||||||
|
comicDir := filepath.Join(tmpDir, title)
|
||||||
|
os.MkdirAll(comicDir, os.ModePerm)
|
||||||
|
|
||||||
|
// Create fake image files
|
||||||
|
for _, name := range []string{"TestComic 001.jpg", "TestComic 002.jpg", "TestComic 003.png"} {
|
||||||
|
os.WriteFile(filepath.Join(comicDir, name), []byte("fake image"), 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := &Comic{
|
||||||
|
Title: title,
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := c.Archive()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Archive() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
archivePath := filepath.Join(comicDir, title+".cbz")
|
||||||
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||||
|
t.Fatalf("expected archive %s to exist", archivePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify the zip contains the image files
|
||||||
|
reader, err := zip.OpenReader(archivePath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to open archive: %v", err)
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
if len(reader.File) != 3 {
|
||||||
|
t.Errorf("archive contains %d files, want 3", len(reader.File))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("excludes non-image files from archive", func(t *testing.T) {
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
title := "TestComic"
|
||||||
|
comicDir := filepath.Join(tmpDir, title)
|
||||||
|
os.MkdirAll(comicDir, os.ModePerm)
|
||||||
|
|
||||||
|
// Create mixed files
|
||||||
|
os.WriteFile(filepath.Join(comicDir, "page-001.jpg"), []byte("image"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(comicDir, "readme.txt"), []byte("text"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(comicDir, "data.json"), []byte("json"), 0644)
|
||||||
|
|
||||||
|
c := &Comic{
|
||||||
|
Title: title,
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := c.Archive()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Archive() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
archivePath := filepath.Join(comicDir, title+".cbz")
|
||||||
|
reader, err := zip.OpenReader(archivePath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to open archive: %v", err)
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
if len(reader.File) != 1 {
|
||||||
|
t.Errorf("archive contains %d files, want 1 (only .jpg)", len(reader.File))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("handles empty directory", func(t *testing.T) {
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
title := "EmptyComic"
|
||||||
|
comicDir := filepath.Join(tmpDir, title)
|
||||||
|
os.MkdirAll(comicDir, os.ModePerm)
|
||||||
|
|
||||||
|
c := &Comic{
|
||||||
|
Title: title,
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := c.Archive()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Archive() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
archivePath := filepath.Join(comicDir, title+".cbz")
|
||||||
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||||
|
t.Fatalf("expected archive %s to exist even if empty", archivePath)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
93
comic/cleanup_test.go
Normal file
93
comic/cleanup_test.go
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
package comic
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCleanup(t *testing.T) {
|
||||||
|
t.Run("keeps cover image 001 and removes others", func(t *testing.T) {
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
title := "TestComic"
|
||||||
|
comicDir := filepath.Join(tmpDir, title)
|
||||||
|
os.MkdirAll(comicDir, os.ModePerm)
|
||||||
|
|
||||||
|
files := map[string]bool{
|
||||||
|
"TestComic 001.jpg": true, // should be kept
|
||||||
|
"TestComic 002.jpg": false, // should be removed
|
||||||
|
"TestComic 003.jpg": false, // should be removed
|
||||||
|
}
|
||||||
|
|
||||||
|
for name := range files {
|
||||||
|
os.WriteFile(filepath.Join(comicDir, name), []byte("fake"), 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := &Comic{
|
||||||
|
Title: title,
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := c.Cleanup()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Cleanup() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for name, shouldExist := range files {
|
||||||
|
path := filepath.Join(comicDir, name)
|
||||||
|
_, err := os.Stat(path)
|
||||||
|
exists := !os.IsNotExist(err)
|
||||||
|
|
||||||
|
if shouldExist && !exists {
|
||||||
|
t.Errorf("expected %s to be kept, but it was removed", name)
|
||||||
|
}
|
||||||
|
if !shouldExist && exists {
|
||||||
|
t.Errorf("expected %s to be removed, but it still exists", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("keeps non-image files", func(t *testing.T) {
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
title := "TestComic"
|
||||||
|
comicDir := filepath.Join(tmpDir, title)
|
||||||
|
os.MkdirAll(comicDir, os.ModePerm)
|
||||||
|
|
||||||
|
os.WriteFile(filepath.Join(comicDir, "TestComic.cbz"), []byte("archive"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(comicDir, "metadata.json"), []byte("data"), 0644)
|
||||||
|
|
||||||
|
c := &Comic{
|
||||||
|
Title: title,
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := c.Cleanup()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Cleanup() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, name := range []string{"TestComic.cbz", "metadata.json"} {
|
||||||
|
path := filepath.Join(comicDir, name)
|
||||||
|
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||||
|
t.Errorf("expected non-image file %s to be kept", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("handles empty directory", func(t *testing.T) {
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
title := "EmptyComic"
|
||||||
|
comicDir := filepath.Join(tmpDir, title)
|
||||||
|
os.MkdirAll(comicDir, os.ModePerm)
|
||||||
|
|
||||||
|
c := &Comic{
|
||||||
|
Title: title,
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := c.Cleanup()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Cleanup() unexpected error for empty dir: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package comic
|
package comic
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"net/http"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -18,6 +19,7 @@ type Comic struct {
|
|||||||
Next *Comic
|
Next *Comic
|
||||||
Prev *Comic
|
Prev *Comic
|
||||||
LibraryPath string
|
LibraryPath string
|
||||||
|
Client *http.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
// extractTitleFromMarkup extracts the title from the comic's markup.
|
// extractTitleFromMarkup extracts the title from the comic's markup.
|
||||||
@@ -26,21 +28,52 @@ type Comic struct {
|
|||||||
// Returns the extracted title as a string.
|
// Returns the extracted title as a string.
|
||||||
func extractTitleFromMarkup(c Comic) string {
|
func extractTitleFromMarkup(c Comic) string {
|
||||||
yearFormat := `^(.*?)\s+\(\d{4}(?:\s+.+)?\)`
|
yearFormat := `^(.*?)\s+\(\d{4}(?:\s+.+)?\)`
|
||||||
selection := c.Markup.Find("title")
|
|
||||||
|
|
||||||
if selection.Length() == 0 {
|
|
||||||
return "Untitled"
|
|
||||||
}
|
|
||||||
|
|
||||||
content := selection.First().Text()
|
|
||||||
regex := regexp.MustCompile(yearFormat)
|
regex := regexp.MustCompile(yearFormat)
|
||||||
matches := regex.FindStringSubmatch(content)
|
|
||||||
|
|
||||||
|
extractFrom := func(text string) string {
|
||||||
|
matches := regex.FindStringSubmatch(text)
|
||||||
if len(matches) != 2 {
|
if len(matches) != 2 {
|
||||||
return "Untitled"
|
return ""
|
||||||
|
}
|
||||||
|
return strings.ReplaceAll(matches[1], ":", "")
|
||||||
}
|
}
|
||||||
|
|
||||||
return strings.ReplaceAll(matches[1], ":", "")
|
title := extractFrom(c.Markup.Find("title").First().Text())
|
||||||
|
|
||||||
|
if strings.HasPrefix(title, "#") {
|
||||||
|
if h1 := extractFrom(c.Markup.Find("h1").First().Text()); h1 != "" && !strings.HasPrefix(h1, "#") {
|
||||||
|
return h1
|
||||||
|
}
|
||||||
|
if slug := titleFromSlug(c.URL); slug != "" {
|
||||||
|
return slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if title != "" {
|
||||||
|
return title
|
||||||
|
}
|
||||||
|
|
||||||
|
return "Untitled"
|
||||||
|
}
|
||||||
|
|
||||||
|
// slugYearSuffix matches a trailing "-YYYY" year at the end of a URL slug.
// Compiled once at package scope instead of on every titleFromSlug call.
var slugYearSuffix = regexp.MustCompile(`-\d{4}$`)

// titleFromSlug derives a comic title from the last path segment of a URL.
//
// Trailing slashes are dropped, a trailing year ("-YYYY") is stripped, hyphens
// become spaces, and the first letter of every word is upper-cased. It returns
// "" when nothing is left after stripping (for example an empty URL).
func titleFromSlug(url string) string {
	slug := strings.TrimRight(url, "/")
	if i := strings.LastIndex(slug, "/"); i >= 0 {
		slug = slug[i+1:]
	}

	slug = slugYearSuffix.ReplaceAllString(slug, "")
	if slug == "" {
		return ""
	}

	words := strings.Split(slug, "-")
	for i, w := range words {
		if w == "" {
			// Consecutive hyphens yield empty words; leave them untouched.
			continue
		}

		// Find the byte width of the first rune so that multi-byte leading
		// characters (e.g. "é") are upper-cased whole rather than cut in half.
		// Ranging over a string yields the byte offset of each rune, so the
		// second offset is the width of the first rune.
		width := len(w)
		for offset := range w {
			if offset > 0 {
				width = offset
				break
			}
		}
		words[i] = strings.ToUpper(w[:width]) + w[width:]
	}

	return strings.Join(words, " ")
}
|
||||||
|
|
||||||
// NewComic creates a new Comic instance from the provided URL and library path.
|
// NewComic creates a new Comic instance from the provided URL and library path.
|
||||||
@@ -61,13 +94,25 @@ func NewComic(
|
|||||||
LibraryPath: libraryPath,
|
LibraryPath: libraryPath,
|
||||||
}
|
}
|
||||||
|
|
||||||
go Markup(c.URL, markupChannel)
|
if strings.Contains(url, "batcave.biz") {
|
||||||
|
clientChan := make(chan *http.Client, 1)
|
||||||
|
go BatcaveBizMarkup(url, markupChannel, clientChan)
|
||||||
|
markup := <-markupChannel
|
||||||
|
c.Markup = markup
|
||||||
|
c.Client = <-clientChan
|
||||||
|
if t := ParseBatcaveBizTitle(markup, url); t != "" {
|
||||||
|
c.Title = t
|
||||||
|
} else {
|
||||||
|
c.Title = extractTitleFromMarkup(*c)
|
||||||
|
}
|
||||||
|
go ParseBatcaveBizImageLinks(markup, imageChannel)
|
||||||
|
} else {
|
||||||
|
go Markup(url, markupChannel)
|
||||||
markup := <-markupChannel
|
markup := <-markupChannel
|
||||||
c.Markup = markup
|
c.Markup = markup
|
||||||
c.Title = extractTitleFromMarkup(*c)
|
c.Title = extractTitleFromMarkup(*c)
|
||||||
|
|
||||||
go ParseImageLinks(markup, imageChannel)
|
go ParseImageLinks(markup, imageChannel)
|
||||||
|
}
|
||||||
links := <-imageChannel
|
links := <-imageChannel
|
||||||
|
|
||||||
c.Filelist = links
|
c.Filelist = links
|
||||||
|
|||||||
170
comic/comic_test.go
Normal file
170
comic/comic_test.go
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
package comic
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newDocFromHTML(html string) *goquery.Document {
|
||||||
|
doc, _ := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||||
|
return doc
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractTitleFromMarkup(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
html string
|
||||||
|
url string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "standard title with year",
|
||||||
|
html: `<html><head><title>Ultraman X Avengers 001 (2024)</title></head></html>`,
|
||||||
|
expected: "Ultraman X Avengers 001",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "title with year and extra text",
|
||||||
|
html: `<html><head><title>Batman 042 (2023 Digital)</title></head></html>`,
|
||||||
|
expected: "Batman 042",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "title with colon removed",
|
||||||
|
html: `<html><head><title>Spider-Man: No Way Home 001 (2022)</title></head></html>`,
|
||||||
|
expected: "Spider-Man No Way Home 001",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no title tag",
|
||||||
|
html: `<html><head></head></html>`,
|
||||||
|
expected: "Untitled",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "title without year pattern",
|
||||||
|
html: `<html><head><title>Some Random Page</title></head></html>`,
|
||||||
|
expected: "Untitled",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty title",
|
||||||
|
html: `<html><head><title></title></head></html>`,
|
||||||
|
expected: "Untitled",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "title starts with # falls back to h1",
|
||||||
|
html: `<html><head><title>#018 (2026)</title></head><body><h1>Absolute Batman #018 (2026)</h1></body></html>`,
|
||||||
|
expected: "Absolute Batman #018",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "title starts with # but h1 also starts with #, falls back to slug",
|
||||||
|
html: `<html><head><title>#018 (2026)</title></head><body><h1>#018 (2026)</h1></body></html>`,
|
||||||
|
url: "https://readallcomics.com/absolute-batman-018-2026/",
|
||||||
|
expected: "Absolute Batman 018",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "title starts with # falls back to slug when no h1",
|
||||||
|
html: `<html><head><title>#018 (2026)</title></head></html>`,
|
||||||
|
url: "https://readallcomics.com/absolute-batman-018-2026/",
|
||||||
|
expected: "Absolute Batman 018",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "title starts with # no h1 no url",
|
||||||
|
html: `<html><head><title>#018 (2026)</title></head></html>`,
|
||||||
|
expected: "#018",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
doc := newDocFromHTML(tt.html)
|
||||||
|
c := Comic{Markup: doc, URL: tt.url}
|
||||||
|
result := extractTitleFromMarkup(c)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("extractTitleFromMarkup() = %q, want %q", result, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTitleFromSlug(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
url string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "standard comic URL",
|
||||||
|
url: "https://readallcomics.com/absolute-batman-018-2026/",
|
||||||
|
expected: "Absolute Batman 018",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no trailing slash",
|
||||||
|
url: "https://readallcomics.com/absolute-batman-018-2026",
|
||||||
|
expected: "Absolute Batman 018",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no year in slug",
|
||||||
|
url: "https://readallcomics.com/absolute-batman-018/",
|
||||||
|
expected: "Absolute Batman 018",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "single word slug",
|
||||||
|
url: "https://readallcomics.com/batman/",
|
||||||
|
expected: "Batman",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := titleFromSlug(tt.url)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("titleFromSlug() = %q, want %q", result, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCover(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
filelist []string
|
||||||
|
wantSuffix string
|
||||||
|
expectErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "finds cover ending in 001.jpg",
|
||||||
|
filelist: []string{"https://example.com/image-002.jpg", "https://example.com/image-001.jpg", "https://example.com/image-003.jpg"},
|
||||||
|
wantSuffix: "image-001.jpg",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "finds cover ending in 000.jpg",
|
||||||
|
filelist: []string{"https://example.com/image-000.jpg", "https://example.com/image-001.jpg"},
|
||||||
|
wantSuffix: "image-000.jpg",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "returns error when no cover found",
|
||||||
|
filelist: []string{"https://example.com/image-002.jpg", "https://example.com/image-003.jpg"},
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "returns error for empty filelist",
|
||||||
|
filelist: []string{},
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
c := &Comic{Filelist: tt.filelist}
|
||||||
|
cover, err := c.Cover()
|
||||||
|
if tt.expectErr && err == nil {
|
||||||
|
t.Error("Cover() expected error, got nil")
|
||||||
|
}
|
||||||
|
if !tt.expectErr && err != nil {
|
||||||
|
t.Errorf("Cover() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if tt.wantSuffix != "" && !strings.HasSuffix(cover, tt.wantSuffix) {
|
||||||
|
t.Errorf("Cover() = %q, want path ending in %q", cover, tt.wantSuffix)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
|
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
|
||||||
@@ -39,13 +40,33 @@ func downloadFile(url string, page int, c *Comic) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err := handleRequest(url)
|
var res *http.Response
|
||||||
|
var err error
|
||||||
|
if c.Client != nil {
|
||||||
|
req, reqErr := http.NewRequest("GET", url, nil)
|
||||||
|
if reqErr != nil {
|
||||||
|
return ComicDownloadError{Message: "invalid request", Code: 1}
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||||
|
if strings.Contains(url, "batcave.biz") {
|
||||||
|
req.Header.Set("Referer", "https://batcave.biz/")
|
||||||
|
}
|
||||||
|
res, err = c.Client.Do(req)
|
||||||
|
} else {
|
||||||
|
res, err = handleRequest(url)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ComicDownloadError{
|
return ComicDownloadError{
|
||||||
Message: "invalid request",
|
Message: "invalid request",
|
||||||
Code: 1,
|
Code: 1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if res.StatusCode != http.StatusOK {
|
||||||
|
return ComicDownloadError{
|
||||||
|
Message: "bad response",
|
||||||
|
Code: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
|
||||||
imageFile, err := os.Create(imageFilepath)
|
imageFile, err := os.Create(imageFilepath)
|
||||||
|
|||||||
145
comic/download_test.go
Normal file
145
comic/download_test.go
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
package comic
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestComicDownloadError(t *testing.T) {
|
||||||
|
err := ComicDownloadError{Message: "download failed", Code: 1}
|
||||||
|
if err.Error() != "download failed" {
|
||||||
|
t.Errorf("Error() = %q, want %q", err.Error(), "download failed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandleRequest(t *testing.T) {
|
||||||
|
t.Run("successful request", func(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Header.Get("User-Agent") == "" {
|
||||||
|
t.Error("expected User-Agent header to be set")
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte("image data"))
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
resp, err := handleRequest(server.URL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("handleRequest() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
t.Errorf("handleRequest() status = %d, want %d", resp.StatusCode, http.StatusOK)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("non-200 response", func(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
_, err := handleRequest(server.URL)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("handleRequest() expected error for 404 response, got nil")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid URL", func(t *testing.T) {
|
||||||
|
_, err := handleRequest("http://invalid.localhost:0/bad")
|
||||||
|
if err == nil {
|
||||||
|
t.Error("handleRequest() expected error for invalid URL, got nil")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDownloadFile(t *testing.T) {
|
||||||
|
t.Run("successful download", func(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte("fake image content"))
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
c := &Comic{
|
||||||
|
Title: "TestComic",
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := downloadFile(server.URL+"/image.jpg", 1, c)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("downloadFile() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedPath := filepath.Join(tmpDir, "TestComic", "TestComic 001.jpg")
|
||||||
|
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
|
||||||
|
t.Errorf("expected file %s to exist", expectedPath)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("formats page number with leading zeros", func(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte("fake image content"))
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
c := &Comic{
|
||||||
|
Title: "TestComic",
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := downloadFile(server.URL+"/image.jpg", 42, c)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("downloadFile() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedPath := filepath.Join(tmpDir, "TestComic", "TestComic 042.jpg")
|
||||||
|
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
|
||||||
|
t.Errorf("expected file %s to exist", expectedPath)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("server error returns error", func(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
c := &Comic{
|
||||||
|
Title: "TestComic",
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := downloadFile(server.URL+"/image.jpg", 1, c)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("downloadFile() expected error for server error, got nil")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("empty response body returns error", func(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
// write nothing
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
c := &Comic{
|
||||||
|
Title: "TestComic",
|
||||||
|
LibraryPath: tmpDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := downloadFile(server.URL+"/image.jpg", 1, c)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("downloadFile() expected error for empty body, got nil")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
211
comic/parser.go
211
comic/parser.go
@@ -3,7 +3,11 @@ package comic
|
|||||||
import (
|
import (
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/http/cookiejar"
|
||||||
|
"net/url"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
)
|
)
|
||||||
@@ -47,6 +51,133 @@ func Markup(url string, c chan *goquery.Document) *goquery.Document {
|
|||||||
return markup
|
return markup
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BatcaveBizMarkup(referer string, c chan *goquery.Document, clientChan chan *http.Client) *goquery.Document {
|
||||||
|
sendErr := func() *goquery.Document {
|
||||||
|
if c != nil {
|
||||||
|
c <- &goquery.Document{}
|
||||||
|
}
|
||||||
|
if clientChan != nil {
|
||||||
|
clientChan <- nil
|
||||||
|
}
|
||||||
|
return &goquery.Document{}
|
||||||
|
}
|
||||||
|
|
||||||
|
jar, _ := cookiejar.New(nil)
|
||||||
|
client := &http.Client{
|
||||||
|
Jar: jar,
|
||||||
|
Timeout: time.Second * 30,
|
||||||
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
headers := map[string]string{
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||||
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||||
|
}
|
||||||
|
|
||||||
|
// GET the challange page to obtain cookies and any necessary tokens
|
||||||
|
req, err := http.NewRequest("GET", referer, nil)
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
for k, v := range headers {
|
||||||
|
req.Header.Set(k, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
defer res.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(res.Body)
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenRegex := regexp.MustCompile(`token:\s*"([^"]+)"`)
|
||||||
|
matches := tokenRegex.FindSubmatch(body)
|
||||||
|
|
||||||
|
if matches == nil {
|
||||||
|
// no challenge, parse directly
|
||||||
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
if c != nil {
|
||||||
|
c <- doc
|
||||||
|
}
|
||||||
|
if clientChan != nil {
|
||||||
|
clientChan <- client
|
||||||
|
}
|
||||||
|
return doc
|
||||||
|
}
|
||||||
|
|
||||||
|
encodedToken := string(matches[1])
|
||||||
|
token, err := url.QueryUnescape(encodedToken)
|
||||||
|
if err != nil {
|
||||||
|
token = encodedToken
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: POST to /_v with fake browser metrics
|
||||||
|
params := url.Values{}
|
||||||
|
params.Set("token", token)
|
||||||
|
params.Set("mode", "modern")
|
||||||
|
params.Set("workTime", "462")
|
||||||
|
params.Set("iterations", "183")
|
||||||
|
params.Set("webdriver", "0")
|
||||||
|
params.Set("touch", "0")
|
||||||
|
params.Set("screen_w", "1920")
|
||||||
|
params.Set("screen_h", "1080")
|
||||||
|
params.Set("screen_cd", "24")
|
||||||
|
|
||||||
|
postReq, err := http.NewRequest("POST", "https://batcave.biz/_v", strings.NewReader(params.Encode()))
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
for k, v := range headers {
|
||||||
|
postReq.Header.Set(k, v)
|
||||||
|
}
|
||||||
|
postReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
postReq.Header.Set("Referer", referer)
|
||||||
|
|
||||||
|
postRes, err := client.Do(postReq)
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
defer postRes.Body.Close()
|
||||||
|
io.ReadAll(postRes.Body)
|
||||||
|
|
||||||
|
// GET the real page with the set cookie
|
||||||
|
realReq, err := http.NewRequest("GET", referer, nil)
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
for k, v := range headers {
|
||||||
|
realReq.Header.Set(k, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
realRes, err := client.Do(realReq)
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
defer realRes.Body.Close()
|
||||||
|
|
||||||
|
doc, err := goquery.NewDocumentFromReader(realRes.Body)
|
||||||
|
if err != nil {
|
||||||
|
return sendErr()
|
||||||
|
}
|
||||||
|
if c != nil {
|
||||||
|
c <- doc
|
||||||
|
}
|
||||||
|
if clientChan != nil {
|
||||||
|
clientChan <- client
|
||||||
|
}
|
||||||
|
return doc
|
||||||
|
}
|
||||||
|
|
||||||
// ParseImageLinks parses a goquery document to extract image links.
|
// ParseImageLinks parses a goquery document to extract image links.
|
||||||
//
|
//
|
||||||
// markup is the goquery document to parse for image links.
|
// markup is the goquery document to parse for image links.
|
||||||
@@ -69,3 +200,83 @@ func ParseImageLinks(markup *goquery.Document, c chan []string) ([]string, error
|
|||||||
|
|
||||||
return links, ImageParseError{Message: "No images found", Code: 1}
|
return links, ImageParseError{Message: "No images found", Code: 1}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ParseReadAllComicsLinks(markup *goquery.Document, c chan []string) ([]string, error) {
|
||||||
|
var links []string
|
||||||
|
markup.Find("img").Each(func(_ int, image *goquery.Selection) {
|
||||||
|
link, _ := image.Attr("src")
|
||||||
|
if !strings.Contains(link, "logo") && (strings.Contains(link, "bp.blogspot.com") || strings.Contains(link, "blogger.googleusercontent") || strings.Contains(link, "covers")) {
|
||||||
|
links = append(links, link)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
c <- links
|
||||||
|
|
||||||
|
if len(links) > 0 {
|
||||||
|
return links, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return links, ImageParseError{Message: "No images found", Code: 1}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseBatcaveBizTitle extracts the chapter title from the __DATA__.chapters array
|
||||||
|
// by matching the chapter id to the last path segment of the provided URL.
|
||||||
|
func ParseBatcaveBizTitle(markup *goquery.Document, chapterURL string) string {
|
||||||
|
slug := strings.TrimRight(chapterURL, "/")
|
||||||
|
if i := strings.LastIndex(slug, "/"); i >= 0 {
|
||||||
|
slug = slug[i+1:]
|
||||||
|
}
|
||||||
|
|
||||||
|
var title string
|
||||||
|
markup.Find("script").Each(func(_ int, s *goquery.Selection) {
|
||||||
|
if title != "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
text := s.Text()
|
||||||
|
if !strings.Contains(text, "__DATA__") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
chapterRegex := regexp.MustCompile(`"id"\s*:\s*` + regexp.QuoteMeta(slug) + `[^}]*?"title"\s*:\s*"([^"]+)"`)
|
||||||
|
m := chapterRegex.FindStringSubmatch(text)
|
||||||
|
if len(m) >= 2 {
|
||||||
|
title = strings.ReplaceAll(m[1], `\/`, "/")
|
||||||
|
title = strings.ReplaceAll(title, "Issue #", "")
|
||||||
|
title = strings.ReplaceAll(title, "#", "")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return title
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseBatcaveBizImageLinks extracts image URLs from the __DATA__.images JavaScript
|
||||||
|
// variable embedded in a batcave.biz page.
|
||||||
|
func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
|
||||||
|
var links []string
|
||||||
|
|
||||||
|
markup.Find("script").Each(func(_ int, s *goquery.Selection) {
|
||||||
|
text := s.Text()
|
||||||
|
if !strings.Contains(text, "__DATA__") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
arrayRegex := regexp.MustCompile(`"images"\s*:\s*\[([^\]]+)\]`)
|
||||||
|
arrayMatch := arrayRegex.FindStringSubmatch(text)
|
||||||
|
if len(arrayMatch) < 2 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
urlRegex := regexp.MustCompile(`"([^"]+)"`)
|
||||||
|
for _, m := range urlRegex.FindAllStringSubmatch(arrayMatch[1], -1) {
|
||||||
|
if len(m) >= 2 {
|
||||||
|
links = append(links, strings.ReplaceAll(m[1], `\/`, "/"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
c <- links
|
||||||
|
|
||||||
|
if len(links) > 0 {
|
||||||
|
return links, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return links, ImageParseError{Message: "No images found", Code: 1}
|
||||||
|
}
|
||||||
|
|||||||
192
comic/parser_test.go
Normal file
192
comic/parser_test.go
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
package comic
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseBatcaveBizImageLinks(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
html string
|
||||||
|
expectCount int
|
||||||
|
expectErr bool
|
||||||
|
expectURLs []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "extracts images from __DATA__",
|
||||||
|
html: `<html><body><script>
|
||||||
|
var __DATA__ = {"images":["https://cdn.batcave.biz/img/001.jpg","https://cdn.batcave.biz/img/002.jpg"]};
|
||||||
|
</script></body></html>`,
|
||||||
|
expectCount: 2,
|
||||||
|
expectErr: false,
|
||||||
|
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg", "https://cdn.batcave.biz/img/002.jpg"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "unescapes forward slashes in URLs",
|
||||||
|
html: `<html><body><script>
|
||||||
|
var __DATA__ = {"images":["https:\/\/cdn.batcave.biz\/img\/001.jpg"]};
|
||||||
|
</script></body></html>`,
|
||||||
|
expectCount: 1,
|
||||||
|
expectErr: false,
|
||||||
|
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "extracts images with spaces around colon and bracket",
|
||||||
|
html: `<html><body><script>
|
||||||
|
var __DATA__ = {"images" : [ "https://cdn.batcave.biz/img/001.jpg" ]};
|
||||||
|
</script></body></html>`,
|
||||||
|
expectCount: 1,
|
||||||
|
expectErr: false,
|
||||||
|
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no __DATA__ script",
|
||||||
|
html: `<html><body><script>
|
||||||
|
var foo = "bar";
|
||||||
|
</script></body></html>`,
|
||||||
|
expectCount: 0,
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "__DATA__ present but no images key",
|
||||||
|
html: `<html><body><script>
|
||||||
|
var __DATA__ = {"title":"Nightwing"};
|
||||||
|
</script></body></html>`,
|
||||||
|
expectCount: 0,
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no script tags",
|
||||||
|
html: `<html><body><p>nothing here</p></body></html>`,
|
||||||
|
expectCount: 0,
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
doc, _ := goquery.NewDocumentFromReader(strings.NewReader(tt.html))
|
||||||
|
ch := make(chan []string, 1)
|
||||||
|
|
||||||
|
links, err := ParseBatcaveBizImageLinks(doc, ch)
|
||||||
|
|
||||||
|
if tt.expectErr && err == nil {
|
||||||
|
t.Error("ParseBatcaveBizImageLinks() expected error, got nil")
|
||||||
|
}
|
||||||
|
if !tt.expectErr && err != nil {
|
||||||
|
t.Errorf("ParseBatcaveBizImageLinks() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(links) != tt.expectCount {
|
||||||
|
t.Errorf("ParseBatcaveBizImageLinks() returned %d links, want %d", len(links), tt.expectCount)
|
||||||
|
}
|
||||||
|
for i, expected := range tt.expectURLs {
|
||||||
|
if i >= len(links) {
|
||||||
|
t.Errorf("missing link at index %d: want %q", i, expected)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if links[i] != expected {
|
||||||
|
t.Errorf("links[%d] = %q, want %q", i, links[i], expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
channelLinks := <-ch
|
||||||
|
if len(channelLinks) != tt.expectCount {
|
||||||
|
t.Errorf("channel received %d links, want %d", len(channelLinks), tt.expectCount)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestImageParseError(t *testing.T) {
|
||||||
|
err := ImageParseError{Message: "test error", Code: 1}
|
||||||
|
if err.Error() != "test error" {
|
||||||
|
t.Errorf("Error() = %q, want %q", err.Error(), "test error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseImageLinks(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
html string
|
||||||
|
expectCount int
|
||||||
|
expectErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "extracts blogspot images",
|
||||||
|
html: `<html><body>
|
||||||
|
<img src="https://bp.blogspot.com/page-001.jpg" />
|
||||||
|
<img src="https://bp.blogspot.com/page-002.jpg" />
|
||||||
|
</body></html>`,
|
||||||
|
expectCount: 2,
|
||||||
|
expectErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "extracts blogger googleusercontent images",
|
||||||
|
html: `<html><body>
|
||||||
|
<img src="https://blogger.googleusercontent.com/page-001.jpg" />
|
||||||
|
</body></html>`,
|
||||||
|
expectCount: 1,
|
||||||
|
expectErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "extracts covers images",
|
||||||
|
html: `<html><body>
|
||||||
|
<img src="https://example.com/covers/cover-001.jpg" />
|
||||||
|
</body></html>`,
|
||||||
|
expectCount: 1,
|
||||||
|
expectErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "excludes logo images",
|
||||||
|
html: `<html><body>
|
||||||
|
<img src="https://bp.blogspot.com/logo-site.jpg" />
|
||||||
|
<img src="https://bp.blogspot.com/page-001.jpg" />
|
||||||
|
</body></html>`,
|
||||||
|
expectCount: 1,
|
||||||
|
expectErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "excludes non-matching images",
|
||||||
|
html: `<html><body>
|
||||||
|
<img src="https://other-site.com/image.jpg" />
|
||||||
|
<img src="https://cdn.example.com/banner.png" />
|
||||||
|
</body></html>`,
|
||||||
|
expectCount: 0,
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no images at all",
|
||||||
|
html: `<html><body><p>No images here</p></body></html>`,
|
||||||
|
expectCount: 0,
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
doc, _ := goquery.NewDocumentFromReader(strings.NewReader(tt.html))
|
||||||
|
ch := make(chan []string, 1)
|
||||||
|
|
||||||
|
links, err := ParseImageLinks(doc, ch)
|
||||||
|
|
||||||
|
if tt.expectErr && err == nil {
|
||||||
|
t.Error("ParseImageLinks() expected error, got nil")
|
||||||
|
}
|
||||||
|
if !tt.expectErr && err != nil {
|
||||||
|
t.Errorf("ParseImageLinks() unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(links) != tt.expectCount {
|
||||||
|
t.Errorf("ParseImageLinks() returned %d links, want %d", len(links), tt.expectCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify the channel also received the links
|
||||||
|
channelLinks := <-ch
|
||||||
|
if len(channelLinks) != tt.expectCount {
|
||||||
|
t.Errorf("channel received %d links, want %d", len(channelLinks), tt.expectCount)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
1
go.mod
1
go.mod
@@ -5,6 +5,7 @@ go 1.22.3
|
|||||||
require (
|
require (
|
||||||
github.com/DaRealFreak/cloudflare-bp-go v1.0.4
|
github.com/DaRealFreak/cloudflare-bp-go v1.0.4
|
||||||
github.com/PuerkitoBio/goquery v1.9.2
|
github.com/PuerkitoBio/goquery v1.9.2
|
||||||
|
github.com/andybalholm/brotli v1.2.0
|
||||||
github.com/spf13/cobra v1.8.1
|
github.com/spf13/cobra v1.8.1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
4
go.sum
4
go.sum
@@ -4,6 +4,8 @@ github.com/EDDYCJY/fake-useragent v0.2.0 h1:Jcnkk2bgXmDpX0z+ELlUErTkoLb/mxFBNd2Y
|
|||||||
github.com/EDDYCJY/fake-useragent v0.2.0/go.mod h1:5wn3zzlDxhKW6NYknushqinPcAqZcAPHy8lLczCdJdc=
|
github.com/EDDYCJY/fake-useragent v0.2.0/go.mod h1:5wn3zzlDxhKW6NYknushqinPcAqZcAPHy8lLczCdJdc=
|
||||||
github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
|
github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
|
||||||
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
|
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
|
||||||
|
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
|
||||||
|
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
|
||||||
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
|
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
|
||||||
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
|
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||||
@@ -20,6 +22,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
|||||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||||
|
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
|
||||||
|
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
|
||||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
|
|||||||
Reference in New Issue
Block a user