diff --git a/comic/archive_test.go b/comic/archive_test.go
new file mode 100644
index 0000000..6f1645e
--- /dev/null
+++ b/comic/archive_test.go
@@ -0,0 +1,110 @@
+package comic
+
+import (
+ "archive/zip"
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+func TestArchiveError(t *testing.T) {
+ err := ArchiveError{Message: "archive failed", Code: 1}
+ if err.Error() != "archive failed" {
+ t.Errorf("Error() = %q, want %q", err.Error(), "archive failed")
+ }
+}
+
+func TestArchive(t *testing.T) {
+ t.Run("creates cbz with image files", func(t *testing.T) {
+ tmpDir := t.TempDir()
+ title := "TestComic"
+ comicDir := filepath.Join(tmpDir, title)
+ os.MkdirAll(comicDir, os.ModePerm)
+
+ // Create fake image files
+ for _, name := range []string{"TestComic 001.jpg", "TestComic 002.jpg", "TestComic 003.png"} {
+ os.WriteFile(filepath.Join(comicDir, name), []byte("fake image"), 0644)
+ }
+
+ c := &Comic{
+ Title: title,
+ LibraryPath: tmpDir,
+ }
+
+ err := c.Archive()
+ if err != nil {
+ t.Fatalf("Archive() unexpected error: %v", err)
+ }
+
+ archivePath := filepath.Join(comicDir, title+".cbz")
+ if _, err := os.Stat(archivePath); os.IsNotExist(err) {
+ t.Fatalf("expected archive %s to exist", archivePath)
+ }
+
+ // Verify the zip contains the image files
+ reader, err := zip.OpenReader(archivePath)
+ if err != nil {
+ t.Fatalf("failed to open archive: %v", err)
+ }
+ defer reader.Close()
+
+ if len(reader.File) != 3 {
+ t.Errorf("archive contains %d files, want 3", len(reader.File))
+ }
+ })
+
+ t.Run("excludes non-image files from archive", func(t *testing.T) {
+ tmpDir := t.TempDir()
+ title := "TestComic"
+ comicDir := filepath.Join(tmpDir, title)
+ os.MkdirAll(comicDir, os.ModePerm)
+
+ // Create mixed files
+ os.WriteFile(filepath.Join(comicDir, "page-001.jpg"), []byte("image"), 0644)
+ os.WriteFile(filepath.Join(comicDir, "readme.txt"), []byte("text"), 0644)
+ os.WriteFile(filepath.Join(comicDir, "data.json"), []byte("json"), 0644)
+
+ c := &Comic{
+ Title: title,
+ LibraryPath: tmpDir,
+ }
+
+ err := c.Archive()
+ if err != nil {
+ t.Fatalf("Archive() unexpected error: %v", err)
+ }
+
+ archivePath := filepath.Join(comicDir, title+".cbz")
+ reader, err := zip.OpenReader(archivePath)
+ if err != nil {
+ t.Fatalf("failed to open archive: %v", err)
+ }
+ defer reader.Close()
+
+ if len(reader.File) != 1 {
+ t.Errorf("archive contains %d files, want 1 (only .jpg)", len(reader.File))
+ }
+ })
+
+ t.Run("handles empty directory", func(t *testing.T) {
+ tmpDir := t.TempDir()
+ title := "EmptyComic"
+ comicDir := filepath.Join(tmpDir, title)
+ os.MkdirAll(comicDir, os.ModePerm)
+
+ c := &Comic{
+ Title: title,
+ LibraryPath: tmpDir,
+ }
+
+ err := c.Archive()
+ if err != nil {
+ t.Fatalf("Archive() unexpected error: %v", err)
+ }
+
+ archivePath := filepath.Join(comicDir, title+".cbz")
+ if _, err := os.Stat(archivePath); os.IsNotExist(err) {
+ t.Fatalf("expected archive %s to exist even if empty", archivePath)
+ }
+ })
+}
diff --git a/comic/cleanup_test.go b/comic/cleanup_test.go
new file mode 100644
index 0000000..da90ce4
--- /dev/null
+++ b/comic/cleanup_test.go
@@ -0,0 +1,93 @@
+package comic
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+func TestCleanup(t *testing.T) {
+ t.Run("keeps cover image 001 and removes others", func(t *testing.T) {
+ tmpDir := t.TempDir()
+ title := "TestComic"
+ comicDir := filepath.Join(tmpDir, title)
+ os.MkdirAll(comicDir, os.ModePerm)
+
+ files := map[string]bool{
+ "TestComic 001.jpg": true, // should be kept
+ "TestComic 002.jpg": false, // should be removed
+ "TestComic 003.jpg": false, // should be removed
+ }
+
+ for name := range files {
+ os.WriteFile(filepath.Join(comicDir, name), []byte("fake"), 0644)
+ }
+
+ c := &Comic{
+ Title: title,
+ LibraryPath: tmpDir,
+ }
+
+ err := c.Cleanup()
+ if err != nil {
+ t.Fatalf("Cleanup() unexpected error: %v", err)
+ }
+
+ for name, shouldExist := range files {
+ path := filepath.Join(comicDir, name)
+ _, err := os.Stat(path)
+ exists := !os.IsNotExist(err)
+
+ if shouldExist && !exists {
+ t.Errorf("expected %s to be kept, but it was removed", name)
+ }
+ if !shouldExist && exists {
+ t.Errorf("expected %s to be removed, but it still exists", name)
+ }
+ }
+ })
+
+ t.Run("keeps non-image files", func(t *testing.T) {
+ tmpDir := t.TempDir()
+ title := "TestComic"
+ comicDir := filepath.Join(tmpDir, title)
+ os.MkdirAll(comicDir, os.ModePerm)
+
+ os.WriteFile(filepath.Join(comicDir, "TestComic.cbz"), []byte("archive"), 0644)
+ os.WriteFile(filepath.Join(comicDir, "metadata.json"), []byte("data"), 0644)
+
+ c := &Comic{
+ Title: title,
+ LibraryPath: tmpDir,
+ }
+
+ err := c.Cleanup()
+ if err != nil {
+ t.Fatalf("Cleanup() unexpected error: %v", err)
+ }
+
+ for _, name := range []string{"TestComic.cbz", "metadata.json"} {
+ path := filepath.Join(comicDir, name)
+ if _, err := os.Stat(path); os.IsNotExist(err) {
+ t.Errorf("expected non-image file %s to be kept", name)
+ }
+ }
+ })
+
+ t.Run("handles empty directory", func(t *testing.T) {
+ tmpDir := t.TempDir()
+ title := "EmptyComic"
+ comicDir := filepath.Join(tmpDir, title)
+ os.MkdirAll(comicDir, os.ModePerm)
+
+ c := &Comic{
+ Title: title,
+ LibraryPath: tmpDir,
+ }
+
+ err := c.Cleanup()
+ if err != nil {
+ t.Fatalf("Cleanup() unexpected error for empty dir: %v", err)
+ }
+ })
+}
diff --git a/comic/comic.go b/comic/comic.go
index 4a48c73..16fa617 100644
--- a/comic/comic.go
+++ b/comic/comic.go
@@ -26,21 +26,52 @@ type Comic struct {
// Returns the extracted title as a string.
func extractTitleFromMarkup(c Comic) string {
yearFormat := `^(.*?)\s+\(\d{4}(?:\s+.+)?\)`
- selection := c.Markup.Find("title")
-
- if selection.Length() == 0 {
- return "Untitled"
- }
-
- content := selection.First().Text()
regex := regexp.MustCompile(yearFormat)
- matches := regex.FindStringSubmatch(content)
- if len(matches) != 2 {
- return "Untitled"
+ extractFrom := func(text string) string {
+ matches := regex.FindStringSubmatch(text)
+ if len(matches) != 2 {
+ return ""
+ }
+ return strings.ReplaceAll(matches[1], ":", "")
}
- return strings.ReplaceAll(matches[1], ":", "")
+ title := extractFrom(c.Markup.Find("title").First().Text())
+
+ if strings.HasPrefix(title, "#") {
+ if h1 := extractFrom(c.Markup.Find("h1").First().Text()); h1 != "" && !strings.HasPrefix(h1, "#") {
+ return h1
+ }
+ if slug := titleFromSlug(c.URL); slug != "" {
+ return slug
+ }
+ }
+
+ if title != "" {
+ return title
+ }
+
+ return "Untitled"
+}
+
+// titleFromSlug derives a comic title from the last path segment of a URL.
+// It strips a trailing year (-YYYY), replaces hyphens with spaces, and title-cases the result.
+func titleFromSlug(url string) string {
+ slug := strings.TrimRight(url, "/")
+ if i := strings.LastIndex(slug, "/"); i >= 0 {
+ slug = slug[i+1:]
+ }
+ slug = regexp.MustCompile(`-\d{4}$`).ReplaceAllString(slug, "")
+ if slug == "" {
+ return ""
+ }
+ words := strings.Split(slug, "-")
+ for i, w := range words {
+ if len(w) > 0 {
+ words[i] = strings.ToUpper(w[:1]) + w[1:]
+ }
+ }
+ return strings.Join(words, " ")
}
// NewComic creates a new Comic instance from the provided URL and library path.
@@ -61,13 +92,21 @@ func NewComic(
LibraryPath: libraryPath,
}
- go Markup(c.URL, markupChannel)
+ if strings.Contains(url, "batcave.biz") {
+ go BatcaveBizMarkup(url, markupChannel)
+ } else {
+ go Markup(url, markupChannel)
+ }
markup := <-markupChannel
c.Markup = markup
c.Title = extractTitleFromMarkup(*c)
- go ParseImageLinks(markup, imageChannel)
+ if strings.Contains(url, "batcave.biz") {
+ go ParseBatcaveBizImageLinks(markup, imageChannel)
+ } else {
+ go ParseImageLinks(markup, imageChannel)
+ }
links := <-imageChannel
c.Filelist = links
diff --git a/comic/comic_test.go b/comic/comic_test.go
new file mode 100644
index 0000000..56f2318
--- /dev/null
+++ b/comic/comic_test.go
@@ -0,0 +1,170 @@
+package comic
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+func newDocFromHTML(html string) *goquery.Document {
+ doc, _ := goquery.NewDocumentFromReader(strings.NewReader(html))
+ return doc
+}
+
+func TestExtractTitleFromMarkup(t *testing.T) {
+ tests := []struct {
+ name string
+ html string
+ url string
+ expected string
+ }{
+ {
+ name: "standard title with year",
+ html: `
<title>Ultraman X Avengers 001 (2024)</title>`,
+ expected: "Ultraman X Avengers 001",
+ },
+ {
+ name: "title with year and extra text",
+ html: `<title>Batman 042 (2023 Digital)</title>`,
+ expected: "Batman 042",
+ },
+ {
+ name: "title with colon removed",
+ html: `<title>Spider-Man: No Way Home 001 (2022)</title>`,
+ expected: "Spider-Man No Way Home 001",
+ },
+ {
+ name: "no title tag",
+ html: `<html><head></head><body></body></html>`,
+ expected: "Untitled",
+ },
+ {
+ name: "title without year pattern",
+ html: `<title>Some Random Page</title>`,
+ expected: "Untitled",
+ },
+ {
+ name: "empty title",
+ html: `<title></title>`,
+ expected: "Untitled",
+ },
+ {
+ name: "title starts with # falls back to h1",
+ html: `<title>#018 (2026)</title>
<h1>Absolute Batman #018 (2026)</h1>`,
+ expected: "Absolute Batman #018",
+ },
+ {
+ name: "title starts with # but h1 also starts with #, falls back to slug",
+ html: `<title>#018 (2026)</title>
<h1>#018 (2026)</h1>`,
+ url: "https://readallcomics.com/absolute-batman-018-2026/",
+ expected: "Absolute Batman 018",
+ },
+ {
+ name: "title starts with # falls back to slug when no h1",
+ html: `<title>#018 (2026)</title>`,
+ url: "https://readallcomics.com/absolute-batman-018-2026/",
+ expected: "Absolute Batman 018",
+ },
+ {
+ name: "title starts with # no h1 no url",
+ html: `<title>#018 (2026)</title>`,
+ expected: "#018",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ doc := newDocFromHTML(tt.html)
+ c := Comic{Markup: doc, URL: tt.url}
+ result := extractTitleFromMarkup(c)
+ if result != tt.expected {
+ t.Errorf("extractTitleFromMarkup() = %q, want %q", result, tt.expected)
+ }
+ })
+ }
+}
+
+func TestTitleFromSlug(t *testing.T) {
+ tests := []struct {
+ name string
+ url string
+ expected string
+ }{
+ {
+ name: "standard comic URL",
+ url: "https://readallcomics.com/absolute-batman-018-2026/",
+ expected: "Absolute Batman 018",
+ },
+ {
+ name: "no trailing slash",
+ url: "https://readallcomics.com/absolute-batman-018-2026",
+ expected: "Absolute Batman 018",
+ },
+ {
+ name: "no year in slug",
+ url: "https://readallcomics.com/absolute-batman-018/",
+ expected: "Absolute Batman 018",
+ },
+ {
+ name: "single word slug",
+ url: "https://readallcomics.com/batman/",
+ expected: "Batman",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := titleFromSlug(tt.url)
+ if result != tt.expected {
+ t.Errorf("titleFromSlug() = %q, want %q", result, tt.expected)
+ }
+ })
+ }
+}
+
+func TestCover(t *testing.T) {
+ tests := []struct {
+ name string
+ filelist []string
+ wantSuffix string
+ expectErr bool
+ }{
+ {
+ name: "finds cover ending in 001.jpg",
+ filelist: []string{"https://example.com/image-002.jpg", "https://example.com/image-001.jpg", "https://example.com/image-003.jpg"},
+ wantSuffix: "image-001.jpg",
+ },
+ {
+ name: "finds cover ending in 000.jpg",
+ filelist: []string{"https://example.com/image-000.jpg", "https://example.com/image-001.jpg"},
+ wantSuffix: "image-000.jpg",
+ },
+ {
+ name: "returns error when no cover found",
+ filelist: []string{"https://example.com/image-002.jpg", "https://example.com/image-003.jpg"},
+ expectErr: true,
+ },
+ {
+ name: "returns error for empty filelist",
+ filelist: []string{},
+ expectErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ c := &Comic{Filelist: tt.filelist}
+ cover, err := c.Cover()
+ if tt.expectErr && err == nil {
+ t.Error("Cover() expected error, got nil")
+ }
+ if !tt.expectErr && err != nil {
+ t.Errorf("Cover() unexpected error: %v", err)
+ }
+ if tt.wantSuffix != "" && !strings.HasSuffix(cover, tt.wantSuffix) {
+ t.Errorf("Cover() = %q, want path ending in %q", cover, tt.wantSuffix)
+ }
+ })
+ }
+}
diff --git a/comic/download_test.go b/comic/download_test.go
new file mode 100644
index 0000000..690ad68
--- /dev/null
+++ b/comic/download_test.go
@@ -0,0 +1,145 @@
+package comic
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+func TestComicDownloadError(t *testing.T) {
+ err := ComicDownloadError{Message: "download failed", Code: 1}
+ if err.Error() != "download failed" {
+ t.Errorf("Error() = %q, want %q", err.Error(), "download failed")
+ }
+}
+
+func TestHandleRequest(t *testing.T) {
+ t.Run("successful request", func(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ if r.Header.Get("User-Agent") == "" {
+ t.Error("expected User-Agent header to be set")
+ }
+ w.WriteHeader(http.StatusOK)
+ w.Write([]byte("image data"))
+ }))
+ defer server.Close()
+
+ resp, err := handleRequest(server.URL)
+ if err != nil {
+ t.Fatalf("handleRequest() unexpected error: %v", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ t.Errorf("handleRequest() status = %d, want %d", resp.StatusCode, http.StatusOK)
+ }
+ })
+
+ t.Run("non-200 response", func(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusNotFound)
+ }))
+ defer server.Close()
+
+ _, err := handleRequest(server.URL)
+ if err == nil {
+ t.Error("handleRequest() expected error for 404 response, got nil")
+ }
+ })
+
+ t.Run("invalid URL", func(t *testing.T) {
+ _, err := handleRequest("http://invalid.localhost:0/bad")
+ if err == nil {
+ t.Error("handleRequest() expected error for invalid URL, got nil")
+ }
+ })
+}
+
+func TestDownloadFile(t *testing.T) {
+ t.Run("successful download", func(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ w.Write([]byte("fake image content"))
+ }))
+ defer server.Close()
+
+ tmpDir := t.TempDir()
+ c := &Comic{
+ Title: "TestComic",
+ LibraryPath: tmpDir,
+ }
+
+ err := downloadFile(server.URL+"/image.jpg", 1, c)
+ if err != nil {
+ t.Fatalf("downloadFile() unexpected error: %v", err)
+ }
+
+ expectedPath := filepath.Join(tmpDir, "TestComic", "TestComic 001.jpg")
+ if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
+ t.Errorf("expected file %s to exist", expectedPath)
+ }
+ })
+
+ t.Run("formats page number with leading zeros", func(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ w.Write([]byte("fake image content"))
+ }))
+ defer server.Close()
+
+ tmpDir := t.TempDir()
+ c := &Comic{
+ Title: "TestComic",
+ LibraryPath: tmpDir,
+ }
+
+ err := downloadFile(server.URL+"/image.jpg", 42, c)
+ if err != nil {
+ t.Fatalf("downloadFile() unexpected error: %v", err)
+ }
+
+ expectedPath := filepath.Join(tmpDir, "TestComic", "TestComic 042.jpg")
+ if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
+ t.Errorf("expected file %s to exist", expectedPath)
+ }
+ })
+
+ t.Run("server error returns error", func(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusInternalServerError)
+ }))
+ defer server.Close()
+
+ tmpDir := t.TempDir()
+ c := &Comic{
+ Title: "TestComic",
+ LibraryPath: tmpDir,
+ }
+
+ err := downloadFile(server.URL+"/image.jpg", 1, c)
+ if err == nil {
+ t.Error("downloadFile() expected error for server error, got nil")
+ }
+ })
+
+ t.Run("empty response body returns error", func(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ // write nothing
+ }))
+ defer server.Close()
+
+ tmpDir := t.TempDir()
+ c := &Comic{
+ Title: "TestComic",
+ LibraryPath: tmpDir,
+ }
+
+ err := downloadFile(server.URL+"/image.jpg", 1, c)
+ if err == nil {
+ t.Error("downloadFile() expected error for empty body, got nil")
+ }
+ })
+}
diff --git a/comic/parser.go b/comic/parser.go
index da8429b..32b1058 100644
--- a/comic/parser.go
+++ b/comic/parser.go
@@ -3,6 +3,9 @@ package comic
import (
"io"
"net/http"
+ "net/http/cookiejar"
+ "net/url"
+ "regexp"
"strings"
"github.com/PuerkitoBio/goquery"
@@ -47,6 +50,143 @@ func Markup(url string, c chan *goquery.Document) *goquery.Document {
return markup
}
+func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Document {
+ jar, _ := cookiejar.New(nil)
+ client := &http.Client{
+ Jar: jar,
+ CheckRedirect: func(req *http.Request, via []*http.Request) error {
+ return nil
+ },
+ }
+
+ headers := map[string]string{
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+ "Accept-Language": "en-US,en;q=0.9",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+ }
+
+ // GET the challenge page to obtain cookies and any necessary tokens
+ req, err := http.NewRequest("GET", referer, nil)
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ for k, v := range headers {
+ req.Header.Set(k, v)
+ }
+
+ res, err := client.Do(req)
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ defer res.Body.Close()
+
+ body, err := io.ReadAll(res.Body)
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+
+ tokenRegex := regexp.MustCompile(`token:\s*"([^"]+)"`)
+ matches := tokenRegex.FindSubmatch(body)
+
+ if matches == nil {
+ // no challenge, parse directly
+ doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ if c != nil {
+ c <- doc
+ }
+ return doc
+ }
+
+ encodedToken := string(matches[1])
+ token, err := url.QueryUnescape(encodedToken)
+ if err != nil {
+ token = encodedToken
+ }
+
+ // POST the challenge token to /_v with fake browser metrics to obtain the clearance cookie
+ params := url.Values{}
+ params.Set("token", token)
+ params.Set("mode", "modern")
+ params.Set("workTime", "462")
+ params.Set("iterations", "183")
+ params.Set("webdriver", "0")
+ params.Set("touch", "0")
+ params.Set("screen_w", "1920")
+ params.Set("screen_h", "1080")
+ params.Set("screen_cd", "24")
+
+ postReq, err := http.NewRequest("POST", "https://batcave.biz/_v", strings.NewReader(params.Encode()))
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ for k, v := range headers {
+ postReq.Header.Set(k, v)
+ }
+ postReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+ postReq.Header.Set("Referer", referer)
+
+ postRes, err := client.Do(postReq)
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ defer postRes.Body.Close()
+ io.ReadAll(postRes.Body)
+
+ // GET the real page with the set cookie
+ realReq, err := http.NewRequest("GET", referer, nil)
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ for k, v := range headers {
+ realReq.Header.Set(k, v)
+ }
+
+ realRes, err := client.Do(realReq)
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ defer realRes.Body.Close()
+
+ doc, err := goquery.NewDocumentFromReader(realRes.Body)
+ if err != nil {
+ if c != nil {
+ c <- &goquery.Document{}
+ }
+ return &goquery.Document{}
+ }
+ if c != nil {
+ c <- doc
+ }
+ return doc
+}
+
// ParseImageLinks parses a goquery document to extract image links.
//
// markup is the goquery document to parse for image links.
@@ -69,3 +209,55 @@ func ParseImageLinks(markup *goquery.Document, c chan []string) ([]string, error
return links, ImageParseError{Message: "No images found", Code: 1}
}
+
+func ParseReadAllComicsLinks(markup *goquery.Document, c chan []string) ([]string, error) {
+ var links []string
+ markup.Find("img").Each(func(_ int, image *goquery.Selection) {
+ link, _ := image.Attr("src")
+ if !strings.Contains(link, "logo") && (strings.Contains(link, "bp.blogspot.com") || strings.Contains(link, "blogger.googleusercontent") || strings.Contains(link, "covers")) {
+ links = append(links, link)
+ }
+ })
+
+ c <- links
+
+ if len(links) > 0 {
+ return links, nil
+ }
+
+ return links, ImageParseError{Message: "No images found", Code: 1}
+}
+
+// ParseBatcaveBizImageLinks extracts image URLs from the __DATA__.images JavaScript
+// variable embedded in a batcave.biz page.
+func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
+ var links []string
+
+ markup.Find("script").Each(func(_ int, s *goquery.Selection) {
+ text := s.Text()
+ if !strings.Contains(text, "__DATA__") {
+ return
+ }
+
+ arrayRegex := regexp.MustCompile(`"images"\s*:\s*\[([^\]]+)\]`)
+ arrayMatch := arrayRegex.FindStringSubmatch(text)
+ if len(arrayMatch) < 2 {
+ return
+ }
+
+ urlRegex := regexp.MustCompile(`"([^"]+)"`)
+ for _, m := range urlRegex.FindAllStringSubmatch(arrayMatch[1], -1) {
+ if len(m) >= 2 {
+ links = append(links, m[1])
+ }
+ }
+ })
+
+ c <- links
+
+ if len(links) > 0 {
+ return links, nil
+ }
+
+ return links, ImageParseError{Message: "No images found", Code: 1}
+}
diff --git a/comic/parser_test.go b/comic/parser_test.go
new file mode 100644
index 0000000..9502639
--- /dev/null
+++ b/comic/parser_test.go
@@ -0,0 +1,183 @@
+package comic
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+func TestParseBatcaveBizImageLinks(t *testing.T) {
+ tests := []struct {
+ name string
+ html string
+ expectCount int
+ expectErr bool
+ expectURLs []string
+ }{
+ {
+ name: "extracts images from __DATA__",
+ html: `<script>window.__DATA__ = {"images":["https://cdn.batcave.biz/img/001.jpg","https://cdn.batcave.biz/img/002.jpg"]};</script>`,
+ expectCount: 2,
+ expectErr: false,
+ expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg", "https://cdn.batcave.biz/img/002.jpg"},
+ },
+ {
+ name: "extracts images with spaces around colon and bracket",
+ html: `<script>window.__DATA__ = {"images" : [ "https://cdn.batcave.biz/img/001.jpg" ]};</script>`,
+ expectCount: 1,
+ expectErr: false,
+ expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg"},
+ },
+ {
+ name: "no __DATA__ script",
+ html: `<script>var somethingElse = 1;</script>`,
+ expectCount: 0,
+ expectErr: true,
+ },
+ {
+ name: "__DATA__ present but no images key",
+ html: `<script>window.__DATA__ = {"pages": 5};</script>`,
+ expectCount: 0,
+ expectErr: true,
+ },
+ {
+ name: "no script tags",
+ html: `<p>nothing here</p>
`,
+ expectCount: 0,
+ expectErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ doc, _ := goquery.NewDocumentFromReader(strings.NewReader(tt.html))
+ ch := make(chan []string, 1)
+
+ links, err := ParseBatcaveBizImageLinks(doc, ch)
+
+ if tt.expectErr && err == nil {
+ t.Error("ParseBatcaveBizImageLinks() expected error, got nil")
+ }
+ if !tt.expectErr && err != nil {
+ t.Errorf("ParseBatcaveBizImageLinks() unexpected error: %v", err)
+ }
+ if len(links) != tt.expectCount {
+ t.Errorf("ParseBatcaveBizImageLinks() returned %d links, want %d", len(links), tt.expectCount)
+ }
+ for i, expected := range tt.expectURLs {
+ if i >= len(links) {
+ t.Errorf("missing link at index %d: want %q", i, expected)
+ continue
+ }
+ if links[i] != expected {
+ t.Errorf("links[%d] = %q, want %q", i, links[i], expected)
+ }
+ }
+
+ channelLinks := <-ch
+ if len(channelLinks) != tt.expectCount {
+ t.Errorf("channel received %d links, want %d", len(channelLinks), tt.expectCount)
+ }
+ })
+ }
+}
+
+func TestImageParseError(t *testing.T) {
+ err := ImageParseError{Message: "test error", Code: 1}
+ if err.Error() != "test error" {
+ t.Errorf("Error() = %q, want %q", err.Error(), "test error")
+ }
+}
+
+func TestParseImageLinks(t *testing.T) {
+ tests := []struct {
+ name string
+ html string
+ expectCount int
+ expectErr bool
+ }{
+ {
+ name: "extracts blogspot images",
+ html: `
<img src="https://1.bp.blogspot.com/img/001.jpg">
<img src="https://2.bp.blogspot.com/img/002.jpg">
`,
+ expectCount: 2,
+ expectErr: false,
+ },
+ {
+ name: "extracts blogger googleusercontent images",
+ html: `
<img src="https://blogger.googleusercontent.com/img/001.jpg">
`,
+ expectCount: 1,
+ expectErr: false,
+ },
+ {
+ name: "extracts covers images",
+ html: `
<img src="https://example.com/covers/comic-001.jpg">
`,
+ expectCount: 1,
+ expectErr: false,
+ },
+ {
+ name: "excludes logo images",
+ html: `
<img src="https://1.bp.blogspot.com/img/001.jpg">
<img src="https://1.bp.blogspot.com/img/logo.png">
`,
+ expectCount: 1,
+ expectErr: false,
+ },
+ {
+ name: "excludes non-matching images",
+ html: `
<img src="https://example.com/img/001.jpg">
<img src="https://othersite.com/banner.png">
`,
+ expectCount: 0,
+ expectErr: true,
+ },
+ {
+ name: "no images at all",
+ html: `<p>No images here</p>
`,
+ expectCount: 0,
+ expectErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ doc, _ := goquery.NewDocumentFromReader(strings.NewReader(tt.html))
+ ch := make(chan []string, 1)
+
+ links, err := ParseImageLinks(doc, ch)
+
+ if tt.expectErr && err == nil {
+ t.Error("ParseImageLinks() expected error, got nil")
+ }
+ if !tt.expectErr && err != nil {
+ t.Errorf("ParseImageLinks() unexpected error: %v", err)
+ }
+ if len(links) != tt.expectCount {
+ t.Errorf("ParseImageLinks() returned %d links, want %d", len(links), tt.expectCount)
+ }
+
+ // Verify the channel also received the links
+ channelLinks := <-ch
+ if len(channelLinks) != tt.expectCount {
+ t.Errorf("channel received %d links, want %d", len(channelLinks), tt.expectCount)
+ }
+ })
+ }
+}
diff --git a/go.mod b/go.mod
index 0d3d77b..a078973 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.22.3
require (
github.com/DaRealFreak/cloudflare-bp-go v1.0.4
github.com/PuerkitoBio/goquery v1.9.2
+ github.com/andybalholm/brotli v1.2.0
github.com/spf13/cobra v1.8.1
)
diff --git a/go.sum b/go.sum
index 9bd373d..7c1d02f 100644
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/EDDYCJY/fake-useragent v0.2.0 h1:Jcnkk2bgXmDpX0z+ELlUErTkoLb/mxFBNd2Y
github.com/EDDYCJY/fake-useragent v0.2.0/go.mod h1:5wn3zzlDxhKW6NYknushqinPcAqZcAPHy8lLczCdJdc=
github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
+github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
+github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@@ -20,6 +22,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=