Merge pull request 'fix: extract title from h1 or URL slug when page title starts with #' (#5) from feat/title-h1-fallback into main

Reviewed-on: #5
fix: extract title from h1 or URL slug when page title starts with #
2026-03-11 22:16:25 +00:00 · 2026-03-11 18:13:14 -04:00
9 changed files with 950 additions and 13 deletions
--- a/comic/archive_test.go
+++ b/comic/archive_test.go
@@ -0,0 +1,110 @@
+package comic
+
+import (
+	"archive/zip"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestArchiveError(t *testing.T) {
+	err := ArchiveError{Message: "archive failed", Code: 1}
+	if err.Error() != "archive failed" {
+		t.Errorf("Error() = %q, want %q", err.Error(), "archive failed")
+	}
+}
+
+func TestArchive(t *testing.T) {
+	t.Run("creates cbz with image files", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		title := "TestComic"
+		comicDir := filepath.Join(tmpDir, title)
+		os.MkdirAll(comicDir, os.ModePerm)
+
+		// Create fake image files
+		for _, name := range []string{"TestComic 001.jpg", "TestComic 002.jpg", "TestComic 003.png"} {
+			os.WriteFile(filepath.Join(comicDir, name), []byte("fake image"), 0644)
+		}
+
+		c := &Comic{
+			Title:       title,
+			LibraryPath: tmpDir,
+		}
+
+		err := c.Archive()
+		if err != nil {
+			t.Fatalf("Archive() unexpected error: %v", err)
+		}
+
+		archivePath := filepath.Join(comicDir, title+".cbz")
+		if _, err := os.Stat(archivePath); os.IsNotExist(err) {
+			t.Fatalf("expected archive %s to exist", archivePath)
+		}
+
+		// Verify the zip contains the image files
+		reader, err := zip.OpenReader(archivePath)
+		if err != nil {
+			t.Fatalf("failed to open archive: %v", err)
+		}
+		defer reader.Close()
+
+		if len(reader.File) != 3 {
+			t.Errorf("archive contains %d files, want 3", len(reader.File))
+		}
+	})
+
+	t.Run("excludes non-image files from archive", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		title := "TestComic"
+		comicDir := filepath.Join(tmpDir, title)
+		os.MkdirAll(comicDir, os.ModePerm)
+
+		// Create mixed files
+		os.WriteFile(filepath.Join(comicDir, "page-001.jpg"), []byte("image"), 0644)
+		os.WriteFile(filepath.Join(comicDir, "readme.txt"), []byte("text"), 0644)
+		os.WriteFile(filepath.Join(comicDir, "data.json"), []byte("json"), 0644)
+
+		c := &Comic{
+			Title:       title,
+			LibraryPath: tmpDir,
+		}
+
+		err := c.Archive()
+		if err != nil {
+			t.Fatalf("Archive() unexpected error: %v", err)
+		}
+
+		archivePath := filepath.Join(comicDir, title+".cbz")
+		reader, err := zip.OpenReader(archivePath)
+		if err != nil {
+			t.Fatalf("failed to open archive: %v", err)
+		}
+		defer reader.Close()
+
+		if len(reader.File) != 1 {
+			t.Errorf("archive contains %d files, want 1 (only .jpg)", len(reader.File))
+		}
+	})
+
+	t.Run("handles empty directory", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		title := "EmptyComic"
+		comicDir := filepath.Join(tmpDir, title)
+		os.MkdirAll(comicDir, os.ModePerm)
+
+		c := &Comic{
+			Title:       title,
+			LibraryPath: tmpDir,
+		}
+
+		err := c.Archive()
+		if err != nil {
+			t.Fatalf("Archive() unexpected error: %v", err)
+		}
+
+		archivePath := filepath.Join(comicDir, title+".cbz")
+		if _, err := os.Stat(archivePath); os.IsNotExist(err) {
+			t.Fatalf("expected archive %s to exist even if empty", archivePath)
+		}
+	})
+}
--- a/comic/cleanup_test.go
+++ b/comic/cleanup_test.go
@@ -0,0 +1,93 @@
+package comic
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestCleanup(t *testing.T) {
+	t.Run("keeps cover image 001 and removes others", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		title := "TestComic"
+		comicDir := filepath.Join(tmpDir, title)
+		os.MkdirAll(comicDir, os.ModePerm)
+
+		files := map[string]bool{
+			"TestComic 001.jpg": true,  // should be kept
+			"TestComic 002.jpg": false, // should be removed
+			"TestComic 003.jpg": false, // should be removed
+		}
+
+		for name := range files {
+			os.WriteFile(filepath.Join(comicDir, name), []byte("fake"), 0644)
+		}
+
+		c := &Comic{
+			Title:       title,
+			LibraryPath: tmpDir,
+		}
+
+		err := c.Cleanup()
+		if err != nil {
+			t.Fatalf("Cleanup() unexpected error: %v", err)
+		}
+
+		for name, shouldExist := range files {
+			path := filepath.Join(comicDir, name)
+			_, err := os.Stat(path)
+			exists := !os.IsNotExist(err)
+
+			if shouldExist && !exists {
+				t.Errorf("expected %s to be kept, but it was removed", name)
+			}
+			if !shouldExist && exists {
+				t.Errorf("expected %s to be removed, but it still exists", name)
+			}
+		}
+	})
+
+	t.Run("keeps non-image files", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		title := "TestComic"
+		comicDir := filepath.Join(tmpDir, title)
+		os.MkdirAll(comicDir, os.ModePerm)
+
+		os.WriteFile(filepath.Join(comicDir, "TestComic.cbz"), []byte("archive"), 0644)
+		os.WriteFile(filepath.Join(comicDir, "metadata.json"), []byte("data"), 0644)
+
+		c := &Comic{
+			Title:       title,
+			LibraryPath: tmpDir,
+		}
+
+		err := c.Cleanup()
+		if err != nil {
+			t.Fatalf("Cleanup() unexpected error: %v", err)
+		}
+
+		for _, name := range []string{"TestComic.cbz", "metadata.json"} {
+			path := filepath.Join(comicDir, name)
+			if _, err := os.Stat(path); os.IsNotExist(err) {
+				t.Errorf("expected non-image file %s to be kept", name)
+			}
+		}
+	})
+
+	t.Run("handles empty directory", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		title := "EmptyComic"
+		comicDir := filepath.Join(tmpDir, title)
+		os.MkdirAll(comicDir, os.ModePerm)
+
+		c := &Comic{
+			Title:       title,
+			LibraryPath: tmpDir,
+		}
+
+		err := c.Cleanup()
+		if err != nil {
+			t.Fatalf("Cleanup() unexpected error for empty dir: %v", err)
+		}
+	})
+}
--- a/comic/comic.go
+++ b/comic/comic.go
@@ -26,21 +26,52 @@ type Comic struct {
 // Returns the extracted title as a string.
 func extractTitleFromMarkup(c Comic) string {
 	yearFormat := `^(.*?)\s+\(\d{4}(?:\s+.+)?\)`
-	selection := c.Markup.Find("title")
-
-	if selection.Length() == 0 {
-		return "Untitled"
-	}
-
-	content := selection.First().Text()
 	regex := regexp.MustCompile(yearFormat)
-	matches := regex.FindStringSubmatch(content)

-	if len(matches) != 2 {
-		return "Untitled"
+	extractFrom := func(text string) string {
+		matches := regex.FindStringSubmatch(text)
+		if len(matches) != 2 {
+			return ""
+		}
+		return strings.ReplaceAll(matches[1], ":", "")
 	}

-	return strings.ReplaceAll(matches[1], ":", "")
+	title := extractFrom(c.Markup.Find("title").First().Text())
+
+	if strings.HasPrefix(title, "#") {
+		if h1 := extractFrom(c.Markup.Find("h1").First().Text()); h1 != "" && !strings.HasPrefix(h1, "#") {
+			return h1
+		}
+		if slug := titleFromSlug(c.URL); slug != "" {
+			return slug
+		}
+	}
+
+	if title != "" {
+		return title
+	}
+
+	return "Untitled"
+}
+
+// titleFromSlug derives a comic title from the last path segment of a URL.
+// It strips a trailing year (-YYYY), replaces hyphens with spaces, and title-cases the result.
+func titleFromSlug(url string) string {
+	slug := strings.TrimRight(url, "/")
+	if i := strings.LastIndex(slug, "/"); i >= 0 {
+		slug = slug[i+1:]
+	}
+	slug = regexp.MustCompile(`-\d{4}$`).ReplaceAllString(slug, "")
+	if slug == "" {
+		return ""
+	}
+	words := strings.Split(slug, "-")
+	for i, w := range words {
+		if len(w) > 0 {
+			words[i] = strings.ToUpper(w[:1]) + w[1:]
+		}
+	}
+	return strings.Join(words, " ")
 }

 // NewComic creates a new Comic instance from the provided URL and library path.
@@ -61,13 +92,21 @@ func NewComic(
 		LibraryPath: libraryPath,
 	}

-	go Markup(c.URL, markupChannel)
+	if strings.Contains(url, "batcave.biz") {
+		go BatcaveBizMarkup(url, markupChannel)
+	} else {
+		go Markup(url, markupChannel)
+	}

 	markup := <-markupChannel
 	c.Markup = markup
 	c.Title = extractTitleFromMarkup(*c)

-	go ParseImageLinks(markup, imageChannel)
+	if strings.Contains(url, "batcave.biz") {
+		go ParseBatcaveBizImageLinks(markup, imageChannel)
+	} else {
+		go ParseImageLinks(markup, imageChannel)
+	}
 	links := <-imageChannel

 	c.Filelist = links
--- a/comic/comic_test.go
+++ b/comic/comic_test.go
@@ -0,0 +1,170 @@
+package comic
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+func newDocFromHTML(html string) *goquery.Document {
+	doc, _ := goquery.NewDocumentFromReader(strings.NewReader(html))
+	return doc
+}
+
+func TestExtractTitleFromMarkup(t *testing.T) {
+	tests := []struct {
+		name     string
+		html     string
+		url      string
+		expected string
+	}{
+		{
+			name:     "standard title with year",
+			html:     `<html><head><title>Ultraman X Avengers 001 (2024)</title></head></html>`,
+			expected: "Ultraman X Avengers 001",
+		},
+		{
+			name:     "title with year and extra text",
+			html:     `<html><head><title>Batman 042 (2023 Digital)</title></head></html>`,
+			expected: "Batman 042",
+		},
+		{
+			name:     "title with colon removed",
+			html:     `<html><head><title>Spider-Man: No Way Home 001 (2022)</title></head></html>`,
+			expected: "Spider-Man No Way Home 001",
+		},
+		{
+			name:     "no title tag",
+			html:     `<html><head></head></html>`,
+			expected: "Untitled",
+		},
+		{
+			name:     "title without year pattern",
+			html:     `<html><head><title>Some Random Page</title></head></html>`,
+			expected: "Untitled",
+		},
+		{
+			name:     "empty title",
+			html:     `<html><head><title></title></head></html>`,
+			expected: "Untitled",
+		},
+		{
+			name:     "title starts with # falls back to h1",
+			html:     `<html><head><title>#018 (2026)</title></head><body><h1>Absolute Batman #018 (2026)</h1></body></html>`,
+			expected: "Absolute Batman #018",
+		},
+		{
+			name:     "title starts with # but h1 also starts with #, falls back to slug",
+			html:     `<html><head><title>#018 (2026)</title></head><body><h1>#018 (2026)</h1></body></html>`,
+			url:      "https://readallcomics.com/absolute-batman-018-2026/",
+			expected: "Absolute Batman 018",
+		},
+		{
+			name:     "title starts with # falls back to slug when no h1",
+			html:     `<html><head><title>#018 (2026)</title></head></html>`,
+			url:      "https://readallcomics.com/absolute-batman-018-2026/",
+			expected: "Absolute Batman 018",
+		},
+		{
+			name:     "title starts with # no h1 no url",
+			html:     `<html><head><title>#018 (2026)</title></head></html>`,
+			expected: "#018",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			doc := newDocFromHTML(tt.html)
+			c := Comic{Markup: doc, URL: tt.url}
+			result := extractTitleFromMarkup(c)
+			if result != tt.expected {
+				t.Errorf("extractTitleFromMarkup() = %q, want %q", result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestTitleFromSlug(t *testing.T) {
+	tests := []struct {
+		name     string
+		url      string
+		expected string
+	}{
+		{
+			name:     "standard comic URL",
+			url:      "https://readallcomics.com/absolute-batman-018-2026/",
+			expected: "Absolute Batman 018",
+		},
+		{
+			name:     "no trailing slash",
+			url:      "https://readallcomics.com/absolute-batman-018-2026",
+			expected: "Absolute Batman 018",
+		},
+		{
+			name:     "no year in slug",
+			url:      "https://readallcomics.com/absolute-batman-018/",
+			expected: "Absolute Batman 018",
+		},
+		{
+			name:     "single word slug",
+			url:      "https://readallcomics.com/batman/",
+			expected: "Batman",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := titleFromSlug(tt.url)
+			if result != tt.expected {
+				t.Errorf("titleFromSlug() = %q, want %q", result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestCover(t *testing.T) {
+	tests := []struct {
+		name        string
+		filelist    []string
+		wantSuffix  string
+		expectErr   bool
+	}{
+		{
+			name:       "finds cover ending in 001.jpg",
+			filelist:   []string{"https://example.com/image-002.jpg", "https://example.com/image-001.jpg", "https://example.com/image-003.jpg"},
+			wantSuffix: "image-001.jpg",
+		},
+		{
+			name:       "finds cover ending in 000.jpg",
+			filelist:   []string{"https://example.com/image-000.jpg", "https://example.com/image-001.jpg"},
+			wantSuffix: "image-000.jpg",
+		},
+		{
+			name:      "returns error when no cover found",
+			filelist:  []string{"https://example.com/image-002.jpg", "https://example.com/image-003.jpg"},
+			expectErr: true,
+		},
+		{
+			name:      "returns error for empty filelist",
+			filelist:  []string{},
+			expectErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := &Comic{Filelist: tt.filelist}
+			cover, err := c.Cover()
+			if tt.expectErr && err == nil {
+				t.Error("Cover() expected error, got nil")
+			}
+			if !tt.expectErr && err != nil {
+				t.Errorf("Cover() unexpected error: %v", err)
+			}
+			if tt.wantSuffix != "" && !strings.HasSuffix(cover, tt.wantSuffix) {
+				t.Errorf("Cover() = %q, want path ending in %q", cover, tt.wantSuffix)
+			}
+		})
+	}
+}
--- a/comic/download_test.go
+++ b/comic/download_test.go
@@ -0,0 +1,145 @@
+package comic
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestComicDownloadError(t *testing.T) {
+	err := ComicDownloadError{Message: "download failed", Code: 1}
+	if err.Error() != "download failed" {
+		t.Errorf("Error() = %q, want %q", err.Error(), "download failed")
+	}
+}
+
+func TestHandleRequest(t *testing.T) {
+	t.Run("successful request", func(t *testing.T) {
+		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.Header.Get("User-Agent") == "" {
+				t.Error("expected User-Agent header to be set")
+			}
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte("image data"))
+		}))
+		defer server.Close()
+
+		resp, err := handleRequest(server.URL)
+		if err != nil {
+			t.Fatalf("handleRequest() unexpected error: %v", err)
+		}
+		defer resp.Body.Close()
+
+		if resp.StatusCode != http.StatusOK {
+			t.Errorf("handleRequest() status = %d, want %d", resp.StatusCode, http.StatusOK)
+		}
+	})
+
+	t.Run("non-200 response", func(t *testing.T) {
+		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer server.Close()
+
+		_, err := handleRequest(server.URL)
+		if err == nil {
+			t.Error("handleRequest() expected error for 404 response, got nil")
+		}
+	})
+
+	t.Run("invalid URL", func(t *testing.T) {
+		_, err := handleRequest("http://invalid.localhost:0/bad")
+		if err == nil {
+			t.Error("handleRequest() expected error for invalid URL, got nil")
+		}
+	})
+}
+
+func TestDownloadFile(t *testing.T) {
+	t.Run("successful download", func(t *testing.T) {
+		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte("fake image content"))
+		}))
+		defer server.Close()
+
+		tmpDir := t.TempDir()
+		c := &Comic{
+			Title:       "TestComic",
+			LibraryPath: tmpDir,
+		}
+
+		err := downloadFile(server.URL+"/image.jpg", 1, c)
+		if err != nil {
+			t.Fatalf("downloadFile() unexpected error: %v", err)
+		}
+
+		expectedPath := filepath.Join(tmpDir, "TestComic", "TestComic 001.jpg")
+		if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
+			t.Errorf("expected file %s to exist", expectedPath)
+		}
+	})
+
+	t.Run("formats page number with leading zeros", func(t *testing.T) {
+		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte("fake image content"))
+		}))
+		defer server.Close()
+
+		tmpDir := t.TempDir()
+		c := &Comic{
+			Title:       "TestComic",
+			LibraryPath: tmpDir,
+		}
+
+		err := downloadFile(server.URL+"/image.jpg", 42, c)
+		if err != nil {
+			t.Fatalf("downloadFile() unexpected error: %v", err)
+		}
+
+		expectedPath := filepath.Join(tmpDir, "TestComic", "TestComic 042.jpg")
+		if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
+			t.Errorf("expected file %s to exist", expectedPath)
+		}
+	})
+
+	t.Run("server error returns error", func(t *testing.T) {
+		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusInternalServerError)
+		}))
+		defer server.Close()
+
+		tmpDir := t.TempDir()
+		c := &Comic{
+			Title:       "TestComic",
+			LibraryPath: tmpDir,
+		}
+
+		err := downloadFile(server.URL+"/image.jpg", 1, c)
+		if err == nil {
+			t.Error("downloadFile() expected error for server error, got nil")
+		}
+	})
+
+	t.Run("empty response body returns error", func(t *testing.T) {
+		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusOK)
+			// write nothing
+		}))
+		defer server.Close()
+
+		tmpDir := t.TempDir()
+		c := &Comic{
+			Title:       "TestComic",
+			LibraryPath: tmpDir,
+		}
+
+		err := downloadFile(server.URL+"/image.jpg", 1, c)
+		if err == nil {
+			t.Error("downloadFile() expected error for empty body, got nil")
+		}
+	})
+}
--- a/comic/parser.go
+++ b/comic/parser.go
@@ -3,6 +3,9 @@ package comic
 import (
 	"io"
 	"net/http"
+	"net/http/cookiejar"
+	"net/url"
+	"regexp"
 	"strings"

 	"github.com/PuerkitoBio/goquery"
@@ -47,6 +50,143 @@ func Markup(url string, c chan *goquery.Document) *goquery.Document {
 	return markup
 }

+func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Document {
+	jar, _ := cookiejar.New(nil)
+	client := &http.Client{
+		Jar: jar,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			return nil
+		},
+	}
+
+	headers := map[string]string{
+		"User-Agent":      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+		"Accept-Language": "en-US,en;q=0.9",
+		"Accept":          "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+	}
+
+	// GET the challenge page to obtain cookies and any necessary tokens
+	req, err := http.NewRequest("GET", referer, nil)
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+	for k, v := range headers {
+		req.Header.Set(k, v)
+	}
+
+	res, err := client.Do(req)
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+	defer res.Body.Close()
+
+	body, err := io.ReadAll(res.Body)
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+
+	tokenRegex := regexp.MustCompile(`token:\s*"([^"]+)"`)
+	matches := tokenRegex.FindSubmatch(body)
+
+	if matches == nil {
+		// No challenge token found: parse the response body directly
+		doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
+		if err != nil {
+			if c != nil {
+				c <- &goquery.Document{}
+			}
+			return &goquery.Document{}
+		}
+		if c != nil {
+			c <- doc
+		}
+		return doc
+	}
+
+	encodedToken := string(matches[1])
+	token, err := url.QueryUnescape(encodedToken)
+	if err != nil {
+		token = encodedToken
+	}
+
+	// POST the token to /_v with fake browser metrics
+	params := url.Values{}
+	params.Set("token", token)
+	params.Set("mode", "modern")
+	params.Set("workTime", "462")
+	params.Set("iterations", "183")
+	params.Set("webdriver", "0")
+	params.Set("touch", "0")
+	params.Set("screen_w", "1920")
+	params.Set("screen_h", "1080")
+	params.Set("screen_cd", "24")
+
+	postReq, err := http.NewRequest("POST", "https://batcave.biz/_v", strings.NewReader(params.Encode()))
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+	for k, v := range headers {
+		postReq.Header.Set(k, v)
+	}
+	postReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	postReq.Header.Set("Referer", referer)
+
+	postRes, err := client.Do(postReq)
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+	defer postRes.Body.Close()
+	io.ReadAll(postRes.Body)
+
+	// GET the real page with the set cookie
+	realReq, err := http.NewRequest("GET", referer, nil)
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+	for k, v := range headers {
+		realReq.Header.Set(k, v)
+	}
+
+	realRes, err := client.Do(realReq)
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+	defer realRes.Body.Close()
+
+	doc, err := goquery.NewDocumentFromReader(realRes.Body)
+	if err != nil {
+		if c != nil {
+			c <- &goquery.Document{}
+		}
+		return &goquery.Document{}
+	}
+	if c != nil {
+		c <- doc
+	}
+	return doc
+}
+
 // ParseImageLinks parses a goquery document to extract image links.
 //
 // markup is the goquery document to parse for image links.
@@ -69,3 +209,55 @@ func ParseImageLinks(markup *goquery.Document, c chan []string) ([]string, error

 	return links, ImageParseError{Message: "No images found", Code: 1}
 }
+
+func ParseReadAllComicsLinks(markup *goquery.Document, c chan []string) ([]string, error) {
+	var links []string
+	markup.Find("img").Each(func(_ int, image *goquery.Selection) {
+		link, _ := image.Attr("src")
+		if !strings.Contains(link, "logo") && (strings.Contains(link, "bp.blogspot.com") || strings.Contains(link, "blogger.googleusercontent") || strings.Contains(link, "covers")) {
+			links = append(links, link)
+		}
+	})
+
+	c <- links
+
+	if len(links) > 0 {
+		return links, nil
+	}
+
+	return links, ImageParseError{Message: "No images found", Code: 1}
+}
+
+// ParseBatcaveBizImageLinks extracts image URLs from the __DATA__.images JavaScript
+// variable embedded in a batcave.biz page.
+func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
+	var links []string
+
+	markup.Find("script").Each(func(_ int, s *goquery.Selection) {
+		text := s.Text()
+		if !strings.Contains(text, "__DATA__") {
+			return
+		}
+
+		arrayRegex := regexp.MustCompile(`"images"\s*:\s*\[([^\]]+)\]`)
+		arrayMatch := arrayRegex.FindStringSubmatch(text)
+		if len(arrayMatch) < 2 {
+			return
+		}
+
+		urlRegex := regexp.MustCompile(`"([^"]+)"`)
+		for _, m := range urlRegex.FindAllStringSubmatch(arrayMatch[1], -1) {
+			if len(m) >= 2 {
+				links = append(links, m[1])
+			}
+		}
+	})
+
+	c <- links
+
+	if len(links) > 0 {
+		return links, nil
+	}
+
+	return links, ImageParseError{Message: "No images found", Code: 1}
+}
--- a/comic/parser_test.go
+++ b/comic/parser_test.go
@@ -0,0 +1,183 @@
+package comic
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+func TestParseBatcaveBizImageLinks(t *testing.T) {
+	tests := []struct {
+		name        string
+		html        string
+		expectCount int
+		expectErr   bool
+		expectURLs  []string
+	}{
+		{
+			name: "extracts images from __DATA__",
+			html: `<html><body><script>
+				var __DATA__ = {"images":["https://cdn.batcave.biz/img/001.jpg","https://cdn.batcave.biz/img/002.jpg"]};
+			</script></body></html>`,
+			expectCount: 2,
+			expectErr:   false,
+			expectURLs:  []string{"https://cdn.batcave.biz/img/001.jpg", "https://cdn.batcave.biz/img/002.jpg"},
+		},
+		{
+			name: "extracts images with spaces around colon and bracket",
+			html: `<html><body><script>
+				var __DATA__ = {"images" : [ "https://cdn.batcave.biz/img/001.jpg" ]};
+			</script></body></html>`,
+			expectCount: 1,
+			expectErr:   false,
+			expectURLs:  []string{"https://cdn.batcave.biz/img/001.jpg"},
+		},
+		{
+			name: "no __DATA__ script",
+			html: `<html><body><script>
+				var foo = "bar";
+			</script></body></html>`,
+			expectCount: 0,
+			expectErr:   true,
+		},
+		{
+			name: "__DATA__ present but no images key",
+			html: `<html><body><script>
+				var __DATA__ = {"title":"Nightwing"};
+			</script></body></html>`,
+			expectCount: 0,
+			expectErr:   true,
+		},
+		{
+			name:        "no script tags",
+			html:        `<html><body><p>nothing here</p></body></html>`,
+			expectCount: 0,
+			expectErr:   true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			doc, _ := goquery.NewDocumentFromReader(strings.NewReader(tt.html))
+			ch := make(chan []string, 1)
+
+			links, err := ParseBatcaveBizImageLinks(doc, ch)
+
+			if tt.expectErr && err == nil {
+				t.Error("ParseBatcaveBizImageLinks() expected error, got nil")
+			}
+			if !tt.expectErr && err != nil {
+				t.Errorf("ParseBatcaveBizImageLinks() unexpected error: %v", err)
+			}
+			if len(links) != tt.expectCount {
+				t.Errorf("ParseBatcaveBizImageLinks() returned %d links, want %d", len(links), tt.expectCount)
+			}
+			for i, expected := range tt.expectURLs {
+				if i >= len(links) {
+					t.Errorf("missing link at index %d: want %q", i, expected)
+					continue
+				}
+				if links[i] != expected {
+					t.Errorf("links[%d] = %q, want %q", i, links[i], expected)
+				}
+			}
+
+			channelLinks := <-ch
+			if len(channelLinks) != tt.expectCount {
+				t.Errorf("channel received %d links, want %d", len(channelLinks), tt.expectCount)
+			}
+		})
+	}
+}
+
+func TestImageParseError(t *testing.T) {
+	err := ImageParseError{Message: "test error", Code: 1}
+	if err.Error() != "test error" {
+		t.Errorf("Error() = %q, want %q", err.Error(), "test error")
+	}
+}
+
+func TestParseImageLinks(t *testing.T) {
+	tests := []struct {
+		name        string
+		html        string
+		expectCount int
+		expectErr   bool
+	}{
+		{
+			name: "extracts blogspot images",
+			html: `<html><body>
+				<img src="https://bp.blogspot.com/page-001.jpg" />
+				<img src="https://bp.blogspot.com/page-002.jpg" />
+			</body></html>`,
+			expectCount: 2,
+			expectErr:   false,
+		},
+		{
+			name: "extracts blogger googleusercontent images",
+			html: `<html><body>
+				<img src="https://blogger.googleusercontent.com/page-001.jpg" />
+			</body></html>`,
+			expectCount: 1,
+			expectErr:   false,
+		},
+		{
+			name: "extracts covers images",
+			html: `<html><body>
+				<img src="https://example.com/covers/cover-001.jpg" />
+			</body></html>`,
+			expectCount: 1,
+			expectErr:   false,
+		},
+		{
+			name: "excludes logo images",
+			html: `<html><body>
+				<img src="https://bp.blogspot.com/logo-site.jpg" />
+				<img src="https://bp.blogspot.com/page-001.jpg" />
+			</body></html>`,
+			expectCount: 1,
+			expectErr:   false,
+		},
+		{
+			name: "excludes non-matching images",
+			html: `<html><body>
+				<img src="https://other-site.com/image.jpg" />
+				<img src="https://cdn.example.com/banner.png" />
+			</body></html>`,
+			expectCount: 0,
+			expectErr:   true,
+		},
+		{
+			name:        "no images at all",
+			html:        `<html><body><p>No images here</p></body></html>`,
+			expectCount: 0,
+			expectErr:   true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			doc, _ := goquery.NewDocumentFromReader(strings.NewReader(tt.html))
+			ch := make(chan []string, 1)
+
+			links, err := ParseImageLinks(doc, ch)
+
+			if tt.expectErr && err == nil {
+				t.Error("ParseImageLinks() expected error, got nil")
+			}
+			if !tt.expectErr && err != nil {
+				t.Errorf("ParseImageLinks() unexpected error: %v", err)
+			}
+			if len(links) != tt.expectCount {
+				t.Errorf("ParseImageLinks() returned %d links, want %d", len(links), tt.expectCount)
+			}
+
+			// Verify the channel also received the links
+			channelLinks := <-ch
+			if len(channelLinks) != tt.expectCount {
+				t.Errorf("channel received %d links, want %d", len(channelLinks), tt.expectCount)
+			}
+		})
+	}
+}
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.22.3
 require (
 	github.com/DaRealFreak/cloudflare-bp-go v1.0.4
 	github.com/PuerkitoBio/goquery v1.9.2
+	github.com/andybalholm/brotli v1.2.0
 	github.com/spf13/cobra v1.8.1
 )

--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/EDDYCJY/fake-useragent v0.2.0 h1:Jcnkk2bgXmDpX0z+ELlUErTkoLb/mxFBNd2Y
 github.com/EDDYCJY/fake-useragent v0.2.0/go.mod h1:5wn3zzlDxhKW6NYknushqinPcAqZcAPHy8lLczCdJdc=
 github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
 github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
+github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
+github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
 github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
 github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
 github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@@ -20,6 +22,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
Author	SHA1	Message	Date
Bryan Bailey	9ec1301317	Merge pull request 'fix: extract title from h1 or URL slug when page title starts with #' (#5) from feat/title-h1-fallback into main Reviewed-on: #5	2026-03-11 22:16:25 +00:00
Bryan Bailey	dcb41deea9	fix: extract title from h1 or URL slug when page title starts with # When readallcomics.com pages have a <title> containing only the issue number (e.g. '#018 (2026)'), fall back to the h1 element first, then derive the title from the URL slug by stripping the trailing year and title-casing the hyphen-separated segments. Closes #4	2026-03-11 18:13:14 -04:00