feat: add batcave.biz support, closes #6
## What changed - `BatcaveBizMarkup` now accepts a `clientChan chan *http.Client` and sends the authenticated cookie jar client back to the caller after completing the Cloudflare challenge flow. All error paths send nil so the caller never blocks. - `Comic` struct gains a `Client *http.Client` field. `NewComic` wires up the channel, receives the client, and stores it so downstream code can reuse the same authenticated session. - `downloadFile` branches on `c.Client`: when set it builds the request manually and only attaches a `Referer: https://batcave.biz/` header when the image URL is actually on batcave.biz. Some issues host images on third-party CDNs (e.g. readcomicsonline.ru) that actively block requests with a batcave Referer, returning 403 — omitting the header fixes those. - `ParseBatcaveBizTitle` extracts the chapter title from the `__DATA__.chapters` JSON array by matching the chapter ID in the URL's last path segment. The HTML `<title>` on batcave.biz is prefixed with "Read " and suffixed with "comics online for free", making it unsuitable as a filename. Using the chapter data gives clean titles like "Nightwing (1996) 153". "Issue #" and bare "#" are stripped since the hash character causes problems on some filesystems and tools. - `ParseBatcaveBizImageLinks` now unescapes `\/` → `/` in extracted URLs. The `__DATA__` JSON often contains forward-slash-escaped URLs that would otherwise be stored verbatim. - `archive.go`: `filepath.Walk` was called on `filepath.Dir(sourcePath)` (the library root) instead of `sourcePath` (the comic's own folder). This caused any leftover image files from previous downloads in sibling directories to be included in every new CBZ. Fixed by walking `sourcePath` directly. - `BatcaveBizMarkup` client now has a 30s `Timeout`. Without it, a single stalled CDN connection would hang the worker goroutine indefinitely, causing `Download()` to block forever waiting for a result that never arrives. 
- Fixed `for e := range err` in `cli/root.go` — ranging over `[]error` with one variable yields the index, not the error value.
This commit is contained in:
@@ -40,7 +40,7 @@ var cli = &cobra.Command{
|
||||
fmt.Println(comic.Title)
|
||||
|
||||
err := comic.Download(len(comic.Filelist))
|
||||
for e := range err {
|
||||
for _, e := range err {
|
||||
fmt.Println(e)
|
||||
}
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ func (c *Comic) Archive() error {
|
||||
sourcePath := filepath.Join(c.LibraryPath, c.Title)
|
||||
|
||||
err = filepath.Walk(
|
||||
filepath.Dir(sourcePath),
|
||||
sourcePath,
|
||||
func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return ArchiveError{
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package comic
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
@@ -18,6 +19,7 @@ type Comic struct {
|
||||
Next *Comic
|
||||
Prev *Comic
|
||||
LibraryPath string
|
||||
Client *http.Client
|
||||
}
|
||||
|
||||
// extractTitleFromMarkup extracts the title from the comic's markup.
|
||||
@@ -93,18 +95,22 @@ func NewComic(
|
||||
}
|
||||
|
||||
if strings.Contains(url, "batcave.biz") {
|
||||
go BatcaveBizMarkup(url, markupChannel)
|
||||
clientChan := make(chan *http.Client, 1)
|
||||
go BatcaveBizMarkup(url, markupChannel, clientChan)
|
||||
markup := <-markupChannel
|
||||
c.Markup = markup
|
||||
c.Client = <-clientChan
|
||||
if t := ParseBatcaveBizTitle(markup, url); t != "" {
|
||||
c.Title = t
|
||||
} else {
|
||||
c.Title = extractTitleFromMarkup(*c)
|
||||
}
|
||||
go ParseBatcaveBizImageLinks(markup, imageChannel)
|
||||
} else {
|
||||
go Markup(url, markupChannel)
|
||||
}
|
||||
|
||||
markup := <-markupChannel
|
||||
c.Markup = markup
|
||||
c.Title = extractTitleFromMarkup(*c)
|
||||
|
||||
if strings.Contains(url, "batcave.biz") {
|
||||
go ParseBatcaveBizImageLinks(markup, imageChannel)
|
||||
} else {
|
||||
go ParseImageLinks(markup, imageChannel)
|
||||
}
|
||||
links := <-imageChannel
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
|
||||
@@ -39,13 +40,33 @@ func downloadFile(url string, page int, c *Comic) error {
|
||||
}
|
||||
}
|
||||
|
||||
res, err := handleRequest(url)
|
||||
var res *http.Response
|
||||
var err error
|
||||
if c.Client != nil {
|
||||
req, reqErr := http.NewRequest("GET", url, nil)
|
||||
if reqErr != nil {
|
||||
return ComicDownloadError{Message: "invalid request", Code: 1}
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
if strings.Contains(url, "batcave.biz") {
|
||||
req.Header.Set("Referer", "https://batcave.biz/")
|
||||
}
|
||||
res, err = c.Client.Do(req)
|
||||
} else {
|
||||
res, err = handleRequest(url)
|
||||
}
|
||||
if err != nil {
|
||||
return ComicDownloadError{
|
||||
Message: "invalid request",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return ComicDownloadError{
|
||||
Message: "bad response",
|
||||
Code: 1,
|
||||
}
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
imageFile, err := os.Create(imageFilepath)
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
@@ -50,10 +51,21 @@ func Markup(url string, c chan *goquery.Document) *goquery.Document {
|
||||
return markup
|
||||
}
|
||||
|
||||
func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Document {
|
||||
func BatcaveBizMarkup(referer string, c chan *goquery.Document, clientChan chan *http.Client) *goquery.Document {
|
||||
sendErr := func() *goquery.Document {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
if clientChan != nil {
|
||||
clientChan <- nil
|
||||
}
|
||||
return &goquery.Document{}
|
||||
}
|
||||
|
||||
jar, _ := cookiejar.New(nil)
|
||||
client := &http.Client{
|
||||
Jar: jar,
|
||||
Timeout: time.Second * 30,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
return nil
|
||||
},
|
||||
@@ -68,10 +80,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
||||
// GET the challenge page to obtain cookies and any necessary tokens
|
||||
req, err := http.NewRequest("GET", referer, nil)
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
for k, v := range headers {
|
||||
req.Header.Set(k, v)
|
||||
@@ -79,19 +88,13 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
||||
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
|
||||
tokenRegex := regexp.MustCompile(`token:\s*"([^"]+)"`)
|
||||
@@ -101,14 +104,14 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
||||
// no challenge, parse directly
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
if c != nil {
|
||||
c <- doc
|
||||
}
|
||||
if clientChan != nil {
|
||||
clientChan <- client
|
||||
}
|
||||
return doc
|
||||
}
|
||||
|
||||
@@ -132,10 +135,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
||||
|
||||
postReq, err := http.NewRequest("POST", "https://batcave.biz/_v", strings.NewReader(params.Encode()))
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
for k, v := range headers {
|
||||
postReq.Header.Set(k, v)
|
||||
@@ -145,10 +145,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
||||
|
||||
postRes, err := client.Do(postReq)
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
defer postRes.Body.Close()
|
||||
io.ReadAll(postRes.Body)
|
||||
@@ -156,10 +153,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
||||
// GET the real page with the set cookie
|
||||
realReq, err := http.NewRequest("GET", referer, nil)
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
for k, v := range headers {
|
||||
realReq.Header.Set(k, v)
|
||||
@@ -167,23 +161,20 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
||||
|
||||
realRes, err := client.Do(realReq)
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
defer realRes.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(realRes.Body)
|
||||
if err != nil {
|
||||
if c != nil {
|
||||
c <- &goquery.Document{}
|
||||
}
|
||||
return &goquery.Document{}
|
||||
return sendErr()
|
||||
}
|
||||
if c != nil {
|
||||
c <- doc
|
||||
}
|
||||
if clientChan != nil {
|
||||
clientChan <- client
|
||||
}
|
||||
return doc
|
||||
}
|
||||
|
||||
@@ -228,6 +219,34 @@ func ParseReadAllComicsLinks(markup *goquery.Document, c chan []string) ([]strin
|
||||
return links, ImageParseError{Message: "No images found", Code: 1}
|
||||
}
|
||||
|
||||
// ParseBatcaveBizTitle extracts the chapter title from the __DATA__.chapters array
|
||||
// by matching the chapter id to the last path segment of the provided URL.
|
||||
func ParseBatcaveBizTitle(markup *goquery.Document, chapterURL string) string {
|
||||
slug := strings.TrimRight(chapterURL, "/")
|
||||
if i := strings.LastIndex(slug, "/"); i >= 0 {
|
||||
slug = slug[i+1:]
|
||||
}
|
||||
|
||||
var title string
|
||||
markup.Find("script").Each(func(_ int, s *goquery.Selection) {
|
||||
if title != "" {
|
||||
return
|
||||
}
|
||||
text := s.Text()
|
||||
if !strings.Contains(text, "__DATA__") {
|
||||
return
|
||||
}
|
||||
chapterRegex := regexp.MustCompile(`"id"\s*:\s*` + regexp.QuoteMeta(slug) + `[^}]*?"title"\s*:\s*"([^"]+)"`)
|
||||
m := chapterRegex.FindStringSubmatch(text)
|
||||
if len(m) >= 2 {
|
||||
title = strings.ReplaceAll(m[1], `\/`, "/")
|
||||
title = strings.ReplaceAll(title, "Issue #", "")
|
||||
title = strings.ReplaceAll(title, "#", "")
|
||||
}
|
||||
})
|
||||
return title
|
||||
}
|
||||
|
||||
// ParseBatcaveBizImageLinks extracts image URLs from the __DATA__.images JavaScript
|
||||
// variable embedded in a batcave.biz page.
|
||||
func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
|
||||
@@ -248,7 +267,7 @@ func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]str
|
||||
urlRegex := regexp.MustCompile(`"([^"]+)"`)
|
||||
for _, m := range urlRegex.FindAllStringSubmatch(arrayMatch[1], -1) {
|
||||
if len(m) >= 2 {
|
||||
links = append(links, m[1])
|
||||
links = append(links, strings.ReplaceAll(m[1], `\/`, "/"))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -24,6 +24,15 @@ func TestParseBatcaveBizImageLinks(t *testing.T) {
|
||||
expectErr: false,
|
||||
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg", "https://cdn.batcave.biz/img/002.jpg"},
|
||||
},
|
||||
{
|
||||
name: "unescapes forward slashes in URLs",
|
||||
html: `<html><body><script>
|
||||
var __DATA__ = {"images":["https:\/\/cdn.batcave.biz\/img\/001.jpg"]};
|
||||
</script></body></html>`,
|
||||
expectCount: 1,
|
||||
expectErr: false,
|
||||
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg"},
|
||||
},
|
||||
{
|
||||
name: "extracts images with spaces around colon and bracket",
|
||||
html: `<html><body><script>
|
||||
|
||||
Reference in New Issue
Block a user