## What changed - `BatcaveBizMarkup` now accepts a `clientChan chan *http.Client` and sends the authenticated cookie jar client back to the caller after completing the Cloudflare challenge flow. All error paths send nil so the caller never blocks. - `Comic` struct gains a `Client *http.Client` field. `NewComic` wires up the channel, receives the client, and stores it so downstream code can reuse the same authenticated session. - `downloadFile` branches on `c.Client`: when set it builds the request manually and only attaches a `Referer: https://batcave.biz/` header when the image URL is actually on batcave.biz. Some issues host images on third-party CDNs (e.g. readcomicsonline.ru) that actively block requests with a batcave Referer, returning 403 — omitting the header fixes those. - `ParseBatcaveBizTitle` extracts the chapter title from the `__DATA__.chapters` JSON array by matching the chapter ID in the URL's last path segment. The HTML `<title>` on batcave.biz is prefixed with "Read " and suffixed with "comics online for free", making it unsuitable as a filename. Using the chapter data gives clean titles like "Nightwing (1996) 153". "Issue #" and bare "#" are stripped since the hash character causes problems on some filesystems and tools. - `ParseBatcaveBizImageLinks` now unescapes `\/` → `/` in extracted URLs. The `__DATA__` JSON often contains forward-slash-escaped URLs that would otherwise be stored verbatim. - `archive.go`: `filepath.Walk` was called on `filepath.Dir(sourcePath)` (the library root) instead of `sourcePath` (the comic's own folder). This caused any leftover image files from previous downloads in sibling directories to be included in every new CBZ. Fixed by walking `sourcePath` directly. - `BatcaveBizMarkup` client now has a 30s `Timeout`. Without it, a single stalled CDN connection would hang the worker goroutine indefinitely, causing `Download()` to block forever waiting for a result that never arrives. 
- Fixed `for e := range err` in `cli/root.go` — ranging over `[]error` with one variable yields the index, not the error value.
200 lines
4.7 KiB
Go
200 lines
4.7 KiB
Go
package comic
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
|
|
)
|
|
|
|
// ComicDownloadError is the error type returned by every failure path in
// this package's download pipeline.
type ComicDownloadError struct {
	Message string // human-readable description of what went wrong
	Code    int    // numeric error code (every site in this file uses 1)
}

// Error satisfies the error interface by returning the stored message.
func (e ComicDownloadError) Error() string {
	return e.Message
}
|
|
|
|
// downloadFile downloads a file from a given URL and saves it to a specified location.
|
|
//
|
|
// The function takes a URL string, a page number, and a Comic struct as parameters.
|
|
// It returns an error if the download fails, and nil otherwise.
|
|
func downloadFile(url string, page int, c *Comic) error {
|
|
pageNumber := fmt.Sprintf("%03d", page)
|
|
formattedImagePath := fmt.Sprintf("%s %s.jpg", c.Title, pageNumber)
|
|
imageFilepath, _ := filepath.Abs(filepath.Join(c.LibraryPath, c.Title, formattedImagePath))
|
|
|
|
if err := os.MkdirAll(
|
|
filepath.Dir(imageFilepath),
|
|
os.ModePerm,
|
|
); err != nil {
|
|
return ComicDownloadError{
|
|
Message: "error creating directory",
|
|
Code: 1,
|
|
}
|
|
}
|
|
|
|
var res *http.Response
|
|
var err error
|
|
if c.Client != nil {
|
|
req, reqErr := http.NewRequest("GET", url, nil)
|
|
if reqErr != nil {
|
|
return ComicDownloadError{Message: "invalid request", Code: 1}
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
|
if strings.Contains(url, "batcave.biz") {
|
|
req.Header.Set("Referer", "https://batcave.biz/")
|
|
}
|
|
res, err = c.Client.Do(req)
|
|
} else {
|
|
res, err = handleRequest(url)
|
|
}
|
|
if err != nil {
|
|
return ComicDownloadError{
|
|
Message: "invalid request",
|
|
Code: 1,
|
|
}
|
|
}
|
|
if res.StatusCode != http.StatusOK {
|
|
return ComicDownloadError{
|
|
Message: "bad response",
|
|
Code: 1,
|
|
}
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
imageFile, err := os.Create(imageFilepath)
|
|
if err != nil {
|
|
return ComicDownloadError{
|
|
Message: "error creating image file",
|
|
Code: 1,
|
|
}
|
|
}
|
|
defer imageFile.Close()
|
|
|
|
written, err := io.Copy(imageFile, res.Body)
|
|
if err != nil {
|
|
return ComicDownloadError{
|
|
Message: "Unable to save file contents",
|
|
Code: 1,
|
|
}
|
|
}
|
|
|
|
if written == 0 {
|
|
return ComicDownloadError{
|
|
Message: "Unable to save file contents",
|
|
Code: 1,
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// handleRequest sends a GET request to the provided URL, mimicking a generic browser,
|
|
// and returns the HTTP response.
|
|
//
|
|
// url - the URL to send the request to.
|
|
// *http.Response - the HTTP response from the server.
|
|
// error - an error that occurred during the request.
|
|
func handleRequest(url string) (*http.Response, error) {
|
|
// adjust timeout and keep-alive to avoid connection timeout
|
|
transport := &http.Transport{
|
|
DisableKeepAlives: false,
|
|
MaxIdleConnsPerHost: 32,
|
|
}
|
|
|
|
// add cloudflare bypass
|
|
cfTransport := cloudflarebp.AddCloudFlareByPass(transport)
|
|
|
|
// prevents cloudflarebp from occasionally returning the wrong type
|
|
if converted, ok := cfTransport.(*http.Transport); ok {
|
|
transport = converted
|
|
}
|
|
|
|
client := &http.Client{
|
|
Timeout: time.Second * 30,
|
|
Transport: transport,
|
|
}
|
|
|
|
// mimic generic browser
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return nil, ComicDownloadError{
|
|
Message: "invalid request",
|
|
Code: 1,
|
|
}
|
|
}
|
|
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36")
|
|
|
|
res, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, ComicDownloadError{
|
|
Message: "invalid request",
|
|
Code: 1,
|
|
}
|
|
}
|
|
|
|
if res.StatusCode != http.StatusOK {
|
|
return nil, ComicDownloadError{
|
|
Message: "bad response",
|
|
Code: 1,
|
|
}
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
// Download is a method of the Comic struct that downloads multiple files concurrently.
|
|
//
|
|
// It takes an integer parameter `concurrency` which represents the number of concurrent downloads.
|
|
//
|
|
// It returns a slice of errors, each representing an error that occurred during the download process.
|
|
func (c *Comic) Download(concurrency int) []error {
|
|
jobs := make(chan Download)
|
|
results := make(chan error)
|
|
|
|
for worker := 1; worker <= concurrency; worker++ {
|
|
go workerPool(jobs, results)
|
|
}
|
|
|
|
for i, url := range c.Filelist {
|
|
jobs <- Download{
|
|
URL: url,
|
|
Page: i + 1,
|
|
Comic: c,
|
|
}
|
|
}
|
|
|
|
var errors []error
|
|
for i := 0; i < len(c.Filelist); i++ {
|
|
err := <-results
|
|
if err != nil {
|
|
errors = append(errors, err)
|
|
}
|
|
}
|
|
return errors
|
|
}
|
|
|
|
// Download is a single unit of work for workerPool: one page image to fetch.
type Download struct {
	URL   string // image URL to download
	Page  int    // 1-based page number, used to name the saved file
	Comic *Comic // owning comic; supplies Title, LibraryPath, and Client
}
|
|
|
|
// workerPool is a function that processes a channel of Download jobs concurrently.
|
|
//
|
|
// It takes two parameters: a receive-only channel of Download jobs and a send-only channel of errors.
|
|
// It returns no value, but sends errors to the results channel as they occur.
|
|
func workerPool(jobs <-chan Download, results chan<- error) {
|
|
for job := range jobs {
|
|
results <- downloadFile(job.URL, job.Page, job.Comic)
|
|
}
|
|
}
|