Compare commits
4 Commits
9ec1301317
...
feat/batca
| Author | SHA1 | Date | |
|---|---|---|---|
| d2c715e973 | |||
| 9cb26f27ec | |||
| 855e97f72f | |||
| ca891fc6c0 |
4
Makefile
4
Makefile
@@ -32,13 +32,15 @@ docker-push: docker-build
|
|||||||
tag:
|
tag:
|
||||||
@if [ -z "$(VERSION)" ]; then echo "Usage: make tag VERSION=1.2.0"; exit 1; fi
|
@if [ -z "$(VERSION)" ]; then echo "Usage: make tag VERSION=1.2.0"; exit 1; fi
|
||||||
git tag $(VERSION)
|
git tag $(VERSION)
|
||||||
|
git tag -f latest
|
||||||
git push origin $(VERSION)
|
git push origin $(VERSION)
|
||||||
|
git push origin -f latest
|
||||||
|
|
||||||
gitea-release:
|
gitea-release:
|
||||||
tea release create \
|
tea release create \
|
||||||
--tag $(VERSION) \
|
--tag $(VERSION) \
|
||||||
--title "$(VERSION)" \
|
--title "$(VERSION)" \
|
||||||
--note $(NOTES) \
|
$(if $(NOTES),--note $(NOTES),) \
|
||||||
--asset $(BUILD_DIR)/$(BIN)-windows-amd64.exe \
|
--asset $(BUILD_DIR)/$(BIN)-windows-amd64.exe \
|
||||||
--asset $(BUILD_DIR)/$(BIN)-linux-amd64 \
|
--asset $(BUILD_DIR)/$(BIN)-linux-amd64 \
|
||||||
--asset $(BUILD_DIR)/$(BIN)-linux-arm64 \
|
--asset $(BUILD_DIR)/$(BIN)-linux-arm64 \
|
||||||
|
|||||||
@@ -40,14 +40,14 @@ var cli = &cobra.Command{
|
|||||||
fmt.Println(comic.Title)
|
fmt.Println(comic.Title)
|
||||||
|
|
||||||
err := comic.Download(len(comic.Filelist))
|
err := comic.Download(len(comic.Filelist))
|
||||||
for e := range err {
|
for _, e := range err {
|
||||||
fmt.Println(e)
|
fmt.Println(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
comic.Archive()
|
comic.Archive()
|
||||||
comic.Cleanup()
|
comic.Cleanup()
|
||||||
},
|
},
|
||||||
Version: "1.1.0",
|
Version: "1.2.1",
|
||||||
}
|
}
|
||||||
|
|
||||||
func Execute() error {
|
func Execute() error {
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ func (c *Comic) Archive() error {
|
|||||||
sourcePath := filepath.Join(c.LibraryPath, c.Title)
|
sourcePath := filepath.Join(c.LibraryPath, c.Title)
|
||||||
|
|
||||||
err = filepath.Walk(
|
err = filepath.Walk(
|
||||||
filepath.Dir(sourcePath),
|
sourcePath,
|
||||||
func(path string, info os.FileInfo, err error) error {
|
func(path string, info os.FileInfo, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ArchiveError{
|
return ArchiveError{
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package comic
|
package comic
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"net/http"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -18,6 +19,7 @@ type Comic struct {
|
|||||||
Next *Comic
|
Next *Comic
|
||||||
Prev *Comic
|
Prev *Comic
|
||||||
LibraryPath string
|
LibraryPath string
|
||||||
|
Client *http.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
// extractTitleFromMarkup extracts the title from the comic's markup.
|
// extractTitleFromMarkup extracts the title from the comic's markup.
|
||||||
@@ -93,18 +95,22 @@ func NewComic(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if strings.Contains(url, "batcave.biz") {
|
if strings.Contains(url, "batcave.biz") {
|
||||||
go BatcaveBizMarkup(url, markupChannel)
|
clientChan := make(chan *http.Client, 1)
|
||||||
|
go BatcaveBizMarkup(url, markupChannel, clientChan)
|
||||||
|
markup := <-markupChannel
|
||||||
|
c.Markup = markup
|
||||||
|
c.Client = <-clientChan
|
||||||
|
if t := ParseBatcaveBizTitle(markup, url); t != "" {
|
||||||
|
c.Title = t
|
||||||
|
} else {
|
||||||
|
c.Title = extractTitleFromMarkup(*c)
|
||||||
|
}
|
||||||
|
go ParseBatcaveBizImageLinks(markup, imageChannel)
|
||||||
} else {
|
} else {
|
||||||
go Markup(url, markupChannel)
|
go Markup(url, markupChannel)
|
||||||
}
|
|
||||||
|
|
||||||
markup := <-markupChannel
|
markup := <-markupChannel
|
||||||
c.Markup = markup
|
c.Markup = markup
|
||||||
c.Title = extractTitleFromMarkup(*c)
|
c.Title = extractTitleFromMarkup(*c)
|
||||||
|
|
||||||
if strings.Contains(url, "batcave.biz") {
|
|
||||||
go ParseBatcaveBizImageLinks(markup, imageChannel)
|
|
||||||
} else {
|
|
||||||
go ParseImageLinks(markup, imageChannel)
|
go ParseImageLinks(markup, imageChannel)
|
||||||
}
|
}
|
||||||
links := <-imageChannel
|
links := <-imageChannel
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
|
cloudflarebp "github.com/DaRealFreak/cloudflare-bp-go"
|
||||||
@@ -39,13 +40,33 @@ func downloadFile(url string, page int, c *Comic) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err := handleRequest(url)
|
var res *http.Response
|
||||||
|
var err error
|
||||||
|
if c.Client != nil {
|
||||||
|
req, reqErr := http.NewRequest("GET", url, nil)
|
||||||
|
if reqErr != nil {
|
||||||
|
return ComicDownloadError{Message: "invalid request", Code: 1}
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||||
|
if strings.Contains(url, "batcave.biz") {
|
||||||
|
req.Header.Set("Referer", "https://batcave.biz/")
|
||||||
|
}
|
||||||
|
res, err = c.Client.Do(req)
|
||||||
|
} else {
|
||||||
|
res, err = handleRequest(url)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ComicDownloadError{
|
return ComicDownloadError{
|
||||||
Message: "invalid request",
|
Message: "invalid request",
|
||||||
Code: 1,
|
Code: 1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if res.StatusCode != http.StatusOK {
|
||||||
|
return ComicDownloadError{
|
||||||
|
Message: "bad response",
|
||||||
|
Code: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
|
||||||
imageFile, err := os.Create(imageFilepath)
|
imageFile, err := os.Create(imageFilepath)
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
)
|
)
|
||||||
@@ -50,10 +51,21 @@ func Markup(url string, c chan *goquery.Document) *goquery.Document {
|
|||||||
return markup
|
return markup
|
||||||
}
|
}
|
||||||
|
|
||||||
func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Document {
|
func BatcaveBizMarkup(referer string, c chan *goquery.Document, clientChan chan *http.Client) *goquery.Document {
|
||||||
|
sendErr := func() *goquery.Document {
|
||||||
|
if c != nil {
|
||||||
|
c <- &goquery.Document{}
|
||||||
|
}
|
||||||
|
if clientChan != nil {
|
||||||
|
clientChan <- nil
|
||||||
|
}
|
||||||
|
return &goquery.Document{}
|
||||||
|
}
|
||||||
|
|
||||||
jar, _ := cookiejar.New(nil)
|
jar, _ := cookiejar.New(nil)
|
||||||
client := &http.Client{
|
client := &http.Client{
|
||||||
Jar: jar,
|
Jar: jar,
|
||||||
|
Timeout: time.Second * 30,
|
||||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
@@ -68,10 +80,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
|||||||
// GET the challenge page to obtain cookies and any necessary tokens
|
// GET the challenge page to obtain cookies and any necessary tokens
|
||||||
req, err := http.NewRequest("GET", referer, nil)
|
req, err := http.NewRequest("GET", referer, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
for k, v := range headers {
|
for k, v := range headers {
|
||||||
req.Header.Set(k, v)
|
req.Header.Set(k, v)
|
||||||
@@ -79,19 +88,13 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
|||||||
|
|
||||||
res, err := client.Do(req)
|
res, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
|
||||||
body, err := io.ReadAll(res.Body)
|
body, err := io.ReadAll(res.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenRegex := regexp.MustCompile(`token:\s*"([^"]+)"`)
|
tokenRegex := regexp.MustCompile(`token:\s*"([^"]+)"`)
|
||||||
@@ -101,14 +104,14 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
|||||||
// no challenge, parse directly
|
// no challenge, parse directly
|
||||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
if c != nil {
|
if c != nil {
|
||||||
c <- doc
|
c <- doc
|
||||||
}
|
}
|
||||||
|
if clientChan != nil {
|
||||||
|
clientChan <- client
|
||||||
|
}
|
||||||
return doc
|
return doc
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -132,10 +135,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
|||||||
|
|
||||||
postReq, err := http.NewRequest("POST", "https://batcave.biz/_v", strings.NewReader(params.Encode()))
|
postReq, err := http.NewRequest("POST", "https://batcave.biz/_v", strings.NewReader(params.Encode()))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
for k, v := range headers {
|
for k, v := range headers {
|
||||||
postReq.Header.Set(k, v)
|
postReq.Header.Set(k, v)
|
||||||
@@ -145,10 +145,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
|||||||
|
|
||||||
postRes, err := client.Do(postReq)
|
postRes, err := client.Do(postReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
defer postRes.Body.Close()
|
defer postRes.Body.Close()
|
||||||
io.ReadAll(postRes.Body)
|
io.ReadAll(postRes.Body)
|
||||||
@@ -156,10 +153,7 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
|||||||
// GET the real page with the set cookie
|
// GET the real page with the set cookie
|
||||||
realReq, err := http.NewRequest("GET", referer, nil)
|
realReq, err := http.NewRequest("GET", referer, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
for k, v := range headers {
|
for k, v := range headers {
|
||||||
realReq.Header.Set(k, v)
|
realReq.Header.Set(k, v)
|
||||||
@@ -167,23 +161,20 @@ func BatcaveBizMarkup(referer string, c chan *goquery.Document) *goquery.Documen
|
|||||||
|
|
||||||
realRes, err := client.Do(realReq)
|
realRes, err := client.Do(realReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
defer realRes.Body.Close()
|
defer realRes.Body.Close()
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(realRes.Body)
|
doc, err := goquery.NewDocumentFromReader(realRes.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c != nil {
|
return sendErr()
|
||||||
c <- &goquery.Document{}
|
|
||||||
}
|
|
||||||
return &goquery.Document{}
|
|
||||||
}
|
}
|
||||||
if c != nil {
|
if c != nil {
|
||||||
c <- doc
|
c <- doc
|
||||||
}
|
}
|
||||||
|
if clientChan != nil {
|
||||||
|
clientChan <- client
|
||||||
|
}
|
||||||
return doc
|
return doc
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -228,6 +219,34 @@ func ParseReadAllComicsLinks(markup *goquery.Document, c chan []string) ([]strin
|
|||||||
return links, ImageParseError{Message: "No images found", Code: 1}
|
return links, ImageParseError{Message: "No images found", Code: 1}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ParseBatcaveBizTitle extracts the chapter title from the __DATA__.chapters array
|
||||||
|
// by matching the chapter id to the last path segment of the provided URL.
|
||||||
|
func ParseBatcaveBizTitle(markup *goquery.Document, chapterURL string) string {
|
||||||
|
slug := strings.TrimRight(chapterURL, "/")
|
||||||
|
if i := strings.LastIndex(slug, "/"); i >= 0 {
|
||||||
|
slug = slug[i+1:]
|
||||||
|
}
|
||||||
|
|
||||||
|
var title string
|
||||||
|
markup.Find("script").Each(func(_ int, s *goquery.Selection) {
|
||||||
|
if title != "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
text := s.Text()
|
||||||
|
if !strings.Contains(text, "__DATA__") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
chapterRegex := regexp.MustCompile(`"id"\s*:\s*` + regexp.QuoteMeta(slug) + `[^}]*?"title"\s*:\s*"([^"]+)"`)
|
||||||
|
m := chapterRegex.FindStringSubmatch(text)
|
||||||
|
if len(m) >= 2 {
|
||||||
|
title = strings.ReplaceAll(m[1], `\/`, "/")
|
||||||
|
title = strings.ReplaceAll(title, "Issue #", "")
|
||||||
|
title = strings.ReplaceAll(title, "#", "")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return title
|
||||||
|
}
|
||||||
|
|
||||||
// ParseBatcaveBizImageLinks extracts image URLs from the __DATA__.images JavaScript
|
// ParseBatcaveBizImageLinks extracts image URLs from the __DATA__.images JavaScript
|
||||||
// variable embedded in a batcave.biz page.
|
// variable embedded in a batcave.biz page.
|
||||||
func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
|
func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]string, error) {
|
||||||
@@ -248,7 +267,7 @@ func ParseBatcaveBizImageLinks(markup *goquery.Document, c chan []string) ([]str
|
|||||||
urlRegex := regexp.MustCompile(`"([^"]+)"`)
|
urlRegex := regexp.MustCompile(`"([^"]+)"`)
|
||||||
for _, m := range urlRegex.FindAllStringSubmatch(arrayMatch[1], -1) {
|
for _, m := range urlRegex.FindAllStringSubmatch(arrayMatch[1], -1) {
|
||||||
if len(m) >= 2 {
|
if len(m) >= 2 {
|
||||||
links = append(links, m[1])
|
links = append(links, strings.ReplaceAll(m[1], `\/`, "/"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -24,6 +24,15 @@ func TestParseBatcaveBizImageLinks(t *testing.T) {
|
|||||||
expectErr: false,
|
expectErr: false,
|
||||||
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg", "https://cdn.batcave.biz/img/002.jpg"},
|
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg", "https://cdn.batcave.biz/img/002.jpg"},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "unescapes forward slashes in URLs",
|
||||||
|
html: `<html><body><script>
|
||||||
|
var __DATA__ = {"images":["https:\/\/cdn.batcave.biz\/img\/001.jpg"]};
|
||||||
|
</script></body></html>`,
|
||||||
|
expectCount: 1,
|
||||||
|
expectErr: false,
|
||||||
|
expectURLs: []string{"https://cdn.batcave.biz/img/001.jpg"},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "extracts images with spaces around colon and bracket",
|
name: "extracts images with spaces around colon and bracket",
|
||||||
html: `<html><body><script>
|
html: `<html><body><script>
|
||||||
|
|||||||
Reference in New Issue
Block a user