Files
yoink-go/comic/comic_test.go
Bryan Bailey dcb41deea9 fix: extract title from h1 or URL slug when page title starts with #
When readallcomics.com pages have a <title> containing only the issue
number (e.g. '#018 (2026)'), fall back to the h1 element first, then
derive the title from the URL slug by stripping the trailing year and
title-casing the hyphen-separated segments.

Closes #4
2026-03-11 18:13:14 -04:00

171 lines
4.6 KiB
Go

package comic
import (
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// newDocFromHTML parses the given HTML string into a goquery document for use
// as a test fixture.
//
// It panics if goquery reports a parse error, so a broken fixture fails at the
// helper with the underlying error instead of passing a nil document on to the
// code under test.
func newDocFromHTML(html string) *goquery.Document {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		panic("newDocFromHTML: " + err.Error())
	}
	return doc
}
func TestExtractTitleFromMarkup(t *testing.T) {
tests := []struct {
name string
html string
url string
expected string
}{
{
name: "standard title with year",
html: `<html><head><title>Ultraman X Avengers 001 (2024)</title></head></html>`,
expected: "Ultraman X Avengers 001",
},
{
name: "title with year and extra text",
html: `<html><head><title>Batman 042 (2023 Digital)</title></head></html>`,
expected: "Batman 042",
},
{
name: "title with colon removed",
html: `<html><head><title>Spider-Man: No Way Home 001 (2022)</title></head></html>`,
expected: "Spider-Man No Way Home 001",
},
{
name: "no title tag",
html: `<html><head></head></html>`,
expected: "Untitled",
},
{
name: "title without year pattern",
html: `<html><head><title>Some Random Page</title></head></html>`,
expected: "Untitled",
},
{
name: "empty title",
html: `<html><head><title></title></head></html>`,
expected: "Untitled",
},
{
name: "title starts with # falls back to h1",
html: `<html><head><title>#018 (2026)</title></head><body><h1>Absolute Batman #018 (2026)</h1></body></html>`,
expected: "Absolute Batman #018",
},
{
name: "title starts with # but h1 also starts with #, falls back to slug",
html: `<html><head><title>#018 (2026)</title></head><body><h1>#018 (2026)</h1></body></html>`,
url: "https://readallcomics.com/absolute-batman-018-2026/",
expected: "Absolute Batman 018",
},
{
name: "title starts with # falls back to slug when no h1",
html: `<html><head><title>#018 (2026)</title></head></html>`,
url: "https://readallcomics.com/absolute-batman-018-2026/",
expected: "Absolute Batman 018",
},
{
name: "title starts with # no h1 no url",
html: `<html><head><title>#018 (2026)</title></head></html>`,
expected: "#018",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
doc := newDocFromHTML(tt.html)
c := Comic{Markup: doc, URL: tt.url}
result := extractTitleFromMarkup(c)
if result != tt.expected {
t.Errorf("extractTitleFromMarkup() = %q, want %q", result, tt.expected)
}
})
}
}
// TestTitleFromSlug checks titleFromSlug against a table of comic page URLs.
//
// The cases cover a slug with a trailing year, the same slug without a
// trailing slash, a slug with no year, and a single-word slug; each expects a
// space-separated, title-cased result with any trailing year dropped.
func TestTitleFromSlug(t *testing.T) {
	type slugCase struct {
		name string
		url  string
		want string
	}

	cases := []slugCase{
		{
			name: "standard comic URL",
			url:  "https://readallcomics.com/absolute-batman-018-2026/",
			want: "Absolute Batman 018",
		},
		{
			name: "no trailing slash",
			url:  "https://readallcomics.com/absolute-batman-018-2026",
			want: "Absolute Batman 018",
		},
		{
			name: "no year in slug",
			url:  "https://readallcomics.com/absolute-batman-018/",
			want: "Absolute Batman 018",
		},
		{
			name: "single word slug",
			url:  "https://readallcomics.com/batman/",
			want: "Batman",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := titleFromSlug(tc.url)
			if got == tc.want {
				return
			}
			t.Errorf("titleFromSlug() = %q, want %q", got, tc.want)
		})
	}
}
// TestCover exercises Comic.Cover with a table of file lists.
//
// Each case either names the suffix the returned cover path must end with, or
// flags that Cover is expected to return an error (no matching cover image in
// the list, or an empty list).
func TestCover(t *testing.T) {
	type coverCase struct {
		name    string
		files   []string
		suffix  string
		wantErr bool
	}

	cases := []coverCase{
		{
			name: "finds cover ending in 001.jpg",
			files: []string{
				"https://example.com/image-002.jpg",
				"https://example.com/image-001.jpg",
				"https://example.com/image-003.jpg",
			},
			suffix: "image-001.jpg",
		},
		{
			name: "finds cover ending in 000.jpg",
			files: []string{
				"https://example.com/image-000.jpg",
				"https://example.com/image-001.jpg",
			},
			suffix: "image-000.jpg",
		},
		{
			name: "returns error when no cover found",
			files: []string{
				"https://example.com/image-002.jpg",
				"https://example.com/image-003.jpg",
			},
			wantErr: true,
		},
		{
			name:    "returns error for empty filelist",
			files:   []string{},
			wantErr: true,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			comic := &Comic{Filelist: tc.files}
			got, err := comic.Cover()

			// Report a mismatch between the expected and actual error state
			// without aborting, so the suffix check below still runs.
			failed := err != nil
			switch {
			case tc.wantErr && !failed:
				t.Error("Cover() expected error, got nil")
			case !tc.wantErr && failed:
				t.Errorf("Cover() unexpected error: %v", err)
			}

			// Cases without an expected suffix only assert on the error.
			if tc.suffix == "" {
				return
			}
			if !strings.HasSuffix(got, tc.suffix) {
				t.Errorf("Cover() = %q, want path ending in %q", got, tc.suffix)
			}
		})
	}
}