fix: extract title from h1 or URL slug when page title starts with #
When readallcomics.com pages have a <title> containing only the issue number (e.g. '#018 (2026)'), fall back to the h1 element first, then derive the title from the URL slug by stripping the trailing year and title-casing the hyphen-separated segments. Closes #4
This commit is contained in:
@@ -26,21 +26,52 @@ type Comic struct {
|
||||
// Returns the extracted title as a string.
|
||||
func extractTitleFromMarkup(c Comic) string {
|
||||
yearFormat := `^(.*?)\s+\(\d{4}(?:\s+.+)?\)`
|
||||
selection := c.Markup.Find("title")
|
||||
|
||||
if selection.Length() == 0 {
|
||||
return "Untitled"
|
||||
}
|
||||
|
||||
content := selection.First().Text()
|
||||
regex := regexp.MustCompile(yearFormat)
|
||||
matches := regex.FindStringSubmatch(content)
|
||||
|
||||
if len(matches) != 2 {
|
||||
return "Untitled"
|
||||
extractFrom := func(text string) string {
|
||||
matches := regex.FindStringSubmatch(text)
|
||||
if len(matches) != 2 {
|
||||
return ""
|
||||
}
|
||||
return strings.ReplaceAll(matches[1], ":", "")
|
||||
}
|
||||
|
||||
return strings.ReplaceAll(matches[1], ":", "")
|
||||
title := extractFrom(c.Markup.Find("title").First().Text())
|
||||
|
||||
if strings.HasPrefix(title, "#") {
|
||||
if h1 := extractFrom(c.Markup.Find("h1").First().Text()); h1 != "" && !strings.HasPrefix(h1, "#") {
|
||||
return h1
|
||||
}
|
||||
if slug := titleFromSlug(c.URL); slug != "" {
|
||||
return slug
|
||||
}
|
||||
}
|
||||
|
||||
if title != "" {
|
||||
return title
|
||||
}
|
||||
|
||||
return "Untitled"
|
||||
}
|
||||
|
||||
// trailingYearRe matches a "-YYYY" publication-year suffix at the end of a
// URL slug. Compiled once at package scope so it is not rebuilt per call.
var trailingYearRe = regexp.MustCompile(`-\d{4}$`)

// titleFromSlug derives a comic title from the last path segment of a URL.
// It strips a trailing year (-YYYY), replaces hyphens with spaces, and
// title-cases the hyphen-separated words. Returns "" when no usable segment
// remains after stripping.
func titleFromSlug(url string) string {
	// Drop any trailing slash, then keep only the final path segment.
	slug := strings.TrimRight(url, "/")
	if i := strings.LastIndex(slug, "/"); i >= 0 {
		slug = slug[i+1:]
	}
	// Remove a trailing publication year such as "-2026".
	slug = trailingYearRe.ReplaceAllString(slug, "")
	if slug == "" {
		return ""
	}
	words := strings.Split(slug, "-")
	for i, w := range words {
		if len(w) > 0 {
			// Slugs are assumed ASCII, so uppercasing the first byte is
			// sufficient for title-casing each word.
			words[i] = strings.ToUpper(w[:1]) + w[1:]
		}
	}
	return strings.Join(words, " ")
}
|
||||
|
||||
// NewComic creates a new Comic instance from the provided URL and library path.
|
||||
@@ -61,13 +92,21 @@ func NewComic(
|
||||
LibraryPath: libraryPath,
|
||||
}
|
||||
|
||||
go Markup(c.URL, markupChannel)
|
||||
if strings.Contains(url, "batcave.biz") {
|
||||
go BatcaveBizMarkup(url, markupChannel)
|
||||
} else {
|
||||
go Markup(url, markupChannel)
|
||||
}
|
||||
|
||||
markup := <-markupChannel
|
||||
c.Markup = markup
|
||||
c.Title = extractTitleFromMarkup(*c)
|
||||
|
||||
go ParseImageLinks(markup, imageChannel)
|
||||
if strings.Contains(url, "batcave.biz") {
|
||||
go ParseBatcaveBizImageLinks(markup, imageChannel)
|
||||
} else {
|
||||
go ParseImageLinks(markup, imageChannel)
|
||||
}
|
||||
links := <-imageChannel
|
||||
|
||||
c.Filelist = links
|
||||
|
||||
Reference in New Issue
Block a user