fix: extract title from h1 or URL slug when page title starts with #

When readallcomics.com pages have a <title> containing only the issue number (e.g. '#018 (2026)'), fall back to the h1 element first and, failing that, derive the title from the URL slug by stripping the trailing year and title-casing the hyphen-separated segments. Closes #4
2026-03-11 18:13:14 -04:00
parent a7c3b632a5
commit dcb41deea9
9 changed files with 950 additions and 13 deletions
--- a/comic/cleanup_test.go
+++ b/comic/cleanup_test.go
@@ -0,0 +1,93 @@
+package comic
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestCleanup exercises Comic.Cleanup against a temporary library directory.
+// It checks that the "001" cover image survives while the other page images
+// are deleted, that non-image files are left in place, and that an empty
+// comic directory is not treated as an error. Fixture setup failures abort
+// the subtest instead of being ignored.
+func TestCleanup(t *testing.T) {
+	// setup creates <library>/<title> inside a fresh temp dir, writes files
+	// (name -> content) into it, and returns a Comic pointing at that library
+	// along with the comic directory path.
+	setup := func(t *testing.T, title string, files map[string]string) (*Comic, string) {
+		t.Helper()
+		libDir := t.TempDir()
+		comicDir := filepath.Join(libDir, title)
+		if err := os.MkdirAll(comicDir, os.ModePerm); err != nil {
+			t.Fatalf("creating %s: %v", comicDir, err)
+		}
+		for name, data := range files {
+			if err := os.WriteFile(filepath.Join(comicDir, name), []byte(data), 0644); err != nil {
+				t.Fatalf("writing fixture %s: %v", name, err)
+			}
+		}
+		return &Comic{Title: title, LibraryPath: libDir}, comicDir
+	}
+
+	t.Run("keeps cover image 001 and removes others", func(t *testing.T) {
+		keep := map[string]bool{
+			"TestComic 001.jpg": true,  // should be kept
+			"TestComic 002.jpg": false, // should be removed
+			"TestComic 003.jpg": false, // should be removed
+		}
+		files := make(map[string]string, len(keep))
+		for name := range keep {
+			files[name] = "fake"
+		}
+		c, comicDir := setup(t, "TestComic", files)
+
+		if err := c.Cleanup(); err != nil {
+			t.Fatalf("Cleanup() unexpected error: %v", err)
+		}
+
+		for name, shouldExist := range keep {
+			_, err := os.Stat(filepath.Join(comicDir, name))
+			switch {
+			case err == nil:
+				if !shouldExist {
+					t.Errorf("expected %s to be removed, but it still exists", name)
+				}
+			case os.IsNotExist(err):
+				if shouldExist {
+					t.Errorf("expected %s to be kept, but it was removed", name)
+				}
+			default:
+				// Any other stat failure says nothing about existence.
+				t.Errorf("stat %s: %v", name, err)
+			}
+		}
+	})
+
+	t.Run("keeps non-image files", func(t *testing.T) {
+		files := map[string]string{
+			"TestComic.cbz": "archive",
+			"metadata.json": "data",
+		}
+		c, comicDir := setup(t, "TestComic", files)
+
+		if err := c.Cleanup(); err != nil {
+			t.Fatalf("Cleanup() unexpected error: %v", err)
+		}
+
+		for name := range files {
+			// Any stat error, not only "not exist", means the file was not kept intact.
+			if _, err := os.Stat(filepath.Join(comicDir, name)); err != nil {
+				t.Errorf("expected non-image file %s to be kept: %v", name, err)
+			}
+		}
+	})
+
+	t.Run("handles empty directory", func(t *testing.T) {
+		c, _ := setup(t, "EmptyComic", nil)
+
+		if err := c.Cleanup(); err != nil {
+			t.Fatalf("Cleanup() unexpected error for empty dir: %v", err)
+		}
+	})
+}