-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathepub-linkchecker.go
87 lines (69 loc) · 1.38 KB
/
epub-linkchecker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package main
import (
"archive/zip"
"fmt"
"log"
"net/http"
"os"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/campoy/unique"
)
// Package-level state shared by the functions in this file.
var (
	// urls collects the absolute links gathered from the EPUB's HTML pages.
	urls []string

	// wg is a package-level WaitGroup for coordinating goroutines.
	wg sync.WaitGroup
)
// URLCheck issues an HTTP GET for every entry in urls concurrently (one
// goroutine per URL) and prints one line per URL to standard output:
//
//	<HTTP status>\t<url>   when a response was received
//	Unreachable\t<url>     when the request failed (including a 30s timeout)
//
// Lines are printed in completion order, which is not deterministic.
// URLCheck returns only after every status line has been printed.
func URLCheck(urls []string) {
	// A single client is shared by all requests; http.Client is safe for
	// concurrent use and sharing it lets connections be reused.
	client := &http.Client{Timeout: 30 * time.Second}
	statuses := make(chan string)

	// A local WaitGroup keeps the function self-contained and reentrant.
	var pending sync.WaitGroup
	pending.Add(len(urls))
	for _, url := range urls {
		go func(url string) {
			defer pending.Done()
			resp, err := client.Get(url)
			if err != nil {
				statuses <- fmt.Sprintf("Unreachable\t%s", url)
				return
			}
			// The body must be closed or the underlying connection leaks.
			defer resp.Body.Close()
			statuses <- fmt.Sprintf("%s\t%s", resp.Status, url)
		}(url)
	}

	// Close the channel once every worker has reported, so the printing
	// loop below terminates.
	go func() {
		pending.Wait()
		close(statuses)
	}()

	// Print in the calling goroutine: returning from this loop guarantees
	// that every status has actually been written, not merely received.
	for status := range statuses {
		fmt.Println(status)
	}
}
func main() {
if len(os.Args) < 2 {
fmt.Println("USAGE: epub-linkchecker {file.epub}")
os.Exit(1)
}
r, err := zip.OpenReader(os.Args[1])
if err != nil {
log.Fatal(err)
}
defer r.Close()
for _, f := range r.File {
if strings.HasSuffix(f.Name, "html") {
page, err := f.Open()
defer page.Close()
doc, err := goquery.NewDocumentFromReader(page)
if err != nil {
log.Fatal(err)
}
doc.Find("a").Each(func(i int, s *goquery.Selection) {
href, _ := s.Attr("href")
if strings.HasPrefix(href, "http") {
urls = append(urls, href)
}
})
}
}
less := func(i, j int) bool { return urls[i] < urls[j] }
unique.Slice(&urls, less)
URLCheck(urls)
}