Skip to content

Commit

Permalink
Fix loss of link text and h2 text
Browse files: browse the repository at this point in the history
  • Loading branch information
wedojava committed Sep 30, 2020
1 parent 36fb1b3 commit 4852a29
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
5 changes: 3 additions & 2 deletions internal/fetcher/sites/cna/cna.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func cna(p *Post) (string, error) {
return "", errors.New("[-] There is no element class is paragraph` from: " + p.URL.String())
}
n := nodes[0]
plist := htmldoc.ElementsByTag(n, "p")
plist := htmldoc.ElementsByTag(n, "h2", "p")
for _, v := range plist {
if v.FirstChild != nil {
body += v.FirstChild.Data + " \n"
Expand All @@ -143,8 +143,9 @@ func cna(p *Post) (string, error) {

body = strings.ReplaceAll(body, "「", "“")
body = strings.ReplaceAll(body, "」", "”")
body = strings.ReplaceAll(body, "</a>", "")

re := regexp.MustCompile(`<a.*?</a>`)
re := regexp.MustCompile(`<a.*?>`)
body = re.ReplaceAllString(body, "")
re = regexp.MustCompile(`<iframe.*?</iframe>`)
body = re.ReplaceAllString(body, "")
Expand Down
6 changes: 3 additions & 3 deletions internal/fetcher/sites/cna/cna_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/wedojava/fetcher/internal/htmldoc"
)

var p = PostFactory("https://www.cna.com.tw/news/firstnews/202009295001.aspx")
var p = PostFactory("https://www.cna.com.tw/news/aopl/202009300058.aspx")

func PostFactory(rawurl string) *Post {
url, err := url.Parse(rawurl)
Expand All @@ -32,7 +32,7 @@ func TestSetDate(t *testing.T) {
if err := setDate(p); err != nil {
t.Errorf("test SetPost err: %v", doc)
}
want := "2020-09-29T11:49:00+08:00"
want := "2020-09-30T10:54:00+08:00"
if p.Date != want {
t.Errorf("\ngot: %v\nwant: %v", p.Date, want)
}
Expand All @@ -47,7 +47,7 @@ func TestSetTitle(t *testing.T) {
if err := setTitle(p); err != nil {
t.Errorf("test SetPost err: %v", err)
}
want := "早安世界》安心旅遊補助續辦至10月底 中秋雙十連假可用 | 生活 | 重點新聞"
want := "被爆10年沒繳稅 川普:避稅計畫展現我的才智 | 國際"
if p.Title != want {
t.Errorf("\ngot: %v\nwant: %v", p.Title, want)
}
Expand Down

0 comments on commit 4852a29

Please sign in to comment.