scraper.go
package scraper

import (
	"context"
	"fmt"
	"hash/fnv"
	"io"
	"log"
	"net/http"
	"net/url"
	"sync"
	"time"

	"github.com/arriqaaq/boomerang"
	"github.com/pkg/errors"
)
// TargetHealth describes the health state of a target.
type TargetHealth float64

// The possible health states of a target based on the last performed scrape.
const (
	HealthUnknown TargetHealth = -1
	HealthGood    TargetHealth = 1
	HealthBad     TargetHealth = 0
)
// A scraper retrieves samples and accepts a status report at the end.
type scraper interface {
	scrape(ctx context.Context) error
	report(start time.Time, dur time.Duration, err error)
	offset(interval time.Duration, jitterSeed uint64) time.Duration
	url() *url.URL
}
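
// Illustrative sketch, not part of the original file: one way a caller might
// drive the scraper interface. It waits for the jittered offset so targets
// are spread across the interval, then scrapes on a fixed ticker. The name
// scrapeLoop and its parameters are assumptions made for this example.
func scrapeLoop(ctx context.Context, sc scraper, interval time.Duration, jitterSeed uint64) {
	// Align the first scrape with this target's slot in the interval grid.
	select {
	case <-time.After(sc.offset(interval, jitterSeed)):
	case <-ctx.Done():
		return
	}
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		start := time.Now()
		err := sc.scrape(ctx)
		sc.report(start, time.Since(start), err)
		select {
		case <-ticker.C:
		case <-ctx.Done():
			return
		}
	}
}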
// Store provides appends against a storage backend.
type Store interface {
	// Add records a target response for the given target.
	Add(url *url.URL, health TargetHealth, duration time.Duration) error
	// Commit returns the buffered entries and clears the store. It should be
	// called once all pending entries have been reported.
	Commit() []TargetResponse
}
// targetScraper implements the scraper interface for a target.
type targetScraper struct {
	*Target

	client  *boomerang.HttpClient
	req     *http.Request
	timeout time.Duration
}

// url returns the target's URL.
func (s *targetScraper) url() *url.URL {
	return s.URL()
}
func (s *targetScraper) scrape(ctx context.Context) error {
	// Build the request lazily on the first scrape and reuse it afterwards.
	if s.req == nil {
		req, err := http.NewRequest("GET", s.URL().String(), nil)
		if err != nil {
			return err
		}
		req.Header.Set("X-Scrape-Timeout-Seconds", fmt.Sprintf("%f", s.timeout.Seconds()))
		s.req = req
	}

	resp, err := s.client.Do(s.req.WithContext(ctx))
	if err != nil {
		return err
	}
	defer func() {
		// Drain the body before closing so the underlying connection can be reused.
		io.Copy(io.Discard, resp.Body)
		resp.Body.Close()
	}()

	if resp.StatusCode != http.StatusOK {
		return errors.Errorf("server returned HTTP status %s", resp.Status)
	}
	return nil
}
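
// Illustrative sketch, not part of the original file: the same scrape pattern
// as above, but standalone against the standard *http.Client instead of
// boomerang.HttpClient (whose construction is not shown in this file).
// probeOnce is a hypothetical helper, not part of this package's API.
func probeOnce(ctx context.Context, target *url.URL, timeout time.Duration) error {
	req, err := http.NewRequest("GET", target.String(), nil)
	if err != nil {
		return err
	}
	req.Header.Set("X-Scrape-Timeout-Seconds", fmt.Sprintf("%f", timeout.Seconds()))
	client := &http.Client{Timeout: timeout}
	resp, err := client.Do(req.WithContext(ctx))
	if err != nil {
		return err
	}
	defer func() {
		io.Copy(io.Discard, resp.Body)
		resp.Body.Close()
	}()
	if resp.StatusCode != http.StatusOK {
		return errors.Errorf("server returned HTTP status %s", resp.Status)
	}
	return nil
}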
// NewStorage creates a storage for buffering target responses.
func NewStorage(chSize int) Store {
	n := new(Storage)
	n.chSize = chSize
	n.rws = make([]TargetResponse, 0, chSize)
	return n
}

// Storage is an in-memory Store that buffers target responses until they are
// committed.
type Storage struct {
	mtx    sync.Mutex
	rws    []TargetResponse
	chSize int
}
// Add implements Store.
func (t *Storage) Add(url *url.URL, health TargetHealth, duration time.Duration) error {
	t.mtx.Lock()
	defer t.mtx.Unlock()

	report := TargetResponse{
		URL:          url,
		Status:       health,
		ResponseTime: duration,
	}
	log.Println("report: ", report)
	t.rws = append(t.rws, report)
	return nil
}
// Commit implements Store.
func (t *Storage) Commit() []TargetResponse {
	t.mtx.Lock()
	resp := t.rws
	t.rws = make([]TargetResponse, 0, t.chSize)
	t.mtx.Unlock()
	return resp
}
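
// Illustrative sketch, not part of the original file: a periodic flusher that
// drains the store via Commit while reporters keep calling Add concurrently
// (Storage's mutex makes that safe). flushEvery is a hypothetical helper.
func flushEvery(ctx context.Context, store Store, every time.Duration) {
	ticker := time.NewTicker(every)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			for _, r := range store.Commit() {
				log.Printf("flushed: %s status=%v took=%s", r.URL, r.Status, r.ResponseTime)
			}
		case <-ctx.Done():
			return
		}
	}
}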
// TargetResponse captures the status and response time of a single scrape of a target.
type TargetResponse struct {
	URL          *url.URL      `json:"url"`
	Status       TargetHealth  `json:"status"`
	ResponseTime time.Duration `json:"response_time"`
}
// Target refers to a single HTTP or HTTPS endpoint.
type Target struct {
	lastError          error
	lastScrape         time.Time
	lastScrapeDuration time.Duration
	health             TargetHealth
	url                *url.URL
}

// NewTarget creates a target for querying.
func NewTarget(url *url.URL) *Target {
	return &Target{
		health: HealthUnknown,
		url:    url,
	}
}

// URL returns the target's URL.
func (t *Target) URL() *url.URL {
	return t.url
}
// hash returns an identifying hash for the target.
func (t *Target) hash() uint64 {
	h := fnv.New64a()
	//nolint: errcheck
	h.Write([]byte(t.URL().String()))
	return h.Sum64()
}
// offset returns the time until the next scrape cycle for the target.
func (t *Target) offset(interval time.Duration, jitterSeed uint64) time.Duration {
	now := time.Now().UnixNano()

	// The base is pinned to absolute time, so the schedule stays stable no
	// matter how often offset is called.
	var (
		base   = int64(interval) - now%int64(interval)
		offset = (t.hash() ^ jitterSeed) % uint64(interval)
		next   = base + int64(offset)
	)
	if next > int64(interval) {
		next -= int64(interval)
	}
	return time.Duration(next)
}
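
// Worked example, not part of the original file: with interval = 10s, suppose
// now falls 6s past a boundary, so base = 4s, and this target's stable phase
// (hash ^ jitterSeed) % interval is 7s. Then next = 11s, which wraps to 1s:
// the target fires 7s past every 10s boundary. demoOffset is a hypothetical
// helper that mirrors the arithmetic above with those fixed inputs.
func demoOffset() time.Duration {
	const interval = 10 * time.Second
	nowMod := int64(6 * time.Second) // pretend now % interval == 6s
	phase := uint64(7 * time.Second) // pretend (hash ^ jitterSeed) % interval == 7s
	base := int64(interval) - nowMod // 4s until the next interval boundary
	next := base + int64(phase)      // 11s, one second past a full interval
	if next > int64(interval) {
		next -= int64(interval) // wraps to 1s
	}
	return time.Duration(next)
}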
// report sets target data about the last scrape.
func (t *Target) report(start time.Time, dur time.Duration, err error) {
	if err == nil {
		t.health = HealthGood
	} else {
		t.health = HealthBad
	}
	t.lastError = err
	t.lastScrape = start
	t.lastScrapeDuration = dur
}
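
// End-to-end sketch, not part of the original file: wiring a Target's
// post-scrape state into a Store. In a real flow this would run inside a
// scrape loop (see scrapeLoop above). recordOnce is a hypothetical helper.
func recordOnce(store Store, t *Target, start time.Time, scrapeErr error) {
	t.report(start, time.Since(start), scrapeErr)
	// Add never fails in the Storage implementation above, but check anyway.
	if err := store.Add(t.URL(), t.health, t.lastScrapeDuration); err != nil {
		log.Println("store add failed:", err)
	}
}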