Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial SDK #2

Merged
merged 27 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6bb1b50
init commit
maaz-munir Nov 23, 2023
62e861a
some improvements + added bing and baidu
maaz-munir Nov 24, 2023
7ada257
Apply suggestions from code review
maaz-munir Nov 28, 2023
37300b9
added google_search + adjusted or multiple return types
maaz-munir Dec 1, 2023
6e66508
added google source and check for empty url
maaz-munir Dec 1, 2023
fb3c840
Apply suggestions from code review v2
maaz-munir Dec 7, 2023
e72bf48
added remaining google serp sources
maaz-munir Dec 7, 2023
2ffd8e4
comments + some more checks
maaz-munir Dec 8, 2023
f2f2749
check for async runtime models
maaz-munir Dec 11, 2023
14e3d24
Apply suggestions from code review v3 + yandex
maaz-munir Dec 12, 2023
f7dd301
bing and baidu async models + some improvements
maaz-munir Dec 12, 2023
905ed7c
2 google funcs + better error handling with channels
maaz-munir Dec 12, 2023
1420df5
rest of google sources for async polling model
maaz-munir Dec 12, 2023
d14237b
parse checks in google_async + some comment fixes
maaz-munir Dec 16, 2023
d3c3c64
proxy endpoint integration method
maaz-munir Dec 16, 2023
ace829f
send custom headers with proxy endpoint
maaz-munir Dec 16, 2023
0d54a22
make GeoLocation param a ptr
maaz-munir Dec 18, 2023
1a9a06b
refactor async functions
maaz-munir Dec 18, 2023
179164d
update creating payload in google_search funcs
maaz-munir Dec 19, 2023
ce1b8ce
update public func comments
maaz-munir Dec 19, 2023
8fda0b6
Apply suggestions from code review v4
maaz-munir Dec 20, 2023
608084f
update readme
maaz-munir Dec 20, 2023
c689f01
comment
maaz-munir Dec 20, 2023
9270fd9
comments + spelling fixes
maaz-munir Dec 20, 2023
8a4b9c8
update readme
maaz-munir Dec 20, 2023
fe7dda5
update readme
maaz-munir Dec 21, 2023
c250f36
fmt
maaz-munir Dec 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions serp/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,18 @@ func Init(
}
}

// SerpClientAsync is the SDK client for the Async runtime model. Its methods,
// such as ScrapeYandexSearch, submit a job and deliver the result on a channel.
type SerpClientAsync struct {
// HttpClient is the HTTP client associated with this SDK client.
HttpClient *http.Client
// ApiCredentials holds the username and password sent as HTTP basic auth.
ApiCredentials *ApiCredentials
// BaseUrl is the endpoint that job-submission requests are POSTed to.
BaseUrl string
}

// Init for Async runtime model.
func InitAsync(
username string,
password string,
) *SerpClient {
return &SerpClient{
) *SerpClientAsync {
return &SerpClientAsync{
ApiCredentials: &ApiCredentials{
Username: username,
Password: password,
Expand Down
131 changes: 131 additions & 0 deletions serp/yandex_async.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package serp

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)

func (c *SerpClientAsync) ScrapeYandexSearch(
query string,
opts ...*YandexSearchOpts,
) (chan *Response, error) {
ResponseChan := make(chan *Response)
maaz-munir marked this conversation as resolved.
Show resolved Hide resolved

opt := &YandexSearchOpts{}
if len(opts) > 0 && opts[len(opts)-1] != nil {
opt = opts[len(opts)-1]
}

// Set defaults.
SetDefaultDomain(&opt.Domain)
SetDefaultStartPage(&opt.StartPage)
SetDefaultLimit(&opt.Limit)
SetDefaultPages(&opt.Pages)
SetDefaultUserAgent(&opt.UserAgent)

// Check validity of parameters.
err := opt.checkParameterValidity()
if err != nil {
return nil, err
}

// Prepare payload.
payload := map[string]interface{}{
"source": "yandex_search",
"domain": opt.Domain,
"query": query,
"start_page": opt.StartPage,
"pages": opt.Pages,
"limit": opt.Limit,
"locale": opt.Locale,
"geo_location": opt.GeoLocation,
"user_agent_type": opt.UserAgent,
"callback_url": opt.CallbackUrl,
}
jsonPayload, err := json.Marshal(payload)
if err != nil {
return nil, fmt.Errorf("error marshalling payload: %v", err)
}

client := &http.Client{}
request, _ := http.NewRequest(
"POST",
c.BaseUrl,
bytes.NewBuffer(jsonPayload),
)

request.Header.Add("Content-type", "application/json")
request.SetBasicAuth(c.ApiCredentials.Username, c.ApiCredentials.Password)
response, _ := client.Do(request)

responseBody, _ := io.ReadAll(response.Body)

// unmarshal into job object
job := &Job{}
json.Unmarshal(responseBody, &job)

request, _ = http.NewRequest("GET",
fmt.Sprintf("https://data.oxylabs.io/v1/queries/%s", job.ID),
nil,
)
request.Header.Add("Content-type", "application/json")
request.SetBasicAuth(c.ApiCredentials.Username, c.ApiCredentials.Password)
maaz-munir marked this conversation as resolved.
Show resolved Hide resolved

go func() {
for {
response, _ = client.Do(request)
responseBody, _ = io.ReadAll(response.Body)

json.Unmarshal(responseBody, &job)

if job.Status == "done" {
maaz-munir marked this conversation as resolved.
Show resolved Hide resolved
JobId := job.ID
request, _ = http.NewRequest("GET",
fmt.Sprintf("https://data.oxylabs.io/v1/queries/%s/results", JobId),
nil,
)

request.Header.Add("Content-type", "application/json")
request.SetBasicAuth(c.ApiCredentials.Username, c.ApiCredentials.Password)
response, _ = client.Do(request)

// Read the response body into a buffer.
responseBody, err := io.ReadAll(response.Body)
if err != nil {
err = fmt.Errorf("error reading response body: %v", err)
return
}

// Send back error message.
if response.StatusCode != 200 {
err = fmt.Errorf("error with status code %s: %s", response.Status, responseBody)
return
}

// Unmarshal the JSON object.
resp := &Response{}
if err := resp.UnmarshalJSON(responseBody); err != nil {
err = fmt.Errorf("failed to parse JSON object: %v", err)
return
}
resp.StatusCode = response.StatusCode
resp.Status = response.Status
ResponseChan <- resp
}

time.Sleep(2 * time.Second)
maaz-munir marked this conversation as resolved.
Show resolved Hide resolved
}
}()

if err != nil {
return nil, err
}

defer response.Body.Close()
maaz-munir marked this conversation as resolved.
Show resolved Hide resolved

return ResponseChan, nil
}