forked from hzzlzz/crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhttp_requester.c
114 lines (99 loc) · 2.91 KB
/
http_requester.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "http_requester.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netdb.h>
#include <unistd.h>
/* Size in bytes of the on-stack receive buffer used by get_web_page(). */
#define BUFFSIZE 4096
/* Print msg followed by the description of the current errno via perror(),
 * then terminate the process with EXIT_FAILURE. */
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
/* Same as handle_error(), but first stores the error number en into errno so
 * that perror() reports that specific error. */
#define handle_error_en(en, msg) \
do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
/*
 * get_web_page() - fetch a page over plain HTTP/1.0.
 *
 * Resolves hostname, connects to its "http" service, sends
 * "GET /<path> HTTP/1.0" with a Host header, and copies the raw response
 * bytes (exactly as received from the socket) into dest.
 *
 * hostname: host to resolve; also sent in the Host header.
 * path:     request path; a leading '/' is added by this function.
 * dest:     output buffer. It is NOT NUL-terminated by this function.
 * size:     capacity of dest in bytes. Data beyond it is dropped ("Rest Cut").
 *           Because the result is returned as an int, at most INT_MAX bytes
 *           are ever stored.
 *
 * Returns the number of bytes stored in dest. If recv() fails or times out
 * (10 s) part-way through, the bytes collected so far are still returned.
 * On failure before any data is read a negative value is returned:
 *   -1  hostname could not be resolved
 *   -2  no address could be connected, or the request could not be sent
 *   -3  the request buffer could not be allocated/formatted
 *
 * NOTE: a setsockopt() failure terminates the process via handle_error_en().
 */
int get_web_page(const char *hostname, const char* path, char *dest, size_t size) {
    static const char http_request[] = "GET /%s HTTP/1.0\r\nHost: %s\r\n\r\n";
    const char *service = "http";
    unsigned char buffer[BUFFSIZE];
    struct addrinfo hints;
    struct addrinfo *result, *rp;
    struct timeval tv_out;
    char *send_buff;
    size_t req_cap, req_len, sent, cursor;
    int sockfd = -1;
    int s;

    /* The byte count is returned as int; never store more than fits. */
    if (size > (size_t)INT_MAX) {
        size = (size_t)INT_MAX;
    }

    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_UNSPEC;     /* Allow IPv4 or IPv6 */
    hints.ai_socktype = SOCK_STREAM; /* Stream socket */
    hints.ai_flags = 0;
    hints.ai_protocol = 0;           /* Any protocol */

    s = getaddrinfo(hostname, service, &hints, &result);
    if (s != 0) {
        fprintf(stderr, "Could not resolve [%s]: %s\n",
                hostname, gai_strerror(s));
        return -1;
    }

    /* Try each returned address until one connects. sockfd is reset to -1
     * after every failed attempt so it doubles as the "connected" flag. */
    for (rp = result; rp != NULL; rp = rp->ai_next) {
        sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sockfd == -1) {
            continue;
        }
        if (connect(sockfd, rp->ai_addr, rp->ai_addrlen) != -1) {
            break;
        }
        close(sockfd);
        sockfd = -1;
    }

    /* The address list is no longer needed on either path; free it before
     * the failure check so it is not leaked when nothing connected. */
    freeaddrinfo(result);

    if (sockfd == -1) {
        fprintf(stderr, "Could not connect\n");
        return -2;
    }

    /* The format string's own length over-counts by the two "%s" markers,
     * so this capacity is always sufficient for the formatted request. */
    req_cap = strlen(http_request) + strlen(hostname) + strlen(path) + 1;
    send_buff = malloc(req_cap);
    if (send_buff == NULL) {
        fprintf(stderr, "Could not allocate request buffer\n");
        close(sockfd);
        return -3;
    }
    s = snprintf(send_buff, req_cap, http_request, path, hostname);
    if (s < 0 || (size_t)s >= req_cap) {
        fprintf(stderr, "Could not build request\n");
        free(send_buff);
        close(sockfd);
        return -3;
    }
    req_len = (size_t)s;

    /* send() may accept fewer bytes than requested; loop until done. */
    sent = 0;
    while (sent < req_len) {
        ssize_t n = send(sockfd, send_buff + sent, req_len - sent, 0);
        if (n <= 0) {
            fprintf(stderr, "Sending Error:%s\a\n",
                    strerror(errno));
            free(send_buff);
            close(sockfd);
            return -2; /* sending error */
        }
        sent += (size_t)n;
    }
    free(send_buff);

    /* Set time out of recv, in case dead connection occurs,
     * causing recv to wait forever. */
    tv_out.tv_sec = 10; /* Max waiting time */
    tv_out.tv_usec = 0;
    s = setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO,
                   &tv_out, sizeof(tv_out));
    if (s != 0) {
        handle_error_en(errno, "setsockopt");
    }

    cursor = 0;
    fprintf(stderr, "Reading From [%s%s]...\n",
            hostname, path);
    for (;;) {
        ssize_t nbytes = recv(sockfd, buffer, sizeof buffer, 0);

        if (nbytes == -1) {
            fprintf(stderr, "[%s%s] %s\n",
                    hostname, path,
                    strerror(errno));
            break;
        }
        if (nbytes == 0) {
            /* Orderly shutdown by the peer: recv() does not set errno here,
             * so report "no error" instead of a stale errno value. */
            fprintf(stderr, "[%s%s] %s\n",
                    hostname, path,
                    strerror(0));
            break;
        }
        if ((size_t)nbytes > size - cursor) {
            size_t room = size - cursor;

            fprintf(stderr, "[%s%s] Rest Cut\n",
                    hostname, path);
            if (room > 0) {
                memcpy(dest + cursor, buffer, room);
            }
            cursor = size;
            break;
        }
        memcpy(dest + cursor, buffer, (size_t)nbytes);
        cursor += (size_t)nbytes;
    }

    close(sockfd);
    return (int)cursor;
}