-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_answers.js
97 lines (80 loc) · 2.99 KB
/
load_answers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
const BigQuery = require('@google-cloud/bigquery');
var mysql = require('promise-mysql');
// const json2csv = require('json2csv');
const Promise = require('bluebird');
// Lower bound for answer creation dates, taken from the first command-line argument
const inputDate = process.argv[2];
if (!inputDate) {
    // Fail before touching the database: the DELETE issued below is destructive,
    // and without a date the BigQuery query cannot succeed.
    console.error("Usage: node load_answers.js <creation-date>");
    process.exit(1);
}
// Google Cloud Platform project ID
const projectId = 'utility-ratio-190419';
// Shared MySQL connection; assigned once the connection has been established
let DBconn = null;
// NOTE(review): connection credentials are hard-coded here — consider loading them from the environment.
const con = mysql.createConnection({
    host: "localhost",
    user: "admin",
    password: "password",
    database: 'stackNetwork'
}).then(function(conn) {
    console.log("Connected");
    DBconn = conn;
    // Empty the target table, then start the BigQuery load. The chain is
    // returned so that no rejection is left unhandled.
    return DBconn.query('DELETE FROM posts_answers').then(function() {
        queryStackOverflow(projectId, q1);
    }, function(err) {
        console.error("Error clearing posts_answers table");
        console.log(err);
        // Release the connection so the process can terminate
        return DBconn.end();
    });
}).catch(function(err) {
    console.error("Error connection to database");
    console.log(err);
});
/**
 * Runs a standard-SQL query on BigQuery and hands the returned rows to
 * saveToDB in batches.
 *
 * Every result row is read through its `id` and `location` fields. Once all
 * batches have been saved (or any step has failed) the shared `DBconn`
 * connection is closed and the process exits with status 0 on success or
 * status 1 on failure.
 *
 * @param {string} projectId - Google Cloud Platform project ID for the BigQuery client.
 * @param {string} sqlQuery - Standard-SQL query text to execute.
 * @returns {Promise<void>} Settles after the connection has been closed.
 */
function queryStackOverflow(projectId, sqlQuery) {
    // There can be a huge number of rows, so they are saved in batches of this size
    const BATCH_SIZE = 10000;
    // Uses the client library already required at the top of the file
    const bigquery = new BigQuery({
        projectId: projectId,
    });
    // Query options list: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query
    const options = {
        query: sqlQuery,
        useLegacySql: false, // Use standard SQL syntax for queries.
    };

    // Runs the query
    return bigquery
        .query(options)
        .then(results => {
            const values = results[0].map(row => [row.id, row.location]);
            const promises = [];
            // Stepping by BATCH_SIZE never yields an empty batch, even when the
            // row count is 0 or an exact multiple of BATCH_SIZE.
            for (let start = 0; start < values.length; start += BATCH_SIZE) {
                saveToDB(values.slice(start, start + BATCH_SIZE), promises);
            }
            // When all queries are run
            return Promise.all(promises).then(() => {
                console.log("Saved all records");
                return 0;
            }, err => {
                console.error("Error saving to DB");
                console.error(err);
                return 1;
            });
        })
        .catch(err => {
            console.error('ERROR:', err);
            return 1;
        })
        .then(exitCode => {
            const finish = () => process.exit(exitCode);
            if (!DBconn) {
                return finish();
            }
            // Close the connection first and exit only once the close has
            // settled, whether or not it succeeded.
            return Promise.resolve()
                .then(() => DBconn.end())
                .then(finish, finish);
        });
}
// Standard-SQL text selecting answer ids together with their owners' locations,
// restricted to answers created after inputDate and to non-empty locations.
// NOTE(review): inputDate is interpolated verbatim into the SQL text.
var q1 = [
    'SELECT',
    'p.id, u.location',
    'FROM `bigquery-public-data.stackoverflow.posts_answers` p',
    'JOIN `bigquery-public-data.stackoverflow.users` u ON p.owner_user_id = u.id',
    `WHERE p.creation_date > '${inputDate}' and u.location IS NOT NULL and u.location!=''`
].join('\n');
//var q2 = `DESCRIBE bigquery-public-data.stackoverflow.posts_questions`;
//queryStackOverflow(projectId, q1);
//queryStackOverflow(projectId, q2);
/**
 * Starts a bulk INSERT of one batch into `posts_answers` and records the
 * pending query so the caller can wait for it.
 *
 * @param {Array<Array>} records - Batch of [id, location] pairs to insert.
 * @param {Array} promises - Collector that receives the value returned by DBconn.query.
 */
function saveToDB(records, promises) {
    const insertSql = 'INSERT INTO posts_answers (id, Location) VALUES ?';
    const pendingInsert = DBconn.query(insertSql, [records]);
    promises.push(pendingInsert);
}