-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape.js
More file actions
80 lines (69 loc) · 3.29 KB
/
scrape.js
File metadata and controls
80 lines (69 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
const puppeteer = require('puppeteer');
const fs = require('fs-extra');
const date = require('./date');
(async () => {
try{
/**
 * Scrapes a user's match history page by page, starting at `url`, and appends
 * one CSV line per match row to `fileName`.
 *
 * Uses `page`, `fs` and `fileName` from the enclosing scope. Rows are read
 * from the `.rowBg1` (dark) and `.rowBg2` (grey) elements and written
 * interleaved (dark, grey, dark, ...). Paging continues for as long as a page
 * holds exactly 10 rows of each shade.
 *
 * @param {string} url - Match-history URL of the first page to scrape; it must
 *   end in `&fb=&p=<page>&fb=` so the following page can be derived from it.
 * @param {string} userId - Account id interpolated into the URLs of the
 *   following pages.
 * @returns {Promise<void>} Resolves after the first page that is not full has
 *   been written.
 * @throws {Error} If a full page was scraped but `url` carries no page number.
 */
const extractAll = async (url, userId) => {
  // A page with exactly this many dark AND grey rows is treated as "full",
  // meaning another page may follow.
  const FULL_PAGE_ROWS_PER_SHADE = 10;
  // Number of columns in the CSV header written by the caller.
  const COLUMN_COUNT = 8;

  // Quotes one CSV field: a missing cell becomes empty and embedded double
  // quotes are doubled so the line stays parseable.
  const toCsvField = (value) => {
    const text = value === undefined ? '' : String(value);
    return `"${text.replace(/"/g, '""')}"`;
  };

  let currentUrl = url;
  // Iterating instead of recursing avoids one pending async call per page.
  for (;;) {
    console.log(currentUrl);
    await page.goto(currentUrl);
    const darkRows = await page.$$('.rowBg1');
    const greyRows = await page.$$('.rowBg2');

    // Walk both lists in lock-step so the output alternates dark/grey; a
    // shorter list simply contributes nothing once it runs out.
    const pairCount = Math.max(darkRows.length, greyRows.length);
    for (let i = 0; i < pairCount; i++) {
      for (const row of [darkRows[i], greyRows[i]]) {
        if (row === undefined) {
          continue;
        }
        const paragraphs = await row.$$('p');
        const cells = [];
        for (const paragraph of paragraphs) {
          // Line breaks inside a cell are removed so each match stays on one CSV line.
          const text = await page.evaluate((p) => p.innerText.replace(/\r?\n/g, ''), paragraph);
          cells.push(text);
        }
        // Always emit exactly COLUMN_COUNT fields so every line matches the header.
        const fields = Array.from({ length: COLUMN_COUNT }, (_, column) => toCsvField(cells[column]));
        await fs.appendFile(fileName, `${fields.join(',')}\n`);
      }
    }

    // Anything other than a full page is the last page.
    if (darkRows.length !== FULL_PAGE_ROWS_PER_SHADE || greyRows.length !== FULL_PAGE_ROWS_PER_SHADE) {
      return;
    }

    const pageMatch = currentUrl.match(/&fb=&p=(\d+)&fb=$/);
    if (pageMatch === null) {
      throw new Error(`Cannot determine the next page number from match history URL: ${currentUrl}`);
    }
    const nextPageNumber = Number.parseInt(pageMatch[1], 10) + 1;
    currentUrl = `https://www.poxnora.com/account/matchhistory.do?&i=${userId}&fb=&p=${nextPageNumber}&fb=`;
  }
};
// Entry sequence: validate the CLI argument, create the CSV with its header,
// log in, open the requested user's match history, and scrape every page.
if (process.argv.length !== 3) {
  throw new Error("Only one argument(user name) can be passed to this script!");
}
const user = process.argv[2];
// `fileName` must stay in this scope: extractAll closes over it.
const fileName = `${user}${date.getCurrDate()}.csv`;
//set headers
await fs.writeFile(fileName, 'Win,Loss,Type,Ranked,Map,Rating,Date,Duration\n');

// FIXME(security): credentials should not live in source control. They can be
// supplied via POXNORA_USERNAME / POXNORA_PASSWORD; the literals are kept only
// as a fallback so existing invocations keep working.
const loginName = process.env.POXNORA_USERNAME || "emf2";
const loginPassword = process.env.POXNORA_PASSWORD || "7894561230";

const browser = await puppeteer.launch({ headless: true });
// `page` must stay in this scope as well: extractAll closes over it.
let page;
try {
  page = (await browser.pages())[0];
  //To view match history, it requires login and a submit button click
  await page.goto('https://www.poxnora.com/security/login.do');
  await page.type("input[name='username']", loginName, {delay: 100});
  await page.type("input[name='password']", loginPassword, {delay: 100});
  // Start waiting for the navigation before clicking so it cannot be missed.
  await Promise.all([
    page.waitForNavigation(),
    page.click("button[type='submit']")
  ]);
  //Navigate to user profile (the name is encoded so special characters cannot break the query string)
  await page.goto(`https://www.poxnora.com/users/viewprofile.do?u=${encodeURIComponent(user)}`);
  //Click game history button
  await Promise.all([
    page.waitForNavigation(),
    page.click("img[src='/_themes/global/btn_gamehistory.jpg']")
  ]);
  // The match-history URL reached by the click carries the numeric account id.
  const historyUrl = page.url();
  const idMatch = historyUrl.match(/i=(\d+)&fb=/);
  if (idMatch === null) {
    throw new Error(`Could not find a user id in the match history URL: ${historyUrl}`);
  }
  const userId = idMatch[1];
  //Scrape all pages, starting with the initial url at page 0
  await extractAll(`https://www.poxnora.com/account/matchhistory.do?&i=${userId}&fb=&p=0&fb=`, userId);
} finally {
  // Always release the browser process, even when a step above throws.
  await browser.close();
}
}catch(e){
console.log("our error", e)
}
})();