Created
December 10, 2019 23:14
-
-
Save speeddragon/a2193916951fa31050f629fd9c87c4c7 to your computer and use it in GitHub Desktop.
Facebook Image Crawler for Puppeteer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Facebook Image Crawler | |
*/ | |
const puppeteer = require("puppeteer"); | |
var https = require("https"); | |
/**
 * Request the biggest image possible for a Facebook photo ID (FBID).
 *
 * Sends a form-encoded POST to "/ajax/photos/snowlift/menu/" on
 * www.facebook.com, then logs the HTTP status code and the raw response body
 * to the console. Request errors are logged as well. Nothing is returned.
 *
 * @param {string} fbid - Photo ID, sent as the `fbid` query parameter.
 * @param {string} fb_dtsg - Token sent as the `fb_dtsg` form field.
 */
function request_big_image_link(fbid, fb_dtsg) {
  // Percent-encode both values so reserved characters (&, =, spaces, ...)
  // cannot corrupt the form body or the query string.
  const body = `__a=1&fb_dtsg=${encodeURIComponent(fb_dtsg)}`;
  // Placeholder: must be replaced with a real session cookie string.
  const cookie = "...";
  const postRequest = {
    host: "www.facebook.com",
    path: `/ajax/photos/snowlift/menu/?fbid=${encodeURIComponent(fbid)}`,
    port: 443,
    method: "POST",
    headers: {
      Cookie: cookie,
      "Content-Type": "application/x-www-form-urlencoded",
      // Byte length, not character count, is what the header requires.
      "Content-Length": Buffer.byteLength(body),
    },
  };

  const req = https.request(postRequest, (res) => {
    console.log(res.statusCode);
    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not mangled by per-chunk string coercion.
    res.setEncoding("utf8");
    let buffer = "";
    res.on("data", (chunk) => {
      buffer += chunk;
    });
    res.on("end", () => {
      console.log(buffer);
    });
  });

  req.on("error", (e) => {
    console.log("problem with request: " + e.message);
  });

  req.write(body);
  req.end();
}
/**
 * Script entry point.
 *
 * Reads "--key=value" command-line arguments, opens the page given by --url
 * in headless Chromium, applies the session cookies, reloads, and prints the
 * "data-ploi" attribute of every element matching [rel="theater"].
 * Exits with status 1 when --url is missing or the crawl fails.
 */
(async () => {
  // Split each argument on its FIRST "=" only, so values that contain "="
  // themselves (e.g. a URL with a query string) are kept whole rather than
  // being silently discarded.
  const options = process.argv.reduce((acc, item) => {
    const separator = item.indexOf("=");
    if (separator !== -1) {
      const key = item.slice(0, separator).replace("--", "");
      acc[key] = item.slice(separator + 1);
    }
    return acc;
  }, {});

  if (options.url === undefined) {
    console.log("Missing --url parameter");
    await exit(null, 1);
    return;
  }

  let browser = null;
  let status = 0;
  try {
    browser = await puppeteer.launch({ headless: true, args: ["--no-sandbox"] });
    const page = await browser.newPage();
    const viewPort = { width: 1280, height: 960 };
    await page.setViewport(viewPort);
    // timeout: 0 disables the navigation timeout.
    await page.goto(options.url, { waitUntil: "domcontentloaded", timeout: 0 });

    // Placeholder values: replace with the cookies of a logged-in session.
    const cookies = [
      { name: "act", value: "..." },
      { name: "datr", value: "..." },
      { name: "fr", value: "..." },
      { name: "sb", value: "..." },
      { name: "c_user", value: "..." },
      { name: "wd", value: "..." },
      { name: "xs", value: "..." },
    ];
    // Cookies are set after the first navigation and the page is then
    // reloaded so the second load is made with them.
    await page.setCookie(...cookies);
    await page.reload();

    // Runs inside the page: collect every non-null "data-ploi" attribute.
    const articles = await page.evaluate(() => {
      const elements = Array.from(document.querySelectorAll('[rel="theater"]'));
      return elements.reduce((acc, element) => {
        // Declared locally; the bare assignment used before leaked a global.
        const link = element.getAttribute("data-ploi");
        if (link != null) {
          acc.push(link);
        }
        return acc;
      }, []);
    });

    articles.forEach((item) => {
      console.log(item);
    });
  } catch (err) {
    console.error(`Crawl failed: ${err.message}`);
    status = 1;
  } finally {
    // Always close the browser (if it was launched) and terminate.
    await exit(browser, status);
  }
})();
/**
 * Close the browser, if one was given, and terminate the process.
 *
 * @param {?{close: function(): Promise<void>}} browser - Object exposing an
 *   async `close()` method, or null/undefined when no browser was launched.
 * @param {number} [status=0] - Process exit code.
 * @returns {Promise<void>}
 */
async function exit(browser, status = 0) {
  try {
    // `!= null` also skips `undefined`, which would otherwise throw on .close().
    if (browser != null) {
      await browser.close();
    }
  } finally {
    // Terminate even if closing the browser failed.
    process.exit(status);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment