Skip to content

Instantly share code, notes, and snippets.

@adrianhorning08
Created August 4, 2023 23:17
Show Gist options
  • Save adrianhorning08/dd72c19670b488ac5b42ec292a6d158a to your computer and use it in GitHub Desktop.
Save adrianhorning08/dd72c19670b488ac5b42ec292a6d158a to your computer and use it in GitHub Desktop.
Scrape Google Maps
import * as cheerio from "cheerio";
import puppeteerExtra from "puppeteer-extra";
import stealthPlugin from "puppeteer-extra-plugin-stealth";
import chromium from "@sparticuz/chromium";
/** CSS selector of the scrollable results list on a Google Maps search page. */
const RESULTS_FEED_SELECTOR = 'div[role="feed"]';

/**
 * Builds the Google Maps search URL for a free-text query.
 *
 * Words are joined with "+" (as the original implementation did) and each
 * word is percent-encoded so characters such as "&", "#" or "/" cannot
 * corrupt the URL path.
 *
 * @param {string} query - Free-text search, e.g. "Auto repair shops austin".
 * @returns {string} Absolute search URL.
 */
function buildSearchUrl(query) {
  const encodedQuery = query
    .trim()
    .split(/\s+/)
    .map((word) => encodeURIComponent(word))
    .join("+");
  return `https://www.google.com/maps/search/${encodedQuery}`;
}

/**
 * Scrolls the results feed inside the page until its scrollHeight stops
 * growing, so that lazily loaded results are present in the DOM.
 *
 * The loop runs sequentially: one scroll step every 200 ms, and once the
 * accumulated distance reaches the feed height it waits 3000 ms to see
 * whether more content was appended. A plain loop is used instead of an
 * async setInterval callback because the interval kept firing during the
 * 3000 ms wait, producing overlapping ticks that shared `totalHeight`.
 *
 * @param {object} page - Puppeteer page that has already navigated to the search.
 * @returns {Promise<boolean>} `true` when the feed was scrolled to its end,
 *   `false` when the page has no results feed (nothing was scrolled).
 */
async function autoScroll(page) {
  return page.evaluate(
    async (feedSelector, distance, tickDelay, settleDelay) => {
      const wrapper = document.querySelector(feedSelector);
      if (!wrapper) {
        // Without this guard the scroll loop would throw on every step and
        // the evaluate() call would never settle.
        return false;
      }
      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      let totalHeight = 0;
      for (;;) {
        await sleep(tickDelay);
        const scrollHeightBefore = wrapper.scrollHeight;
        wrapper.scrollBy(0, distance);
        totalHeight += distance;
        if (totalHeight < scrollHeightBefore) {
          continue;
        }
        // Scrolled past the known height: give the page time to append more.
        totalHeight = 0;
        await sleep(settleDelay);
        if (wrapper.scrollHeight <= scrollHeightBefore) {
          // No more content loaded, stop scrolling.
          return true;
        }
      }
    },
    RESULTS_FEED_SELECTOR,
    1000,
    200,
    3000
  );
}

/**
 * Extracts the place id (the token starting with "ChI") from a place URL.
 *
 * @param {string|undefined} url - Value of the result link's href.
 * @returns {string|null} The place id, or `null` when the URL is missing or
 *   has no "ChI" token (previously this produced the string "ChIundefined").
 */
function extractPlaceId(url) {
  const idSuffix = url?.split("?")[0].split("ChI")[1];
  return idSuffix ? `ChI${idSuffix}` : null;
}

/**
 * Parses a rating aria-label of the form "<stars> stars <count> Reviews".
 *
 * @param {string|undefined} ratingText - The aria-label text, if any.
 * @returns {{stars: (number|null), numberOfReviews: (number|null)}} Parsed
 *   values; a field is `null` when it is absent or not numeric.
 */
function parseRating(ratingText) {
  if (!ratingText) {
    return { stars: null, numberOfReviews: null };
  }
  const [starsPart = "", reviewsPart = ""] = ratingText.split("stars");
  const starsText = starsPart.trim();
  const starsValue = Number(starsText);
  // Keep digits only so "1,234 Reviews" and "1 Review" both parse.
  const reviewDigits = reviewsPart.replace(/\D/g, "");
  return {
    stars: starsText && Number.isFinite(starsValue) ? starsValue : null,
    numberOfReviews: reviewDigits ? Number(reviewDigits) : null,
  };
}

/**
 * Extracts one business record per place link found in the page HTML.
 *
 * A place link is any <a> whose href contains "/maps/place/"; the fields are
 * read from that link's parent element using Google Maps' CSS class names.
 *
 * @param {string} html - Full HTML of the scrolled results page.
 * @returns {Array<object>} Business records (see searchGoogleMaps).
 */
function parseBusinesses(html) {
  const $ = cheerio.load(html);
  const businesses = [];
  $("a").each((i, el) => {
    const href = $(el).attr("href");
    if (!href || !href.includes("/maps/place/")) {
      return;
    }
    const parent = $(el).parent();
    const url = parent.find("a").attr("href");
    const website = parent.find('a[data-value="Website"]').attr("href");
    const storeName = parent.find("div.fontHeadlineSmall").text();
    const ratingText = parent
      .find("span.fontBodyMedium > span")
      .attr("aria-label");
    // The last child of the first fontBodyMedium div holds the detail rows:
    // its first child is "category · address", its last child "… · phone".
    const detailRows = parent
      .find("div.fontBodyMedium")
      .first()
      .children()
      .last()
      .children();
    const categoryAndAddress = detailRows.first().text().split("·");
    const hoursAndPhone = detailRows.last().text().split("·");
    const { stars, numberOfReviews } = parseRating(ratingText);
    businesses.push({
      placeId: extractPlaceId(url),
      address: categoryAndAddress[1]?.trim(),
      category: categoryAndAddress[0].trim(),
      phone: hoursAndPhone[1]?.trim(),
      googleUrl: url,
      bizWebsite: website,
      storeName,
      ratingText,
      stars,
      numberOfReviews,
    });
  });
  console.log("parents", businesses.length);
  return businesses;
}

/**
 * Searches Google Maps for `query`, scrolls through the whole results list
 * and scrapes every listed business from the resulting HTML.
 *
 * The browser is always closed, even when navigation or scrolling fails.
 * Errors are logged rather than thrown, matching the original contract.
 *
 * @param {string} [query="Auto repair shops austin"] - Free-text search.
 * @returns {Promise<Array<{
 *   placeId: (string|null), address: (string|undefined),
 *   category: string, phone: (string|undefined),
 *   googleUrl: (string|undefined), bizWebsite: (string|undefined),
 *   storeName: string, ratingText: (string|undefined),
 *   stars: (number|null), numberOfReviews: (number|null)
 * }>|undefined>} The scraped businesses, or `undefined` when scraping failed.
 */
async function searchGoogleMaps(query = "Auto repair shops austin") {
  try {
    const start = Date.now();
    puppeteerExtra.use(stealthPlugin());
    const browser = await puppeteerExtra.launch({
      headless: false,
      // headless: "new",
      // devtools: true,
      executablePath: "", // your path here
    });
    // const browser = await puppeteerExtra.launch({
    //   args: chromium.args,
    //   defaultViewport: chromium.defaultViewport,
    //   executablePath: await chromium.executablePath(),
    //   headless: "new",
    //   ignoreHTTPSErrors: true,
    // });

    let html;
    try {
      const page = await browser.newPage();
      try {
        await page.goto(buildSearchUrl(query));
      } catch (error) {
        // Continuing after a failed navigation would only scrape an empty
        // page, so surface the failure instead.
        throw new Error(`error going to page: ${error.message}`, {
          cause: error,
        });
      }
      const feedFound = await autoScroll(page);
      if (!feedFound) {
        console.log("results feed not found, parsing the page as loaded");
      }
      html = await page.content();
    } finally {
      // Closing the browser also closes every page it owns.
      await browser.close();
      console.log("browser closed");
    }

    const businesses = parseBusinesses(html);
    const end = Date.now();
    console.log(`time in seconds ${Math.floor((end - start) / 1000)}`);
    return businesses;
  } catch (error) {
    console.log("error at googleMaps", error.message);
    return undefined;
  }
}
@titoih
Copy link

titoih commented Nov 30, 2023

hi there!
Thanks for sharing.
What about this:

  • After scraping, save the phone numbers to Google Contacts and the emails to a bulk email provider like Mailchimp, in order to send commercial emails.

Could you contact me to talk about this?
Cheers

@muhamed-didovic
Copy link

@titoih I can help you with that, hit me up

@ChathurangaCPM
Copy link

@adrianhorning08 thanks for sharing,

Can I get images and prices for places (in a hotel search) like that? Is it possible?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment