This tutorial looks at using Puppeteer to automate scraping table data into JSON. Puppeteer is an open-source Node.js library that offers a high-level API for controlling Chrome and Chromium-based browsers via the DevTools Protocol. With Puppeteer, you can automate virtually any task you could carry out by hand in a Chrome browser, which makes it a great fit for test automation and web scraping. This post covers everything you need to know about using Puppeteer to automate table data scraping. We will talk about:
- Automated Table Data Scraping
- Using Puppeteer selectors to target table data (a short sketch follows this list)
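Before diving into the full script, here is a minimal sketch of the second point: using a CSS selector with `page.$$eval` to target table rows and read their cell text. The URL below is a placeholder and `table tbody tr` is a generic selector; the real script later in this post targets the site's `.tabCurr` table.

```js
const puppeteer = require("puppeteer");

(async () => {
  const browser = await puppeteer.launch({ headless: true });
  const page = await browser.newPage();

  // placeholder URL - replace with the page you want to scrape
  await page.goto("https://example.com/some-table-page", {
    waitUntil: "networkidle0",
  });

  // $$eval runs the callback inside the page over every node matching
  // the selector; here each <tr> becomes an array of its cells' text
  const rows = await page.$$eval("table tbody tr", (trs) =>
    trs.map((tr) =>
      Array.from(tr.querySelectorAll("td"), (td) => td.innerText.trim()),
    ),
  );

  console.log(rows);
  await browser.close();
})();
```

The same idea, with `page.evaluate` instead of `$$eval`, powers the full script below.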
```js
const puppeteer = require("puppeteer");
const fs = require("fs");

async function scraper(option) {
  try {
    // launch a visible (non-headless), maximized browser
    const browser = await puppeteer.launch({
      args: ["--start-maximized"],
      headless: false,
    });

    // open a new blank page
    const page = await browser.newPage();

    // resize the viewport
    await page.setViewport({ width: 1366, height: 768 });

    // navigate to the URL and wait until network activity has settled
    await page.goto(
      `https://www.publicsurplus.com/sms/ufl,fl/list/current?slth=&sma=&orgid=1239&sorg=&ctxnId=341800115&page=${option.code}&sortBy=timeLeft`,
      { waitUntil: "networkidle0" },
    );

    // wait for the results table to appear in the DOM
    await page.waitForSelector(".tabCurr");
    const content = await page.evaluate(() => {
      const tbody = document.querySelector(".tabCurr tbody");

      // collect every row of the table body
      const trs = Array.from(tbody.querySelectorAll("tr"));

      const content = [];
      // iterate through each row of the table
      for (const tr of trs) {
        const tds = Array.from(tr.querySelectorAll("td"));
        const data = tds.map((td) => td.innerText);
        if (tds.length >= 5) {
          // push the row's cells as a named object;
          // the notes cell may be absent on some rows
          content.push({
            auction: data[0],
            title: data[1],
            image: data[2],
            time_left: data[3],
            current_price: data[4],
            notes: data[5] ?? "",
          });
        }
      }
      return content;
    });
    // make sure the output directory exists, then write the data as JSON
    fs.mkdirSync("./Data", { recursive: true });
    fs.writeFile(
      `./Data/Data-${option.index}.json`,
      JSON.stringify(content, null, 2),
      (err) => {
        if (err) {
          console.log(err);
        } else {
          console.log(`Data of Page-${option.index} Scraped`);
        }
      },
    );

    await browser.close();
  } catch (e) {
    console.log(e);
  }
}
const main = async () => {
  // loop through the pages;
  // raise the upper bound to scrape more pages
  for (let i = 0; i <= 1; i++) {
    await scraper({ code: i, index: i });
  }
};

main()
  .then(() => {
    console.log("Scraping Completed!");
  })
  .catch((e) => {
    console.log(`Failed due to exception - ${e}`);
  });
```
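One note on the `main` loop: it scrapes pages one at a time, which is the safer default. If you want to scrape several pages concurrently instead, `Promise.all` is the right tool once it is handed all the jobs at once. Here is a sketch, keeping in mind that each call to `scraper` opens its own browser window:

```js
// a sketch of concurrent scraping: start every page job first,
// then wait for all of them to finish
const scrapeAll = async (pageCount) => {
  const jobs = [];
  for (let i = 0; i < pageCount; i++) {
    jobs.push(scraper({ code: i, index: i }));
  }
  await Promise.all(jobs);
};

// e.g. scrapeAll(2) covers the same two pages as the loop above
```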
Every important line in the script is commented above. If you need further assistance, feel free to contact me.
Happy coding …