Scraping Reddit Programmer Humor with Node.js using Puppeteer

/*
    This code comes from Vincent Lab
    And it has a video version linked here: https://www.youtube.com/watch?v=Zb--639XePw
*/

// Import dependencies
const puppeteer = require("puppeteer");
const fs = require("fs");

(async () => {

    // The number of posts. 25 would give you about 100
    const amount = 25;

    // The location / URL
    const url = "https://www.reddit.com/r/ProgrammerHumor/";

    // All of the jokes as key-value pairs
    let jokesObject = {};

    console.log("Getting posts from Reddit");

    // Create the browser
    const browser = await puppeteer.launch({
        headless: false
    });

    // Navigate to the website
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "load" });

    // Get the root element of all the posts
    const root = (await page.$$(`.rpBJOHq2PR60pnwJlUyP0`))[0];

    // All the posts
    const posts = [];

    // For amount
    for (let i = 0; i < amount; i++) {

        // Get all the posts in this chunk
        const chunk = await (await root.$$("._1poyrkZ7g36PawDueRza-J"));

        // Add all the posts in this chunk to the posts array
        posts.push(...chunk);

        // Wait for 1 second
        await sleep(1000);

        // Scroll to the next chunk
        await page.evaluate(() => {
            window.scrollBy(0, (632 * 12));
        });

    }

    console.log("Extracting jokes from posts");

    // For each post
    for (const post of posts) {

        try {

            // Get the title
            const title = await getProperty(post, "textContent", "_eYtD2XCVieq6emjKBH3m");

            // Get the image jokesObject
            const image = await getProperty(post, "src", "ImageBox-image");

            // Add the post to the jokes object
            jokesObject[title] = { image: image };

        } catch (error) {

        }

    }

    console.log("Converting jokes into an array");

    // Convert the jokes object into an array
    const jokes = [];
    for (const joke in jokesObject) {
        jokes.push({
            title: joke,
            image: jokesObject[joke].image
        })
    }

    console.log("Saving jokes");

    // Save the jokes to a file
    fs.writeFileSync("jokes.json", JSON.stringify(jokes));

    // Close the browser
    await browser.close();

})();

// Get a property on an element from within an object
async function getProperty(rootElement, property, className) {
    const element = (await rootElement.$$(`.${className}`))[0];
    return await (await element.getProperty(property)).jsonValue();
}

// Sleep for x
function sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}

Are there any code examples left?
Made with love
This website uses cookies to make IQCode work for you. By using this site, you agree to our cookie policy

Welcome Back!

Sign up to unlock all of IQCode features:
  • Test your skills and track progress
  • Engage in comprehensive interactive courses
  • Commit to daily skill-enhancing challenges
  • Solve practical, real-world issues
  • Share your insights and learnings
Create an account
Sign in
Recover lost password
Or log in with

Create a Free Account

Sign up to unlock all of IQCode features:
  • Test your skills and track progress
  • Engage in comprehensive interactive courses
  • Commit to daily skill-enhancing challenges
  • Solve practical, real-world issues
  • Share your insights and learnings
Create an account
Sign up
Or sign up with
By signing up, you agree to the Terms and Conditions and Privacy Policy. You also agree to receive product-related marketing emails from IQCode, which you can unsubscribe from at any time.
Creating a new code example
Code snippet title
Source