Quantcast
Channel: Recent Questions - Stack Overflow
Viewing all articles
Browse latest Browse all 11601

Working on an old.reddit webscraping project, How do I make it read the description of the post too?

$
0
0

The code for this project is on replit.com How do I add the description?

I know this is my first post btw, so I do not know If I made this question correctly.When I tried to fix it myself it either just didn’t give the post_desc or just decided to put the intro for the subreddit in thereI tried the normal stuff like

post_content = soup.find_all(“p”, class_=“md”)

And

Post_description = soup.find_all(“div”, class_=“p”)

And it didn't work; how could I fix this?

If you do not want to go on replit here is the code

import osimport requestsfrom bs4 import BeautifulSoupfrom gtts import gTTSdef reset_folder(base_folder):    try:        os.rmdir(base_folder)        print(f"Previous data removed.")    except OSError as e:        print(f"Error: {e}")def fetch_reddit_data(subreddit_name, num_posts):    # Reset option    reset_option = input("Do you want to reset existing data? (y/n): ").lower()    if reset_option == 'y':        reset_folder(f"videos_{subreddit_name}")    url = f"https://old.reddit.com/r/{subreddit_name}/"    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})    if response.status_code == 200:        soup = BeautifulSoup(response.text, 'html.parser')        post_authors = soup.find_all("a", class_="author")        post_titles = soup.find_all("a", class_="title")        post_content = soup.find_all("div", class_="md")        post_urls = [a['href'] for a in soup.find_all('a', href=True, class_='title')]        # Display hot posts for user selection        for i in range(min(num_posts, len(post_titles), len(post_authors))):            print(f"({i + 1}): {post_titles[i].text} (Author: {post_authors[i].text})")        # Prompt user to choose posts        chosen_posts_input = input("Enter the numbers of the posts you want to use (comma-separated): ")        chosen_posts_indices = [int(index) - 1 for index in chosen_posts_input.split(',')]        # Validate user input        if any(index < 0 or index >= min(num_posts, len(post_titles), len(post_authors)) for index in chosen_posts_indices):            print("Invalid post index. Exiting.")            return        # Create a folder for the selected posts        folder_name = f"videos_{subreddit_name}"        os.makedirs(folder_name, exist_ok=True)        for chosen_post_index in chosen_posts_indices:            post_url = f"https://old.reddit.com{post_urls[chosen_post_index]}"            post_content = fetch_post_content(post_url)            text_content = f"{post_titles[chosen_post_index].text}\n\n{post_content}\n\nAuthor: {post_authors[chosen_post_index].text}"            create_video_folder(folder_name, chosen_post_index, text_content)    else:        print(f"Error: Unable to fetch data from Reddit. Status code: {response.status_code}")def fetch_post_content(post_url):    response = requests.get(post_url, headers={"User-Agent": "Mozilla/5.0"})    if response.status_code == 200:        soup = BeautifulSoup(response.text, 'html.parser')        # Adjust the following line based on the correct class or tag for post content        post_content = soup.find("div", class_="md").text        return post_content    else:        print(f"Error: Unable to fetch post content. Status code: {response.status_code}")        return ""def create_video_folder(base_folder, post_index, text_content):    video_folder = os.path.join(base_folder, f"video{post_index}")    os.makedirs(video_folder, exist_ok=True)    # Save text content as captions in a .txt file    captions_file_path = os.path.join(video_folder, "captions.txt")    with open(captions_file_path, 'w', encoding='utf-8') as captions_file:        captions_file.write(text_content)    # Convert text to speech and save as an audio file    audio_file_path = os.path.join(video_folder, "output.mp3")    tts = gTTS(text=text_content, lang='en-uk', slow=False)    tts.save(audio_file_path)    print(f"Text and descriptions converted to speech and saved as {audio_file_path}")if __name__ == "__main__":    subreddit_name = input("Enter the name of the subreddit: ")    num_posts = int(input("Enter the number of hot posts to fetch: "))    fetch_reddit_data(subreddit_name, num_posts)

I do not know if this is the right website for this but I need help!!!


Viewing all articles
Browse latest Browse all 11601

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>