From 4f8c0546634d2791a15696e5837bd063d0d1d677 Mon Sep 17 00:00:00 2001 From: Arnaud Vergnet Date: Sun, 9 May 2021 11:20:33 +0200 Subject: [PATCH] Update fb parser --- facebook/facebook_handler.py | 6 ++++-- requirements.txt | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/facebook/facebook_handler.py b/facebook/facebook_handler.py index a3a6b7e..8c2400d 100644 --- a/facebook/facebook_handler.py +++ b/facebook/facebook_handler.py @@ -1,20 +1,21 @@ import json import facebook_scraper -import enum FILE = 'facebook_data.json' PAGES = ["amicale.deseleves", "campus.insat"] + def scrape_data(page): post_list = [] - for post in facebook_scraper.get_posts(page, pages=3): + for post in facebook_scraper.get_posts(page, pages=4): print(post) cleaned_post = { "id": post["post_id"], "message": post["post_text"], "url": post["post_url"], "image": post["image"], + "images": post["images"], "video": post["video"], "link": post["link"], "time": post["time"].timestamp(), @@ -27,6 +28,7 @@ def scrape_data(page): def get_all_data(): data = {} for page in PAGES: + print(" -> " + page) data[page] = scrape_data(page) return data diff --git a/requirements.txt b/requirements.txt index 5e90153..0bf495f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ bs4==0.0.1 certifi==2020.6.20 chardet==3.0.4 cssselect==1.1.0 -facebook-scraper==0.2.9 +facebook-scraper==0.2.34 fake-useragent==0.1.11 html2text==2020.1.16 idna==2.10