Update fb parser

This commit is contained in:
Arnaud Vergnet 2021-05-09 11:20:33 +02:00
parent 33fd3a4668
commit 4f8c054663
2 changed files with 5 additions and 3 deletions

View file

@@ -1,20 +1,21 @@
import json import json
import facebook_scraper import facebook_scraper
import enum
FILE = 'facebook_data.json' FILE = 'facebook_data.json'
PAGES = ["amicale.deseleves", "campus.insat"] PAGES = ["amicale.deseleves", "campus.insat"]
def scrape_data(page): def scrape_data(page):
post_list = [] post_list = []
for post in facebook_scraper.get_posts(page, pages=3): for post in facebook_scraper.get_posts(page, pages=4):
print(post) print(post)
cleaned_post = { cleaned_post = {
"id": post["post_id"], "id": post["post_id"],
"message": post["post_text"], "message": post["post_text"],
"url": post["post_url"], "url": post["post_url"],
"image": post["image"], "image": post["image"],
"images": post["images"],
"video": post["video"], "video": post["video"],
"link": post["link"], "link": post["link"],
"time": post["time"].timestamp(), "time": post["time"].timestamp(),
@@ -27,6 +28,7 @@ def scrape_data(page):
def get_all_data(): def get_all_data():
data = {} data = {}
for page in PAGES: for page in PAGES:
print(" -> " + page)
data[page] = scrape_data(page) data[page] = scrape_data(page)
return data return data

View file

@@ -4,7 +4,7 @@ bs4==0.0.1
certifi==2020.6.20 certifi==2020.6.20
chardet==3.0.4 chardet==3.0.4
cssselect==1.1.0 cssselect==1.1.0
facebook-scraper==0.2.9 facebook-scraper==0.2.34
fake-useragent==0.1.11 fake-useragent==0.1.11
html2text==2020.1.16 html2text==2020.1.16
idna==2.10 idna==2.10