Update fb parser
This commit is contained in:
parent 33fd3a4668
commit 4f8c054663
2 changed files with 5 additions and 3 deletions
|
@ -1,20 +1,21 @@
|
||||||
import json
|
import json
|
||||||
import facebook_scraper
|
import facebook_scraper
|
||||||
import enum
|
|
||||||
|
|
||||||
FILE = 'facebook_data.json'
|
FILE = 'facebook_data.json'
|
||||||
|
|
||||||
PAGES = ["amicale.deseleves", "campus.insat"]
|
PAGES = ["amicale.deseleves", "campus.insat"]
|
||||||
|
|
||||||
|
|
||||||
def scrape_data(page):
|
def scrape_data(page):
|
||||||
post_list = []
|
post_list = []
|
||||||
for post in facebook_scraper.get_posts(page, pages=3):
|
for post in facebook_scraper.get_posts(page, pages=4):
|
||||||
print(post)
|
print(post)
|
||||||
cleaned_post = {
|
cleaned_post = {
|
||||||
"id": post["post_id"],
|
"id": post["post_id"],
|
||||||
"message": post["post_text"],
|
"message": post["post_text"],
|
||||||
"url": post["post_url"],
|
"url": post["post_url"],
|
||||||
"image": post["image"],
|
"image": post["image"],
|
||||||
|
"images": post["images"],
|
||||||
"video": post["video"],
|
"video": post["video"],
|
||||||
"link": post["link"],
|
"link": post["link"],
|
||||||
"time": post["time"].timestamp(),
|
"time": post["time"].timestamp(),
|
||||||
|
@ -27,6 +28,7 @@ def scrape_data(page):
|
||||||
def get_all_data():
|
def get_all_data():
|
||||||
data = {}
|
data = {}
|
||||||
for page in PAGES:
|
for page in PAGES:
|
||||||
|
print(" -> " + page)
|
||||||
data[page] = scrape_data(page)
|
data[page] = scrape_data(page)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ bs4==0.0.1
|
||||||
certifi==2020.6.20
|
certifi==2020.6.20
|
||||||
chardet==3.0.4
|
chardet==3.0.4
|
||||||
cssselect==1.1.0
|
cssselect==1.1.0
|
||||||
facebook-scraper==0.2.9
|
facebook-scraper==0.2.34
|
||||||
fake-useragent==0.1.11
|
fake-useragent==0.1.11
|
||||||
html2text==2020.1.16
|
html2text==2020.1.16
|
||||||
idna==2.10
|
idna==2.10
|
||||||
|
|
Loading…
Reference in a new issue