Browse Source

Use facebook scraper

The Facebook Graph API was too annoying to use, so I switched to a good old scraper.
Arnaud Vergnet 3 years ago
parent
commit
92863bd82c
3 changed files with 38 additions and 8 deletions
  1. 2
    2
      cron_appli_update.sh
  2. 35
    0
      facebook/facebook_handler.py
  3. 1
    6
      facebook/facebook_update.sh

+ 2
- 2
cron_appli_update.sh View File

@@ -31,8 +31,8 @@
31 31
 # Update the menu every day
32 32
 0 0 * * * cd "$HOME"/public_html/v2/menu/ && ./menu_update.sh >/dev/null 2>&1
33 33
 
34
-# Update facebook data every minute
35
-* * * * * cd "$HOME"/public_html/v2/facebook/ && ./facebook_update.sh >/dev/null 2>&1
34
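+# Update facebook data every 5 minutes (NOTE: a "5 * * * *" schedule fires once per hour at minute 5; "*/5 * * * *" is the every-5-minutes form)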
35
+5 * * * * cd "$HOME"/public_html/v2/facebook/ && ./facebook_update.sh >/dev/null 2>&1
36 36
 
37 37
 # Update the dashboard every 20 sec. The dashboard also updates the machine list
38 38
 # Call 3 times, once with a 20 sec delay and once with a 40 sec delay, because cron cannot run a job more often than once per minute

+ 35
- 0
facebook/facebook_handler.py View File

@@ -0,0 +1,35 @@
1
+import json
2
+from facebook_scraper import get_posts
3
+
4
+FILE = 'facebook_data.json'
5
+
6
+
7
+def scrape_data():
8
+    post_list = []
9
+    for post in get_posts('amicale.deseleves', pages=3):
10
+        print(post)
11
+        cleaned_post = {
12
+            "post_id": post["post_id"],
13
+            "post_text": post["post_text"],
14
+            "post_url": post["post_url"],
15
+            "image": post["image"],
16
+            "video": post["video"],
17
+            "link": post["link"],
18
+            "time": post["time"].timestamp(),
19
+        }
20
+        post_list.append(cleaned_post)
21
+    return post_list
22
+
23
+
24
+def write_data(data):
25
+    with open(FILE, 'w') as f:
26
+        json.dump(data, f)
27
+
28
+
29
+def main():
30
+    print("Fetching facebook data...")
31
+    write_data(scrape_data())
32
+    print('DONE')
33
+
34
+
35
+main()

+ 1
- 6
facebook/facebook_update.sh View File

@@ -1,10 +1,5 @@
1 1
 #!/bin/bash
2 2
 
3
-# A token is required to access the facebook public page
4
-# This token must be saved in a file named "token" in the same folder as this script
5
-# /!\ Do not sync this token with git /!\
6
-
7 3
 touch lock
8
-token=$(cat token)
9
-curl "https://graph.facebook.com/v7.0/amicale.deseleves/published_posts?fields=full_picture,message,permalink_url,created_time&date_format=U&access_token=$token" > facebook_data.json
4
+python3 facebook_handler.py > log 2> err
10 5
 rm lock

Loading…
Cancel
Save