Implemented washinsa parser in Python
This commit is contained in:
parent
4cbc13dd01
commit
6a072cb4e2
8 changed files with 321 additions and 181 deletions
102
__examples__/washinsa.json
Normal file
102
__examples__/washinsa.json
Normal file
|
@ -0,0 +1,102 @@
|
|||
{
|
||||
"dryers": [
|
||||
{
|
||||
"number": "1",
|
||||
"state": "DISPONIBLE",
|
||||
"startTime": "",
|
||||
"endTime": "",
|
||||
"donePercent": "",
|
||||
"remainingTime": ""
|
||||
},
|
||||
{
|
||||
"number": "2",
|
||||
"state": "EN COURS",
|
||||
"startTime": "15:24",
|
||||
"endTime": "15:59",
|
||||
"donePercent": "62.9",
|
||||
"remainingTime": "13"
|
||||
},
|
||||
{
|
||||
"number": "3",
|
||||
"state": "TERMINE",
|
||||
"startTime": "",
|
||||
"endTime": "",
|
||||
"donePercent": "",
|
||||
"remainingTime": ""
|
||||
}
|
||||
],
|
||||
"washers": [
|
||||
{
|
||||
"number": "4",
|
||||
"state": "EN COURS",
|
||||
"startTime": "\u00a0",
|
||||
"endTime": "\u00a0",
|
||||
"donePercent": "",
|
||||
"remainingTime": "0"
|
||||
},
|
||||
{
|
||||
"number": "5",
|
||||
"state": "EN COURS",
|
||||
"startTime": "15:35",
|
||||
"endTime": "16:15",
|
||||
"donePercent": "26.9",
|
||||
"remainingTime": "29"
|
||||
},
|
||||
{
|
||||
"number": "6",
|
||||
"state": "DISPONIBLE",
|
||||
"startTime": "",
|
||||
"endTime": "",
|
||||
"donePercent": "",
|
||||
"remainingTime": ""
|
||||
},
|
||||
{
|
||||
"number": "7",
|
||||
"state": "EN COURS",
|
||||
"startTime": "15:34",
|
||||
"endTime": "16:19",
|
||||
"donePercent": "26",
|
||||
"remainingTime": "33"
|
||||
},
|
||||
{
|
||||
"number": "8",
|
||||
"state": "DISPONIBLE",
|
||||
"startTime": "",
|
||||
"endTime": "",
|
||||
"donePercent": "",
|
||||
"remainingTime": ""
|
||||
},
|
||||
{
|
||||
"number": "9",
|
||||
"state": "DISPONIBLE",
|
||||
"startTime": "",
|
||||
"endTime": "",
|
||||
"donePercent": "",
|
||||
"remainingTime": ""
|
||||
},
|
||||
{
|
||||
"number": "10",
|
||||
"state": "DISPONIBLE",
|
||||
"startTime": "",
|
||||
"endTime": "",
|
||||
"donePercent": "",
|
||||
"remainingTime": ""
|
||||
},
|
||||
{
|
||||
"number": "11",
|
||||
"state": "DISPONIBLE",
|
||||
"startTime": "",
|
||||
"endTime": "",
|
||||
"donePercent": "",
|
||||
"remainingTime": ""
|
||||
},
|
||||
{
|
||||
"number": "12",
|
||||
"state": "EN COURS",
|
||||
"startTime": "15:02",
|
||||
"endTime": "15:47",
|
||||
"donePercent": "97.7",
|
||||
"remainingTime": "1"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
{"dryers":[{"number":"1","state":"DISPONIBLE","startTime":"","endTime":"","donePercent":"","remainingTime":""},{"number":"2","state":"EN COURS","startTime":"15:24","endTime":"15:59","donePercent":"62.9","remainingTime":"13"},{"number":"3","state":"TERMINE","startTime":"","endTime":"","donePercent":"","remainingTime":""}],"washers":[{"number":"4","state":"EN COURS","startTime":"\u00a0","endTime":"\u00a0","donePercent":"","remainingTime":"0"},{"number":"5","state":"EN COURS","startTime":"15:35","endTime":"16:15","donePercent":"26.9","remainingTime":"29"},{"number":"6","state":"DISPONIBLE","startTime":"","endTime":"","donePercent":"","remainingTime":""},{"number":"7","state":"EN COURS","startTime":"15:34","endTime":"16:19","donePercent":"26","remainingTime":"33"},{"number":"8","state":"DISPONIBLE","startTime":"","endTime":"","donePercent":"","remainingTime":""},{"number":"9","state":"DISPONIBLE","startTime":"","endTime":"","donePercent":"","remainingTime":""},{"number":"10","state":"DISPONIBLE","startTime":"","endTime":"","donePercent":"","remainingTime":""},{"number":"11","state":"DISPONIBLE","startTime":"","endTime":"","donePercent":"","remainingTime":""},{"number":"12","state":"EN COURS","startTime":"15:02","endTime":"15:47","donePercent":"97.7","remainingTime":"1"}]}
|
|
@ -1,179 +0,0 @@
|
|||
<?php
|
||||
/**
|
||||
* Wash-INSA PARSER
|
||||
* This program get all information about the laundry
|
||||
* Author : Gabin NOBLET (Promo 55)
|
||||
* Date : 07/2019
|
||||
* Version 1.0
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Get the type of a machine from its label.
 *
 * Only the first space-separated word of the label is inspected.
 *
 * @param string $str Machine label
 * @return int 1 for dryers (first word ends with "SECHE"),
 *             2 for washers (first word ends with "LAVE"), 0 otherwise
 */
function mType($str)
{
    $words = explode(' ', $str);
    $firstWord = $words[0];

    if (substr($firstWord, -5) == "SECHE") {
        return 1;
    }
    if (substr($firstWord, -4) == "LAVE") {
        return 2;
    }
    return 0;
}
|
||||
|
||||
/**
 * Get the status of a machine from the cells of its row.
 *
 * The status is read from the first space-separated word of the third cell.
 * When that word is neither "DISPONIBLE" nor "TERMINE", the machine is
 * considered running as soon as a fifth cell (time information) exists.
 *
 * @param array $node Cells (DOMNode) of the machine row
 * @return int 1 for available, 2 for current, 3 for finished, 0 for out of order
 */
function mStatus($node)
{
    $words = explode(' ', $node[2]->nodeValue);
    $firstWord = $words[0];

    if (substr($firstWord, -10) == "DISPONIBLE") {
        return 1;
    }
    if (substr($firstWord, -7) == "TERMINE") {
        return 3;
    }
    // If there is time information, it is current
    return isset($node[4]) ? 2 : 0;
}
|
||||
|
||||
/**
 * Clean a string: decode the special HTML entities, then remove the
 * whitespace (spaces, tabs, line returns) before and after the text.
 *
 * @param string $str String to clean
 * @return string Decoded and trimmed string
 */
function clean($str)
{
    $decoded = htmlspecialchars_decode($str);
    return trim($decoded);
}
|
||||
|
||||
|
||||
/**
 * Download the laundry page, read every machine row of the
 * "liste-machines" element and write the result to washinsa.json.
 *
 * The file contains an object with two lists, 'dryers' and 'washers';
 * rows whose type is neither of them are ignored.
 */
function generateJson()
{
    $page = new DOMDocument();
    $page->loadHTMLFile("https://www.proxiwash.com/weblaverie/ma-laverie-2?s=cf4f39&16d33a57b3fb9a05d4da88969c71de74=1"); //Get page

    $division = $page->getElementById("liste-machines");
    $liste_machines = $division->childNodes[0]->childNodes;

    //Remove titles
    $titles = $liste_machines->item(0);
    $titles->parentNode->removeChild($titles);

    $return = array(
        'dryers' => array(),
        'washers' => array()
    );

    foreach ($liste_machines as $machines) {
        //Format array
        $template = array(
            'number' => "",
            'state' => "",
            'startTime' => "",
            'endTime' => "",
            'donePercent' => "",
            'remainingTime' => ""
        );

        //Get info from HTML: only the td children of the row are kept
        $specsArray = array();
        foreach ($machines->childNodes as $info) {
            if (isset($info->tagName) && $info->tagName == "td") {
                $specsArray[] = $info;
            }
        }

        //Get Type
        $type = mType(clean($specsArray[0]->nodeValue));

        //Get Number
        preg_match('/\d{1,2}/', $specsArray[1]->nodeValue, $number);
        $template['number'] = $number[0];

        //Get Status and DonePercent
        switch (mStatus($specsArray)) {
            case 1:
                $template['state'] = "DISPONIBLE";
                break;
            case 2:
                $template['state'] = "EN COURS";
                // The progress is the width of the green cell of the table
                // found inside the status cell, without its last character
                foreach ($specsArray[2]->childNodes as $child) {
                    if (isset($child->tagName) && $child->tagName == "table") {
                        $progressBar = $child->childNodes[0]->childNodes[0];
                        if ($progressBar->getAttribute('bgcolor') == "Green") {
                            $template['donePercent'] = substr($progressBar->getAttribute('width'), 0, -1);
                        }
                    }
                }
                break;
            case 3:
                $template['state'] = "TERMINE";
                break;
            default:
                $template['state'] = "HORS SERVICE";
                break;
        }

        if ($template['state'] === "EN COURS") { // We set Times only when they could exist
            //Get StartTime
            $template['startTime'] = $specsArray[4]->nodeValue;
            //Get EndTime
            $template['endTime'] = $specsArray[5]->nodeValue;
            $template['remainingTime'] = get_remaining_time($template['startTime'], $template['endTime'], $template['donePercent']);
        }

        if ($type == 1) {
            $return['dryers'][] = $template;
        } elseif ($type == 2) {
            $return['washers'][] = $template;
        }
    }

    $jsonData = json_encode($return);
    file_put_contents('washinsa.json', $jsonData);
}
|
||||
|
||||
|
||||
/**
 * Get remaining time for the current machine
 *
 * The remaining time is the part of the start-to-end duration that is not
 * covered by the done percentage. An end time earlier than the start time
 * means that the machine ends the following day.
 *
 * @param string $startTime Start time in HH:MM format
 * @param string $endTime End time in HH:MM format
 * @param string|float $percentDone Done percentage; a non numeric value counts as 0
 * @return string Remaining minutes rounded to the nearest integer,
 *                "0" when one of the times is not in HH:MM format
 */
function get_remaining_time($startTime, $endTime, $percentDone)
{
    $timePattern = '/^\s*(\d{1,2}):(\d{2})\s*$/';
    if (!preg_match($timePattern, (string)$startTime, $start)
        || !preg_match($timePattern, (string)$endTime, $end)) {
        // Blank or malformed time (e.g. a cell holding only a non-breaking
        // space): nothing can be computed
        return "0";
    }

    // Plain minute arithmetic: no dependency on the current date or timezone
    $startMinutes = (int)$start[1] * 60 + (int)$start[2];
    $endMinutes = (int)$end[1] * 60 + (int)$end[2];
    if ($startMinutes > $endMinutes) { // Machine ends the following day
        $endMinutes += 24 * 60;
    }

    // An empty or non numeric percentage counts as nothing done; values
    // outside 0-100 are clamped so that the result cannot be negative
    $percent = is_numeric($percentDone) ? (float)$percentDone : 0.0;
    $percent = max(0.0, min(100.0, $percent));

    $remainingTime = ($endMinutes - $startMinutes) * (1 - $percent / 100);
    return strval(round($remainingTime, 0));
}
|
||||
|
||||
generateJson();
|
218
washinsa/washinsa_handler.py
Normal file
218
washinsa/washinsa_handler.py
Normal file
|
@ -0,0 +1,218 @@
|
|||
# Parser made with BeautifulSoup4
|
||||
# https://www.crummy.com/software/BeautifulSoup/bs4/doc
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import urllib.request
|
||||
from enum import Enum, auto
|
||||
import pprint
|
||||
import re
|
||||
import json
|
||||
|
||||
'''
|
||||
PAGE STRUCTURE
|
||||
as of June 2020
|
||||
|
||||
A table with a row (tr html tag) for each machine
|
||||
Each machine row is composed of 6 columns
|
||||
- 1 - Type ("SECHE LINGE 14 KG" or "LAVE LINGE 6 KG")
|
||||
- 2 - Number ("No X" with X the current number)
|
||||
- 3 - Status (A string showing the status or a table showing the progress)
|
||||
- 4 - Program (Name of the program or empty)
|
||||
- 5 - Start time (The start time in format HH:MM or empty)
|
||||
- 6 - End time (The end time in format HH:MM or empty)
|
||||
'''
|
||||
|
||||
# File the parsed machine data is written to by main()
DUMP_FILE = "washinsa_data.json"

# Page of the proxiwash website fetched by download_page()
WASHINSA_URL = "https://www.proxiwash.com/weblaverie/component/weblaverie/?view=instancesfiche&format=raw&s=cf4f39"

# Text looked for in the Type cell to recognise a dryer
DRYER_STRING = "SECHE LINGE"
|
||||
|
||||
|
||||
class State(Enum):
    """
    States a machine can be in.

    The integer value of the member is what is written in the "state" field of the dumped data.
    """

    AVAILABLE = 0  # "DISPONIBLE" on the page
    RUNNING = 1  # The Status cell shows a progress table
    RUNNING_NOT_STARTED = 2  # Progress table shown but no valid start time
    FINISHED = 3  # "TERMINE" on the page
    UNAVAILABLE = 4  # "HORS SERVICE" on the page
    ERROR = 5  # "ERREUR" on the page
    UNKNOWN = 6  # The state text was not recognised
|
||||
|
||||
|
||||
# Conversion table from the state string given by the page to the State enum.
# get_machine_state() tries the keys in this order and keeps the first one
# found inside the text of the Status cell.
STATE_CONVERSION_TABLE = {
    "DISPONIBLE": State.AVAILABLE,
    "TERMINE": State.FINISHED,
    "HORS SERVICE": State.UNAVAILABLE,
    "ERREUR": State.ERROR,
}
|
||||
|
||||
# Matches a time written as HH:MM (two digits, a colon, two digits).
# Raw string: "\d" inside a plain string literal is an invalid escape sequence.
TIME_RE = re.compile(r"^\d\d:\d\d$")
|
||||
|
||||
|
||||
def download_page(timeout=30):
    """
    Downloads the page from proxiwash website

    :param timeout: Maximum time in seconds to wait for the server, so that
        an unresponsive website cannot block the script forever
    :return: The page content as a string, or an empty string if the
        download or the decoding failed
    """
    try:
        with urllib.request.urlopen(WASHINSA_URL, timeout=timeout) as response:
            return response.read().decode()
    except Exception as error:
        # Best effort: any failure gives an empty page. Unlike a bare except,
        # this lets KeyboardInterrupt and SystemExit go through.
        print("Error processing following url: " + WASHINSA_URL)
        print(error)
        return ""
|
||||
|
||||
|
||||
def get_rows(soup):
    """
    Gets rows corresponding to machines on the page

    The first child of the table is skipped (presumably the header row, to be confirmed against the page).

    :param soup: The parsed page
    :return: A new list of the table children without the first one,
        or an empty list if the page contains no table
    """
    table = soup.table
    if table is None:
        # No table at all, for instance when the downloaded page is empty
        return []
    # Slice instead of deleting in place: the parsed tree is left untouched
    return list(table.contents[1:])
|
||||
|
||||
|
||||
def is_machine_dryer(row):
    """
    Tells if the given machine is a dryer. Any machine which is not a dryer is considered a washer.

    The machine is a dryer when the dryer test string is included in the text of the Type cell.
    """
    type_text = row.contents[0].text
    return DRYER_STRING in type_text
|
||||
|
||||
|
||||
def get_machine_number(row):
    """
    Gets the current machine number.

    The text of the Number cell is split on whitespace and the second word is returned,
    which drops the "No" prefix.
    The index of the row in the list is not used as it could get lost in parsing,
    and reading the cell also works with non numeric machine identifiers.
    """
    number_cell = row.contents[1]
    words = number_cell.text.split()
    return words[1]
|
||||
|
||||
|
||||
def get_machine_state(row):
    """
    Gets the current machine state.

    The state is usually written in plain text inside the State cell.
    In this case, we simply check it against the state translation table,
    to turn it into a State Enum, which is easier to manipulate.

    In some cases, this plain text is replaced by a progress bar (a table).
    This means the machine is running.

    If the State cell is empty or if the state string cannot be recognized,
    this returns the unknown state.
    """
    cell_children = row.contents[2].contents
    if not cell_children:
        # Empty State cell: nothing to recognize
        return State.UNKNOWN

    content = cell_children[0]
    if content.name == "table":
        return State.RUNNING

    for key, state in STATE_CONVERSION_TABLE.items():
        if key in content.text:
            return state
    return State.UNKNOWN
|
||||
|
||||
|
||||
def get_machine_program(row):
    """
    Gets the machine program: the text of the Program cell without the surrounding whitespace
    """
    program_cell = row.contents[3]
    return program_cell.text.strip()
|
||||
|
||||
|
||||
def get_machine_times(row):
    """
    Gets the start and end time for the machine.

    The surrounding whitespace of each cell is ignored.
    If one of these times is invalid (does not respect the HH:MM format),
    both times are returned as empty strings.

    :return: A (start_time, end_time) tuple of strings
    """
    # Strip first: padding around a valid time must not make it invalid,
    # and must not end up in the returned value either
    start_time = row.contents[4].text.strip()
    end_time = row.contents[5].text.strip()
    if TIME_RE.match(start_time) and TIME_RE.match(end_time):
        return start_time, end_time
    return "", ""
|
||||
|
||||
|
||||
def get_machine_done_percent(row):
    """
    Gets the machine done percentage.

    This percent is given in the width property of the first column, in the table inside the State cell.
    This only applies if the machine is running.

    :return: The percentage as a string without the "%" sign ('100' if the page gives more than 100),
        or an empty string if the machine is not running or if the width cannot be read as a number
    """
    content = row.contents[2].contents[0]
    if content.name != "table":
        return ""
    try:
        percent = content.td["width"].replace("%", "").strip()
        value = float(percent)
    except (AttributeError, KeyError, TypeError, ValueError):
        # No column in the table, no width property or non numeric width:
        # one unreadable progress bar must not stop the whole parsing
        return ""
    if value > 100:  # because the website is not doing this check...
        return '100'
    return percent
|
||||
|
||||
|
||||
def get_machine_remaining_time(row):
    """
    Gets the remaining time in minutes.

    The time is written in the title property of the table inside the State cell.
    It is written inside a phrase, so we need to extract it: we keep the first word following the first "=" sign.
    This only applies if the machine is running.

    :return: The remaining time as a string, or the integer 0 if the machine is not running
        or if the time cannot be found in the title
    """
    content = row.contents[2].contents[0]
    if content.name != "table":
        return 0
    try:
        return content["title"].split("=")[1].split()[0]
    except (KeyError, IndexError):
        # No title property, no "=" sign in it or nothing after the sign
        return 0
|
||||
|
||||
|
||||
def get_parsed_data(rows):
    """
    Gets the parsed data from the web page, formatting it in an easy to use object

    Each machine is a dictionary with the number, state, startTime, endTime, donePercent,
    remainingTime and program keys. Everything but the number and the state stays an
    empty string unless the machine is running. A running machine without a start time
    gets the "running not started" state.

    :param rows: The machine rows of the page
    :return: A dictionary with the list of dryers and the list of washers
    """
    dryers, washers = [], []
    for row in rows:
        state = get_machine_state(row)
        number = get_machine_number(row)
        start_time = end_time = done_percent = remaining_time = program = ""

        if state == State.RUNNING:
            start_time, end_time = get_machine_times(row)
            if not start_time:
                state = State.RUNNING_NOT_STARTED
            program = get_machine_program(row)
            done_percent = get_machine_done_percent(row)
            remaining_time = get_machine_remaining_time(row)

        machine = {
            "number": number,
            "state": state.value,
            "startTime": start_time,
            "endTime": end_time,
            "donePercent": done_percent,
            "remainingTime": remaining_time,
            "program": program,
        }
        target = dryers if is_machine_dryer(row) else washers
        target.append(machine)

    return {"dryers": dryers, "washers": washers}
|
||||
|
||||
|
||||
def main():
    """
    Downloads the page, parses it and writes the parsed data as JSON in the dump file
    """
    soup = BeautifulSoup(download_page(), 'html.parser')
    rows = get_rows(soup)
    # Parse before opening the file: opening in 'w' mode empties it, so a
    # parsing error inside the "with" would wipe the previous dump
    data = get_parsed_data(rows)
    with open(DUMP_FILE, 'w') as f:
        json.dump(data, f)


# Only run when executed as a script, not when the module is imported
if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash

touch lock
# Remove the lock when the script exits, whatever the reason, so that an
# interrupted run cannot leave a stale lock behind.
# Note: the exit status of the script is now the one of the last command.
trap 'rm -f lock' EXIT

php index.php
python3 washinsa_handler.py > log 2> err
|
||||
|
|
Loading…
Reference in a new issue