Making some fixes
@@ -1,7 +1,6 @@
 FROM python:3.10-slim-bullseye

 COPY thesillyhome_src /thesillyhome_src
-# COPY thesillyhome /thesillyhome


 RUN apt-get update && apt-get install -y curl bash

@@ -14,7 +13,6 @@ RUN \
     nodejs

 RUN pip3 install -U setuptools && \
-    # pip3 install -r /thesillyhome/requirements.txt &&\
     pip3 install -e /thesillyhome_src/thesillyhome/ && \
     pip3 install appdaemon==4.2.1
@@ -5,7 +5,7 @@ volumes:
 services:
   thesillyhome:
     container_name: thesillyhome
-    image: lcmchris1/thesillyhome-container:latest
+    image: lcmchris1/thesillyhome-container:0.0.1
     restart: unless-stopped
     volumes:
       - thesillyhome_config:/thesillyhome_src/data/config/
@@ -6,5 +6,5 @@ appdaemon:
   plugins:
     HASS:
       type: hass
-      ha_url: <ha_url>
-      token: <ha_token>
+      ha_url: http://192.168.1.100:8123
+      token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJmODRjZTM2NmQzNTE0Y2UwOTYzMjFjZmZmZGRkZDk2ZSIsImlhdCI6MTY1NDQ5NTIyNSwiZXhwIjoxOTY5ODU1MjI1fQ.wBLKK5xqzUgcHgIEhhZcYsExHKIRB0gZErbDh6RkJhI
@@ -6,7 +6,8 @@ from pandas import DataFrame
 from sklearn.tree import DecisionTreeClassifier
 from datetime import datetime
 import copy
 import os
 import os.path
+import logging


 class ModelExecutor(hass.Hass):
@@ -18,36 +19,44 @@ class ModelExecutor(hass.Hass):
         self.log("TheSillyHome has now started!")

     def load_models(self):
+        '''
+        Loads all models to a dictionary
+        '''
         actuators = tsh_config.actuators
         act_model_set = {}
         for act in actuators:
-            if os.isFile(f"/data/model/{self.model_name_version}/{act}.pickle"):
+            if os.path.isfile(f"/thesillyhome_src/data/model/{self.model_name_version}/{act}.pickle"):
                 with open(
-                    f"/data/model/{self.model_name_version}/{act}.pickle", "rb"
+                    f"/thesillyhome_src/data/model/{self.model_name_version}/{act}.pickle", "rb"
                 ) as pickle_file:
                     content = pickle.load(pickle_file)
                     act_model_set[act] = content
             else:
-                print(f"No model for {act}")
                 act_model_set[act] = None
+                logging.info(f"No model for {act}")
         return act_model_set

     def state_handler(self, entity, attribute, old, new, kwargs):
         sensors = tsh_config.sensors
         float_sensors = tsh_config.float_sensors
+        self.log(f'Received state {entity}')

         if entity in sensors:
-            self.log(f"{entity} is {new}")
+            self.log(f"<----- {entity} is {new} ----->")

             # Get feature list from parsed data header, set all columns to 0
-            feature_list = pd.read_csv("/data/act_states.csv").columns
-            feature_list = feature_list.drop(["entity_id", "state"])
-            feature_list = pd.DataFrame(columns=feature_list)
-            feature_list = feature_list.append(pd.Series(), ignore_index=True)
-            feature_list.iloc[0] = 0
+            feature_list = pd.read_pickle(
+                "/thesillyhome_src/data/parsed/act_states.pkl").columns
+            feature_list = sorted(
+                list(set(feature_list) -
+                     set(['entity_id', 'state', "duplicate"]))
+            )
+
+            current_state_base = pd.DataFrame(columns=feature_list)
+            current_state_base.loc[len(current_state_base)] = 0
+            print(current_state_base)

-            # Get state of all sensors for model input
-            df_sen_states = copy.deepcopy(feature_list)
+            # Get current state of all sensors for model input
+            df_sen_states = copy.deepcopy(current_state_base)
             for sensor in sensors:
                 true_state = self.get_state(entity_id=sensor)
                 if sensor not in float_sensors:
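The feature-frame rework above is easier to follow in isolation. Below is a minimal, self-contained sketch of the new logic; the column names are invented stand-ins for whatever act_states.pkl actually contains, so treat it as an illustration rather than the project's exact data.

import pandas as pd

# Invented stand-in for the columns stored in act_states.pkl.
pickled_columns = [
    "entity_id", "state", "duplicate",
    "sensor.hall_motion_on", "sensor.hall_motion_off",
    "sensor.lux_level",
]

# Same shape as the committed code: drop the output columns, sort the rest.
feature_list = sorted(set(pickled_columns) - {"entity_id", "state", "duplicate"})

# One all-zero row that each prediction then overwrites with live sensor states.
current_state_base = pd.DataFrame(columns=feature_list)
current_state_base.loc[len(current_state_base)] = 0
print(current_state_base)

The replaced pandas code relied on DataFrame.append, which has since been deprecated (pandas 1.4) and removed (pandas 2.0), so the .loc-based row insertion is also the more future-proof idiom.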
@@ -59,10 +68,18 @@ class ModelExecutor(hass.Hass):

             # Execute all models for sensor and set states
             for act, model in self.act_model_set.items():
-                prediction = model.predict(df_sen_states)[0].split("::")[1]
-                if prediction == "on":
+                # the actuators feature state should not affect the model and also the duplicate column
+                cur_act_list = []
+                for feature in feature_list:
+                    if feature.startswith(act):
+                        cur_act_list.append(feature)
+                new_feature_list = sorted(list(set(feature_list) - set(cur_act_list)))
+                df_sen_states_less = df_sen_states[new_feature_list]
+
+                prediction = model.predict(df_sen_states_less)
+                if prediction == 1:
                     self.log(f"Turn on {act}")
                     self.turn_on(act)
-                elif prediction == "off":
+                elif prediction == 0:
                     self.log(f"Turn off {act}")
                     self.turn_off(act)
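Masking out the actuator's own one-hot columns before calling predict is the heart of this hunk. A small sketch, with invented entity names and no real classifier, shows what df_sen_states_less ends up holding:

import pandas as pd

feature_list = [
    "light.kitchen_on", "light.kitchen_off",
    "sensor.hall_motion_on", "sensor.hall_motion_off",
]
df_sen_states = pd.DataFrame([[1, 0, 1, 0]], columns=feature_list)

act = "light.kitchen"

# Drop every column derived from the actuator itself, so the model
# never sees its own target state as an input feature.
cur_act_list = [f for f in feature_list if f.startswith(act)]
new_feature_list = sorted(set(feature_list) - set(cur_act_list))
df_sen_states_less = df_sen_states[new_feature_list]

print(df_sen_states_less.columns.tolist())
# ['sensor.hall_motion_off', 'sensor.hall_motion_on']

Dropping the target's own columns prevents the tree from learning the identity mapping "light is on, so predict on", which the old full-frame predict path allowed.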
@@ -2,6 +2,8 @@
 from datetime import datetime
 import mysql.connector
 import pandas as pd
+import os.path
+import logging

 # Local application imports
 import thesillyhome.model_creator.read_config_json as tsh_config
@@ -20,7 +22,10 @@ class homedb:
         self.password = tsh_config.db_password
         self.database = tsh_config.db_database

-    def get_data(self):
+    def get_data(self, from_cache=False):
+        if from_cache and os.path.exists(f"{tsh_config.data_dir}/parsed/all_states.pkl"):
+            logging.info("Using cached all_states.pkl")
+            return pd.read_pickle(f"{tsh_config.data_dir}/parsed/all_states.pkl")
         mydb = mysql.connector.connect(
             host=self.host,
             port=self.port,
@@ -36,9 +41,8 @@ class homedb:
         mycursor.execute(query)
         myresult = mycursor.fetchall()
         df = pd.DataFrame.from_dict(myresult)
-        df.to_csv(f"{tsh_config.data_dir}/parsed/all_states.csv")
-        return df
-
-    def store_data(self, table: str):
-        today = datetime.today().strftime("%Y_%m_%d")
-        self.get_data(table).to_csv(f"{today}_{table}.csv")
+        df.to_csv(f"{tsh_config.data_dir}/parsed/all_states.csv")
+        df.to_pickle(f"{tsh_config.data_dir}/parsed/all_states.pkl")
+
+        return df
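The two home.py hunks together implement a simple cache-aside read path: hit MySQL once, persist the frame, and let later runs opt into the pickle. A condensed sketch of that flow, with run_query and data_dir as placeholders for the real homedb internals (both are assumptions, not the repo's API):

import logging
import os.path
import pandas as pd

def get_data(data_dir: str, run_query, from_cache: bool = False) -> pd.DataFrame:
    cache = f"{data_dir}/parsed/all_states.pkl"
    if from_cache and os.path.exists(cache):
        logging.info("Using cached all_states.pkl")
        return pd.read_pickle(cache)
    df = run_query()                                 # MySQL round trip on a miss
    df.to_csv(f"{data_dir}/parsed/all_states.csv")   # human-inspectable copy
    df.to_pickle(cache)                              # fast, dtype-preserving copy
    return df

Writing both a CSV and a pickle is deliberate: the CSV is easy to eyeball, while the pickle round-trips dtypes and the index exactly, which matters once downstream code starts asserting on NaN and inf values.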
@@ -7,6 +7,7 @@ from sklearn.tree import DecisionTreeClassifier, export_graphviz
 import numpy as np
 from sklearn.metrics import accuracy_score
 import pickle
+import logging

 # Local application imports
 import thesillyhome.model_creator.read_config_json as tsh_config
@@ -33,36 +34,43 @@ def visualize_tree(tree, actuators, feature_names, model_name_version):
     ]
     try:
         subprocess.check_call(command)
-        os.remove(f"{tsh_config.data_dir}/model/{model_name_version}/{actuator}.dot")
+        os.remove(
+            f"{tsh_config.data_dir}/model/{model_name_version}/{actuator}.dot")
     except:
         exit("Could not run dot, ie graphviz, to produce visualization")


-def train_model(actuators: list, model_name_version):
+def train_model(model_name_version):
+    '''
+    Train models for each actuator
+    '''
+    actuators = tsh_config.actuators
+
+    df_act_states = pd.read_pickle(
+        f"{tsh_config.data_dir}/parsed/act_states.pkl"
+    )
+    df_act_states = df_act_states.reset_index(drop=True)

-    # Generate feature and output vectors from act states.
-    df_act_states = pd.read_csv(
-        f"{tsh_config.data_dir}/parsed/act_states.csv", index_col=False
-    ).drop(columns=["index"])
-
-    df_act_states = df_act_states.replace([np.inf, -np.inf], np.nan)
-    df_act_states = df_act_states.fillna(999)

     output_list = tsh_config.output_list.copy()
     act_list = list(set(df_act_states.columns) - set(output_list))

     for actuator in actuators:
-        print(f"Training model for {actuator}")
+        logging.info(f"Training model for {actuator}")

         df_act = df_act_states[df_act_states["entity_id"] == actuator]
         df_act = df_act.reset_index(drop=True)

         if df_act.empty:
-            print(f"No cases found for {actuator}")
+            logging.info(f"No cases found for {actuator}")
             continue
-        output_vector = np.where(df_act["state"] == "on", 1, 0)

-        # the actuators state should not affect the model
+        '''
+        Setting output and feature vector
+        '''
+        output_vector = df_act["state"]

+        # the actuators feature state should not affect the model and also the duplicate column
         cur_act_list = []
         for feature in act_list:
             if feature.startswith(actuator):
@@ -78,20 +86,19 @@ def train_model(actuators: list, model_name_version):
     )

-    # # Weighting more recent observations more. 3 times if in top 50 percent
-    # sample_weight = np.ones(len(X_train))
-    # sample_weight[: int(len(sample_weight) * 0.2)] = 3
+    sample_weight = np.ones(len(X_train))
+    sample_weight[: int(len(sample_weight) * 0.2)] = 3

     # Weighting duplicates less
-    sample_weight = X_train["duplicate"]
-
-    X_train = X_train.drop(columns="duplicate")
-    X_test = X_test.drop(columns="duplicate")
+    sample_weight = sample_weight * X_train['duplicate']
+    X_train = X_train.drop(columns='duplicate')
+    X_test = X_test.drop(columns='duplicate')
+    y_train = y_train.drop(columns='duplicate')
+    y_test = y_test.drop(columns='duplicate')

     model_tree = DecisionTreeClassifier(random_state=99)
     model_tree.fit(X_train, y_train, sample_weight=sample_weight)

-    feature_list.remove("duplicate")

     # Visualization of tress:
     # tree_to_code(model_tree, feature_list)
     # visualize_tree(model_tree, feature_list)
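The re-enabled recency weighting now composes with the duplicate weighting instead of replacing it. A toy example with five rows makes the arithmetic concrete; the duplicate values are illustrative, since the diff only shows the multiplication, not how the column is populated upstream:

import numpy as np
import pandas as pd

X_train = pd.DataFrame({
    "sensor.hall_motion_on": [1, 0, 1, 0, 1],
    "duplicate": [10, 1, 1, 10, 1],   # assumed: higher = real state change
})

# Most recent 20% of rows count 3x.
sample_weight = np.ones(len(X_train))
sample_weight[: int(len(sample_weight) * 0.2)] = 3

# Compose with the duplicate weighting, then strip the helper column.
sample_weight = sample_weight * X_train["duplicate"]
X_train = X_train.drop(columns="duplicate")

print(sample_weight.tolist())  # [30.0, 1.0, 1.0, 10.0, 1.0]

Note that the previously commented-out header said "3 times if in top 50 percent" while the slice takes the top 20%; the code, not the comment, is what the classifier sees.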
@@ -100,7 +107,7 @@ def train_model(actuators: list, model_name_version):
     y_tree_predictions = model_tree.predict(X_test)

     # Extract predictions for each output variable and calculate accuracy and f1 score
-    print(
+    logging.info(
         f"{actuator} accuracy score: {accuracy_score(y_test, y_tree_predictions) * 100}"
     )
@@ -110,4 +117,4 @@ def train_model(actuators: list, model_name_version):

     filename = open(f"{model_directory}/{actuator}.pickle", "wb")
     pickle.dump(model_tree, filename)
-    print("Completed!")
+    logging.info("Completed!")
@@ -3,12 +3,12 @@
 import thesillyhome.model_creator.read_config_json as tsh_config
 from thesillyhome.model_creator.parse_data import parse_data_from_db
 from thesillyhome.model_creator.learning_model import train_model
+import logging

 if __name__ == "__main__":

+    logging.getLogger().setLevel(logging.INFO)
-    actuators = tsh_config.actuators
-    sensors = tsh_config.sensors
     model_name_version = tsh_config.model_name_version
     tsh_config.replace_yaml()
-    parse_data_from_db(actuators, sensors)
-    train_model(actuators, model_name_version)
+    parse_data_from_db()
+    train_model(model_name_version)
@@ -8,6 +8,7 @@ import tqdm
 from tqdm import tqdm
 import numpy as np
 from multiprocessing import cpu_count
+import logging

 # Local application imports
 from thesillyhome.model_creator.home import homedb
@@ -28,16 +29,24 @@ def parallelize_dataframe(df1, df2, devices, func):


 def add_device_states(df_output: pd.DataFrame, df_states: pd.DataFrame, devices, pbar):
+    '''
+    Convert dataframe to:
+    act_state, last_state, sen_state1, sen_state2...
+
+    1) add last_state for entity_id
+    2) add duplicate state
+    3) add latest states of all sensors
+    '''
     for index, row in df_output.iterrows():

-        # Add last_state non-float onto dataframe as trigger
+        # get last states (for non-float for now)
         last_device_state = df_states[
             (df_states["entity_id"].isin(devices))
             & (df_states["entity_id"] != row["entity_id"])
             & (df_states["last_changed"] < row["last_changed"])
             & ~(df_states["entity_id"].isin(tsh_config.float_sensors))
         ]

         if not last_device_state.empty:
             df_output.loc[
                 index, "last_state"
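The last_device_state filter above is a point-in-time lookup: for each actuator event, find the most recent prior event from any other device. A self-contained sketch with fabricated entities and timestamps (the real frame comes from Home Assistant's states table, and the committed code also excludes float sensors):

import pandas as pd

df_states = pd.DataFrame({
    "entity_id": ["sensor.door", "light.hall", "sensor.door"],
    "state": ["open", "on", "closed"],
    "last_changed": pd.to_datetime(
        ["2022-06-01 10:00", "2022-06-01 10:05", "2022-06-01 10:10"]
    ),
}).sort_values("last_changed", ascending=False)

row = {"entity_id": "light.hall", "last_changed": pd.Timestamp("2022-06-01 10:12")}

# Most recent prior event from a *different* entity.
last_device_state = df_states[
    (df_states["entity_id"] != row["entity_id"])
    & (df_states["last_changed"] < row["last_changed"])
]
print(last_device_state["state"].iloc[0])  # 'closed'

The .iloc[0] convention only works on a newest-first frame, so the sketch sorts descending before slicing, mirroring what the committed code appears to assume about the frame's ordering.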
@@ -45,12 +54,12 @@ def add_device_states(df_output: pd.DataFrame, df_states: pd.DataFrame, devices,
         else:
             df_output.loc[index, "last_state"] = np.NaN

-        # Value actual state changes more!
         last_current_device_state = df_states[
             (df_states["entity_id"] == row["entity_id"])
             & (df_states["last_changed"] < row["last_changed"])
         ]

+        # Value actual state changes more!
         if not last_current_device_state.empty:
             if last_current_device_state["state"].iloc[0] == row["state"]:
                 df_output.loc[index, "duplicate"] = 1
@@ -67,7 +76,8 @@ def add_device_states(df_output: pd.DataFrame, df_states: pd.DataFrame, devices,
             ]

             if not previous_device_state.empty:
-                df_output.loc[index, device] = previous_device_state["state"].iloc[0]
+                df_output.loc[index,
+                              device] = previous_device_state["state"].iloc[0]
             else:
                 if device in tsh_config.float_sensors:
                     df_output.loc[index, device] = 0
@@ -77,7 +87,6 @@ def add_device_states(df_output: pd.DataFrame, df_states: pd.DataFrame, devices,
     return df_output

-
 # Hot encoding for all features
 def one_hot_encoder(df: DataFrame, column: str) -> DataFrame:
     one_hot = pd.get_dummies(df[column], prefix=column)
     df = df.drop(column, axis=1)
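one_hot_encoder is a thin wrapper around pandas.get_dummies; its two visible lines, plus (presumably) a join of the dummies back onto the frame that the hunk cuts off, behave like this sketch:

import pandas as pd
from pandas import DataFrame

def one_hot_encoder(df: DataFrame, column: str) -> DataFrame:
    # Expand one categorical column into prefixed indicator columns.
    one_hot = pd.get_dummies(df[column], prefix=column)
    df = df.drop(column, axis=1)
    return df.join(one_hot)  # assumed: the join is outside the visible hunk

df = pd.DataFrame({"light.hall": ["on", "off", "on"]})
print(one_hot_encoder(df, "light.hall").columns.tolist())
# ['light.hall_off', 'light.hall_on']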
@@ -107,28 +116,32 @@ def convert_unavailabe(df: DataFrame) -> DataFrame:
     return df


-def parse_data_from_db(actuators: list, sensors: list):
+def parse_data_from_db():
     """
     Our data base currently stores by events.
     To create a valid ML classification case, we will parse all last
     sensor states for each actuator event and append it to the dataframe.
     """
+    actuators = tsh_config.actuators
+    sensors = tsh_config.sensors

-    print("Reading from homedb...")
+    logging.info("Reading from homedb...")
     df_all = homedb().get_data()
     df_all = convert_unavailabe(df_all)
-    assert ~df_all["state"].isnull().values.any(), df_all[df_all["state"].isnull()]
+    assert ~df_all["state"].isnull().values.any(
+    ), df_all[df_all["state"].isnull()]

-    print("Add previous state...")
+    logging.info("Add previous state...")
     devices = actuators + sensors
     df_states = df_all[df_all["entity_id"].isin(devices)]
     df_act_states = df_all[df_all["entity_id"].isin(actuators)]

     df_output = copy.deepcopy(df_act_states)

-    print("Start parallelization processing...")
+    logging.info("Start parallelization processing...")

-    df_output = parallelize_dataframe(df_output, df_states, devices, add_device_states)
+    df_output = parallelize_dataframe(
+        df_output, df_states, devices, add_device_states)

     """
     Code to add one hot encoding for date time.
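parallelize_dataframe itself is outside this diff, but the call pattern (split the actuator events, farm add_device_states out across cores, stitch the chunks back together) is the standard multiprocessing idiom for row-wise pandas work. A generic sketch under that assumption; the repo's actual implementation, including its tqdm progress-bar argument, may differ:

import numpy as np
import pandas as pd
from functools import partial
from multiprocessing import Pool, cpu_count

def add_one(chunk: pd.DataFrame, offset: int) -> pd.DataFrame:
    # Stand-in for add_device_states: any row-wise transform works here.
    return chunk + offset

def parallelize_dataframe(df, func):
    # Split into one chunk per core, map in parallel, reassemble in order.
    chunks = np.array_split(df, cpu_count())
    with Pool(cpu_count()) as pool:
        out = pool.map(func, chunks)
    return pd.concat(out)

if __name__ == "__main__":
    df = pd.DataFrame({"x": range(10)})
    print(parallelize_dataframe(df, partial(add_one, offset=1))["x"].tolist())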
@@ -140,24 +153,31 @@ def parse_data_from_db(actuators: list, sensors: list):
     )
     df_output = df_output.drop(columns=["last_changed"])

+    '''
+    feature list extraction
+    '''
     output_list = tsh_config.output_list.copy()
     output_list.append("duplicate")
     feature_list = sorted(list(set(df_output.columns) - set(output_list)))

+    '''
+    Hot encoding for all columns bar float_sensors which has int format
+    '''
     float_sensors = tsh_config.float_sensors
     for feature in feature_list:
         # For float sensors, these are already in Int format so no encoding.
         if feature not in float_sensors:
             df_output = one_hot_encoder(df_output, feature)

-    # Remove some empty entity_id rows
-    df_output = df_output[df_output["entity_id"] != ""]
+    '''
+    Output and checks
+    '''
+    df_output["state"] = np.where(df_output["state"] == "on", 1, 0)

-    assert ~df_output.isnull().values.any(), df_output[
-        df_output["sensor.corridor_entrance_sensor_illuminance_lux"].isnull()
-    ]
+    assert df_output[df_output["entity_id"] == ""].empty
+    assert ~df_output.isnull().values.any()
+    assert ~df_output.isin([np.inf, -np.inf, np.nan]).values.any()

     df_output.to_csv(
         f"{tsh_config.data_dir}/parsed/act_states.csv", index=True, index_label="index"
     )
+    df_output.to_pickle(f"{tsh_config.data_dir}/parsed/act_states.pkl")
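The new "Output and checks" block front-loads data validation: the label becomes a clean 0/1 integer, and any NaN or inf left in the features fails fast here rather than later inside scikit-learn. In miniature, on invented toy data:

import numpy as np
import pandas as pd

df_output = pd.DataFrame({
    "entity_id": ["light.hall", "light.hall"],
    "state": ["on", "off"],
    "sensor.lux_level": [120, 85],
})

# Binarize the label exactly as the committed line does.
df_output["state"] = np.where(df_output["state"] == "on", 1, 0)

# Same three sanity checks as the hunk above.
assert df_output[df_output["entity_id"] == ""].empty
assert ~df_output.isnull().values.any()
assert ~df_output.isin([np.inf, -np.inf, np.nan]).values.any()

print(df_output["state"].tolist())  # [1, 0]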
@@ -62,7 +62,9 @@ def extract_float_sensors(sensors: list):

 float_sensors = extract_float_sensors(sensors)

+output_list_og = ["entity_id", "state"]
 output_list = ["entity_id", "state", "last_changed"]
+output_list_dup = ["entity_id", "state", "last_changed", "duplicate"]


 def replace_yaml():