Overview#
Regression predicts a continuous target value based on the input time series.
Import#
from sktime.datasets import load_covid_3month
from sktime.regression.distance_based import KNeighborsTimeSeriesRegressor
from sklearn.metrics import mean_squared_error
Data#
The goal of this dataset (Covid3Month) is to predict each country’s COVID-19 death rate on 1 April 2020 from its daily confirmed case counts over the preceding three months. The dataset contains 201 time series, one per country, where each series is that country’s daily confirmed cases. The data was obtained from the WHO’s COVID-19 database; please refer to https://covid19.who.int/ for more details.
# Fetch the predefined train and test splits of the dataset.
X_train, y_train = load_covid_3month(split="train")
X_test, y_test = load_covid_3month(split="test")

# Cast both target vectors to float so they are numeric regression targets.
# NOTE(review): presumably the loader does not return float targets — confirm.
y_train, y_test = y_train.astype("float"), y_test.astype("float")
Experiment#
# Build the regressor with its default hyperparameters and fit it on the
# training split.
regressor = KNeighborsTimeSeriesRegressor()
regressor.fit(X=X_train, y=y_train)

# Predict the held-out series, then score the predictions with mean squared
# error. The final expression is left bare so a notebook cell displays it.
y_pred = regressor.predict(X=X_test)
mean_squared_error(y_true=y_test, y_pred=y_pred)
0.002921957478363366