Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
RussellBentley I'm done
Latest commit b706129 Apr 26, 2024 History
0 contributors

Users who have contributed to this file

'''
PHM08.py
Utilities for loading PHM08 for use with classifiers.
'''
import numpy as np
import math
from random import shuffle
# load_raw_data()
#
# Load the raw dataset from file
# Returns (training_data_raw, test_data_raw)
def load_raw_data(train_path="PHM08/train.txt", test_path="PHM08/test.txt"):
    """Load the raw PHM08 training and test files with numpy.loadtxt.

    Args:
        train_path: Path of the training file. Defaults to the location the
            module has always used, "PHM08/train.txt".
        test_path: Path of the test file. Defaults to "PHM08/test.txt".

    Returns:
        A tuple (train_data_raw, test_data_raw) holding the arrays exactly as
        numpy.loadtxt returns them; no column is removed or relabelled here.
    """
    train_data_raw = np.loadtxt(train_path)
    test_data_raw = np.loadtxt(test_path)
    return (train_data_raw, test_data_raw)
# process_raw_data(x)
#
# In order to use PHM08 for classification,
# we do two things:
#
# 1) Strip unit number feature.
# The unit number exists so the data can be trained on as time series;
# here, however, we want each data point to be "independent."
#
# 2) Generate Labels
# It is assumed that the last datapoint for a given unit series
# is that unit's last run, i.e. that the unit has reached end-of-life.
def process_raw_data(data):
    """Split a raw PHM08 array into features and end-of-life labels.

    The first column of `data` is treated as the unit number. A row is
    labelled 1 when it is the last row of its unit's run (the next row has a
    different unit number, or there is no next row) and 0 otherwise.

    Args:
        data: 2-D NumPy array whose first column is the unit number.

    Returns:
        A tuple (x, y) where x is `data` without its first column and y is a
        list of int labels (0 or 1), one per row of `data`.
    """
    (m, _) = data.shape
    units = data[:, 0]
    # Start from "every row is a last row" so the final row needs no special
    # case, then overwrite all but the final entry with the neighbour check.
    is_last = np.ones(m, dtype=bool)
    is_last[:-1] = units[:-1] != units[1:]
    y = is_last.astype(int).tolist()
    return (data[:, 1:], y)
# dataset()
#
# Return processed PHM08 data.
def dataset():
    """Load PHM08 and return it in the form used for classification.

    Returns:
        A tuple (train_x, train_y, test_x, test_y): the outputs of
        process_raw_data() for the raw training array followed by its outputs
        for the raw test array, both arrays coming from load_raw_data().
    """
    raw_train, raw_test = load_raw_data()
    processed_train = process_raw_data(raw_train)
    processed_test = process_raw_data(raw_test)
    features_train, labels_train = processed_train
    features_test, labels_test = processed_test
    return (features_train, labels_train, features_test, labels_test)
class DataSplits:
    """Per-unit PHM08 series, shuffled once and split into train/validate/test.

    The training file is cut into one series per unit, a unit being a run of
    consecutive rows that share the same first-column value. Each entry of
    `self.series` is a pair (x, y): x holds the unit's rows with the unit
    column dropped, and y is a list of labels that is 0 everywhere except for
    a 1 on the series' final row.

    After shuffling, series [0, TRAIN_SERIES) form the cross-validation pool
    and series [TRAIN_SERIES, TOTAL_SERIES) are held out for testing. The pool
    is divided into consecutive folds of FOLD_SIZE series each.
    """

    TRAIN_SERIES = 200  # number of series in the cross-validation pool
    TOTAL_SERIES = 218  # pool plus the held-out test series
    FOLD_SIZE = 20  # series per validation fold (ten folds in the pool)

    def __init__(self, path="PHM08/train.txt", seed=None):
        """Load `path`, cut it into per-unit series and shuffle them.

        Args:
            path: File read with numpy.loadtxt; its first column is taken to
                be the unit number.
            seed: When None (the default) the series are shuffled with the
                module-level `random.shuffle`, as before. Otherwise a private
                `random.Random(seed)` is used so the split is reproducible.
        """
        # Local import: the file itself only imports `shuffle` from random.
        from random import Random

        data = np.loadtxt(path)
        (m, _) = data.shape
        self.series = []
        if m > 0:
            units = data[:, 0]
            # Row indices at which a new unit starts, i.e. where to cut.
            cuts = np.flatnonzero(units[1:] != units[:-1]) + 1
            for x in np.split(data[:, 1:], cuts):
                # Only the last row of a series is labelled end-of-life.
                y = [0] * (len(x) - 1) + [1]
                self.series.append((x, y))
        if seed is None:
            shuffle(self.series)
        else:
            Random(seed).shuffle(self.series)

    @staticmethod
    def _stack(series):
        """Concatenate a list of (x, y) series into one (x, y) pair of arrays."""
        if not series:
            raise ValueError("no series available for the requested split")
        x = np.concatenate([pair[0] for pair in series], axis=0)
        y = np.concatenate([pair[1] for pair in series], axis=0)
        return (x, y)

    def get_train_validate_split(self, j):
        """Return fold `j` of the cross-validation over the training pool.

        Fold j validates on series [j * FOLD_SIZE, (j + 1) * FOLD_SIZE) and
        trains on the remaining series of the pool, so with the default
        constants `j` ranges over 0..9.

        Args:
            j: Fold index.

        Returns:
            A tuple (x_train, y_train, x_validate, y_validate) of concatenated
            arrays.

        Raises:
            ValueError: If `j` selects no series of the pool.
        """
        lo = j * self.FOLD_SIZE
        hi = (j + 1) * self.FOLD_SIZE
        train = []
        validate = []
        for i, pair in enumerate(self.series[:self.TRAIN_SERIES]):
            if lo <= i < hi:
                validate.append(pair)
            else:
                train.append(pair)
        if not validate:
            raise ValueError(
                "fold index j=%r selects no series; expected 0 <= j < %d"
                % (j, self.TRAIN_SERIES // self.FOLD_SIZE)
            )
        return self._stack(train) + self._stack(validate)

    def get_test_data(self):
        """Return (x_test, y_test) built from the held-out series after the pool."""
        return self._stack(self.series[self.TRAIN_SERIES:self.TOTAL_SERIES])

    def get_train_data(self):
        """Return (x_train, y_train) built from every series in the training pool."""
        return self._stack(self.series[:self.TRAIN_SERIES])