#from scipy.spatial.distance import euclidean
import numpy as np
import scipy
import scipy.signal
def matrix_from_csv_file(file_path):
    """
    Loads a comma-separated file and returns its contents as a numeric
    matrix, without the first parsed row.

    Parameters:
        file_path (str): path for the CSV file with a time stamp in the first
        column and the signals in the subsequent ones.
        Time stamps are in seconds, with millisecond precision

    Returns:
        numpy.ndarray: 2D matrix containing the data read from the CSV

    Author:
        Original: [lmanso]
        Revision and documentation: [fcampelo]
    """
    # The first parsed row corresponds to the header line of the file; it is
    # neither used nor returned, so it is dropped right away.
    return np.genfromtxt(file_path, delimiter=',')[1:]
def get_time_slice(full_matrix, start = 0., period = 1.):
    """
    Extracts a time window from the given matrix. The window begins `start`
    seconds after the first time stamp and spans `period` seconds.

    Each boundary is the LAST row whose time stamp (first column) does not
    exceed the requested time. The row found for the end boundary is not
    part of the returned slice, but its time stamp is the one used to compute
    the reported duration.

    Parameters:
        full_matrix (numpy.ndarray): matrix returned by matrix_from_csv()
        start (float): start point (in seconds after the beginning of records)
        period (float): duration of the slice to be extracted (in seconds)

    Returns:
        numpy.ndarray: 2D matrix with the desired slice of the matrix
        float: actual length of the resulting time slice

    Author:
        Original: [lmanso]
        Reimplemented: [fcampelo]
    """
    # Requested window limits, expressed on the time axis of the recording
    t_begin = full_matrix[0, 0] + start
    t_end = t_begin + period

    timestamps = full_matrix[:, 0]

    # Last row at or before each limit
    first_row = np.where(timestamps <= t_begin)[0].max()
    last_row = np.where(timestamps <= t_end)[0].max()

    elapsed = full_matrix[last_row, 0] - full_matrix[first_row, 0]
    window = full_matrix[first_row:last_row, :]
    return window, elapsed
def feature_mean(matrix):
    """
    Computes the average of every signal (column) over the whole time window.

    Parameters:
        matrix (numpy.ndarray): 2D [nsamples x nsignals] matrix containing the
        values of nsignals for a time window of length nsamples

    Returns:
        numpy.ndarray: 1D array containing the means of each column from the
        input matrix
        list: list containing feature names for the quantities calculated.

    Author:
        Original: [lmanso]
        Revision and documentation: [fcampelo]
    """
    n_signals = matrix.shape[1]
    column_means = np.mean(matrix, axis = 0).flatten()
    labels = ['mean_{}'.format(col) for col in range(n_signals)]
    return column_means, labels
def feature_mean_d(h1, h2):
    """
    Computes how much the mean of each signal changes from the first
    half-window to the second one (backward difference), mean(h2) - mean(h1)

    Parameters:
        h1 (numpy.ndarray): 2D matrix containing the signals for the first
        half-window
        h2 (numpy.ndarray): 2D matrix containing the signals for the second
        half-window

    Returns:
        numpy.ndarray: 1D array containing the difference between the mean in
        h2 and the mean in h1 of all signals
        list: list containing feature names for the quantities calculated.

    Author:
        Original: [lmanso]
        Revision and documentation: [fcampelo]
    """
    # Only the values are needed here; the names returned by feature_mean()
    # are discarded.
    mean_second = feature_mean(h2)[0]
    mean_first = feature_mean(h1)[0]
    delta = (mean_second - mean_first).flatten()

    labels = ['mean_d_h2h1_{}'.format(col) for col in range(h1.shape[1])]
    return delta, labels
def feature_mean_q(q1, q2, q3, q4):
    """
    Computes the mean of each signal on every quarter-window, followed by the
    differences of those means for every pair of quarter-windows, in the
    order
    mean(q1), mean(q2), mean(q3), mean(q4),
    mean(q1) - mean(q2), mean(q1) - mean(q3), mean(q1) - mean(q4),
    mean(q2) - mean(q3), mean(q2) - mean(q4), mean(q3) - mean(q4)

    Parameters:
        q1 (numpy.ndarray): 2D matrix containing the signals for the first
        quarter-window
        q2 (numpy.ndarray): 2D matrix containing the signals for the second
        quarter-window
        q3 (numpy.ndarray): 2D matrix containing the signals for the third
        quarter-window
        q4 (numpy.ndarray): 2D matrix containing the signals for the fourth
        quarter-window

    Returns:
        numpy.ndarray: 1D array containing the means of each signal in q1, q2,
        q3 and q4; plus the paired differences of the means of each signal on
        each quarter-window.
        list: list containing feature names for the quantities calculated.

    Author:
        Original: [lmanso]
        Revision and documentation: [fcampelo]
    """
    quarter_means = [feature_mean(q)[0] for q in (q1, q2, q3, q4)]
    n_signals = len(quarter_means[0])

    # Zero-based index pairs (a, b) with a < b: (0, 1), (0, 2), (0, 3),
    # (1, 2), (1, 3), (2, 3). The same list drives both the values and the
    # names, so the two always stay aligned.
    pairs = [(a, b) for a in range(3) for b in range(a + 1, 4)]
    pair_diffs = [quarter_means[a] - quarter_means[b] for a, b in pairs]

    ret = np.hstack(quarter_means + pair_diffs).flatten()

    names = []
    for quarter in range(1, 5):
        names.extend('mean_q{}_{}'.format(quarter, sig)
                     for sig in range(n_signals))
    for a, b in pairs:
        names.extend('mean_d_q{}q{}_{}'.format(a + 1, b + 1, sig)
                     for sig in range(n_signals))

    return ret, names
def feature_stddev(matrix):
    """
    Computes the sample standard deviation of every signal (column) over the
    whole time window.

    Parameters:
        matrix (numpy.ndarray): 2D [nsamples x nsignals] matrix containing the
        values of nsignals for a time window of length nsamples

    Returns:
        numpy.ndarray: 1D array containing the standard deviation of each
        column from the input matrix
        list: list containing feature names for the quantities calculated.

    Author:
        Original: [lmanso]
        Revision and documentation: [fcampelo]
    """
    n_signals = matrix.shape[1]

    # ddof = 1 applies the finite sampling correction (N-1 instead of N in
    # the denominator)
    column_stds = np.std(matrix, axis = 0, ddof = 1).flatten()

    labels = ['std_{}'.format(col) for col in range(n_signals)]
    return column_stds, labels
def feature_stddev_d(h1, h2):
    """
    Computes how much the standard deviation of each signal changes from the
    first half-window to the second one (backward difference),
    std(h2) - std(h1)

    Parameters:
        h1 (numpy.ndarray): 2D matrix containing the signals for the first
        half-window
        h2 (numpy.ndarray): 2D matrix containing the signals for the second
        half-window

    Returns:
        numpy.ndarray: 1D array containing the difference between the stdev in
        h2 and the stdev in h1 of all signals
        list: list containing feature names for the quantities calculated.

    Author:
        Original: [lmanso]
        Revision and documentation: [fcampelo]
    """
    # Only the values are needed here; the names returned by feature_stddev()
    # are discarded.
    std_second = feature_stddev(h2)[0]
    std_first = feature_stddev(h1)[0]
    delta = (std_second - std_first).flatten()

    labels = ['std_d_h2h1_{}'.format(col) for col in range(h1.shape[1])]
    return delta, labels
def feature_moments(matrix):
    """
    Computes the 3rd and 4th standardised moments about the mean (i.e., skewness
    and kurtosis) of each signal, for the full time window. Notice that
    scipy.stats.moment() returns the CENTRAL moments, which would need to be
    standardised to compute skewness and kurtosis; scipy.stats.skew() and
    scipy.stats.kurtosis() are used here instead, both with bias = False.

    Notice: Kurtosis is calculated as excess kurtosis, e.g., with the Gaussian
    kurtosis set as the zero point (Fisher's definition)
    - https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html
    - https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html
    - https://en.wikipedia.org/wiki/Standardized_moment
    - http://www.econ.nyu.edu/user/ramseyj/textbook/pg93.99.pdf

    Parameters:
        matrix (numpy.ndarray): 2D [nsamples x nsignals] matrix containing the
        values of nsignals for a time window of length nsamples

    Returns:
        numpy.ndarray: 1D array containing the skewness of each column from
        the input matrix, followed by the kurtosis of each column
        list: list containing feature names for the quantities calculated.

    Author:
        Original: [fcampelo]
    """
    # Import the statistics functions explicitly: `import scipy` on its own
    # does not guarantee that the scipy.stats submodule has been loaded, so
    # referring to scipy.stats.skew / scipy.stats.kurtosis could fail with an
    # AttributeError depending on the SciPy version and on what else has been
    # imported.
    from scipy.stats import kurtosis, skew

    skw = skew(matrix, axis = 0, bias = False)
    krt = kurtosis(matrix, axis = 0, bias = False)
    ret = np.append(skw, krt)

    n_signals = matrix.shape[1]
    names = ['skew_' + str(i) for i in range(n_signals)]
    names.extend(['kurt_' + str(i) for i in range(n_signals)])
    return ret, names
How can I call each of these methods and print its output values?
35 Views Asked by Dhivya Bharkavi At
0