import sys  # sys is not imported anywhere above this line

# Show which Python interpreter is running this notebook.
print(sys.executable)
C:\Users\bruce\anaconda3\python.exe
import pandas as pd
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sea
from IPython.display import display,Markdown, HTML
import inflection
import numpy as np
from statsmodels import regression
import statsmodels.api as sm
import math
def printmd(text, fmts=None, all_fmt='{:,.2f}', humanize=True):
    """Render *text* in the notebook as Markdown, or as an HTML table.

    A Series is promoted to a one-column DataFrame. For a DataFrame, every
    column that converts to numbers is formatted with its template from
    *fmts* (falling back to *all_fmt*); other columns are shown unchanged.
    The formatting is done on a copy, so the caller's object is not modified.

    Args:
        text: Markdown source, or a pandas Series / DataFrame (subclasses
            are accepted as well).
        fmts: Optional mapping of column label -> ``str.format`` template.
        all_fmt: Template used for numeric columns not listed in *fmts*.
        humanize: If true, column labels are passed through
            ``inflection.humanize`` before display.

    Returns:
        None. The rendered object is sent to ``IPython.display.display``.
    """
    if isinstance(text, pd.Series):
        text = pd.DataFrame(text)
    if not isinstance(text, pd.DataFrame):
        display(Markdown(text))
        return

    fmts = {} if fmts is None else fmts
    table = text.copy()
    for col in table.columns:
        try:
            fmt = fmts.get(col, all_fmt)
            table[col] = pd.to_numeric(table[col])
            table[col] = table[col].apply(fmt.format)
        except Exception:
            # Deliberate best effort: a column that is not numeric (or that
            # the template cannot format) is displayed without formatting.
            pass
    if humanize:
        table.columns = table.columns.map(inflection.humanize)
    display(HTML(table.to_html()))
# Golden ratio; the plotting cells multiply one side of a figure by it to get
# the other side (e.g. 8 * phi wide by 8 high).
phi = 1.6180339887498948420
import xmltodict
def conv_text(text):
    """Parse newline-separated numbers into a list of floats.

    Surrounding whitespace on each line is ignored, and blank lines
    (including a trailing newline) are skipped instead of raising.

    Args:
        text: String holding one number per line.

    Returns:
        List of floats in line order; empty if *text* has no numbers.

    Raises:
        ValueError: If a non-blank line is not a valid float.
    """
    return [float(line.strip()) for line in text.split('\n') if line.strip()]
def load_ve(path='ve1.table'):
    """Load an XML table export into a numeric DataFrame.

    The document is expected to contain ``tableData/table`` with ``xAxis``,
    ``yAxis`` and ``zValues`` text nodes. The x-axis values become the column
    labels, the y-axis values the index, and every line of ``zValues`` one
    row of whitespace-separated cells.

    Args:
        path: File to read. Defaults to ``'ve1.table'`` in the working
            directory, which is what the original hard-coded.

    Returns:
        DataFrame of the table cells converted with ``pd.to_numeric``.
    """
    with open(path) as f:
        doc = xmltodict.parse(f.read())
    table = doc['tableData']['table']
    x_axis = conv_text(table['xAxis']['#text'])
    y_axis = conv_text(table['yAxis']['#text'])
    # split() with no argument collapses runs of spaces/tabs, and blank lines
    # are dropped, so stray whitespace cannot create empty cells or rows.
    rows = [
        line.split()
        for line in table['zValues']['#text'].split('\n')
        if line.strip()
    ]
    data = pd.DataFrame(rows, index=y_axis, columns=x_axis)
    return data.apply(pd.to_numeric)
# glob() returns matches in arbitrary order; sorting keeps the file numbers
# that get_data() offers stable from one run (and one machine) to the next.
files = sorted(glob('*.msl'))
print(files)
# Rebound by get_data() to a {column name: second-header-row entry} dict.
dtypes = False
# get_data() stores the parsed log here under the key 'data'.
cache = {}
def get_data():
    """Prompt for log files, parse them and return them as one DataFrame.

    The first call prints the numbered entries of the module-level ``files``
    list and asks for one or more numbers separated by ``-``. Each chosen
    file is read as tab-separated text: line 3 holds the column names, line 4
    a second header row (stored in the global ``dtypes`` keyed by column
    name; the last file read wins), and lines 5 up to but excluding the final
    line are data rows. The parsed frames are concatenated, every column is
    converted to numbers (the text ``NaN`` becomes a missing value), and the
    result is stored in the module-level ``cache``.

    Later calls return a fresh copy of the cached frame without prompting.

    Returns:
        DataFrame with one row per logged sample and a unique 0..n-1 index.
    """
    global dtypes
    if 'data' in cache:
        return cache['data'].copy()

    file_list = dict(enumerate(files))
    print(file_list)
    num = input('File number?')

    frames = []
    for i in num.split('-'):
        with open(file_list[int(i)]) as f:
            lines = f.read().split('\n')
        columns = lines[2].split('\t')
        dtypes = dict(zip(columns, lines[3].split('\t')))
        frame = pd.DataFrame(data=[row.split('\t') for row in lines[4:-1]])
        frame.columns = columns
        frames.append(frame)

    # ignore_index avoids duplicate row labels when several logs are joined;
    # each file's rows would otherwise all be numbered from 0 again.
    data = pd.concat(frames, ignore_index=True)
    for col in tqdm(data.columns):
        data[col] = pd.to_numeric(data[col].str.replace("NaN", ""))
    cache['data'] = data.copy()
    return data
['2021-10-21_LOG0344.msl', '2021-10-21_LOG0345.msl', '2021-10-21_LOG0346.msl', '2021-10-21_LOG0347.msl', '2021-10-21_LOG0348.msl', '2021-10-21_LOG0349.msl', '2021-10-21_LOG0350.msl', '2021-10-26_11.14.56_LOG0352.msl', '2021-10-26_11.31.58_LOG0353.msl', '2021-10-26_11.42.50_LOG0354.msl', '2021-10-26_11.53.04_LOG0355.msl', '2021-10-26_12.03.16_LOG0356.msl', '2021-10-26_12.06.20_LOG0357.msl', '2021-10-26_12.16.34_LOG0358.msl']
# Load the log (this prompts for file numbers the first time get_data() runs)
# and show its column names as one ' , '-separated string.
data = get_data()
' , '.join(data.columns)
{0: '2021-10-21_LOG0344.msl', 1: '2021-10-21_LOG0345.msl', 2: '2021-10-21_LOG0346.msl', 3: '2021-10-21_LOG0347.msl', 4: '2021-10-21_LOG0348.msl', 5: '2021-10-21_LOG0349.msl', 6: '2021-10-21_LOG0350.msl', 7: '2021-10-26_11.14.56_LOG0352.msl', 8: '2021-10-26_11.31.58_LOG0353.msl', 9: '2021-10-26_11.42.50_LOG0354.msl', 10: '2021-10-26_11.53.04_LOG0355.msl', 11: '2021-10-26_12.03.16_LOG0356.msl', 12: '2021-10-26_12.06.20_LOG0357.msl', 13: '2021-10-26_12.16.34_LOG0358.msl'}
100%|██████████| 39/39 [00:01<00:00, 27.74it/s]
'Time , SecL , RPM , MAP , Boost psi , TPS , AFR , MAT , CLT , Engine , Batt V , Fuel: Air cor , Fuel: Warmup cor , Fuel: Total cor , VE1 , PW , Duty Cycle1 , Seq PW5 , SPK: Spark Advance , Knock in , Barometer , PWM Idle duty , Boost target 1 , AFR 1 Target , AFR 1 Error , TPSdot , Load , Lost sync count , VSS1 , VSS1 ms-1 , Engine in cruise state , Engine accelerating slowly , Engine decelerating slowly , Engine in overrun , Engine idling , Engine WOT , Fuel Flow cc , Fuel Flow lph , Gallons Used'
# Entry for the VSS1 column from the second header row that get_data()
# stored in the global `dtypes`.
dtypes['VSS1']
'MPH'
# Headline figures for the loaded log: length in whole minutes (Time is
# divided by 60), overall mean AFR, and mean AFR over rows where TPSdot > 0.
print("Drive Length")
print(round(data['Time'].max() / 60), 'Minutes')
print("Average AFR")
print(data['AFR'].mean())
print("Average AFR under Positive TPSDOT Acceleration")
# Last expression of the cell, so the notebook displays its value.
data.loc[data['TPSdot'] > 0, 'AFR'].mean()
Drive Length 10 Minutes Average AFR 14.6600080037354 Average AFR under Positive TPSDOT Acceleration
15.127652850370541
def engine_pic(data):
    """Show a scatter plot of Load against RPM, coloured by AFR.

    Args:
        data: DataFrame providing the 'RPM', 'Load' and 'AFR' columns.
    """
    figure, axes = plt.subplots()
    figure.set_size_inches(8*1.6, 8)
    points = axes.scatter(
        data['RPM'],
        data['Load'],
        alpha=.3,
        c=data['AFR'],
    )
    # The colour bar is the legend for the AFR value of each point.
    plt.colorbar(points)
    axes.set_xlabel("RPM")
    axes.set_ylabel("Load")
    plt.show()
# Load-vs-RPM scatter (coloured by AFR) for the whole log.
data_a = get_data()
engine_pic(data_a)
# Line plot of the CLT column against Time.
data = get_data()
data.set_index("Time")['CLT'].plot()
<AxesSubplot:xlabel='Time'>
# Heatmap of mean AFR per (Load bin, RPM bin), using only rows whose CLT is
# above min_clt.
min_clt = 180  # also read by the heatmap cells that follow
data = get_data()
# .copy() makes the filtered frame independent, so adding the bin columns
# below cannot raise pandas' SettingWithCopyWarning.
data = data[data["CLT"] > min_clt].copy()
# RPM is truncated to multiples of 100, Load to multiples of 10.
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['Load_binned'] = (data['Load']/10).apply(int) * 10
# The string 'mean' is the same aggregation as np.mean, without the
# deprecation warning newer pandas emits for NumPy callables.
table = data.pivot_table('AFR', index="Load_binned", columns='rpm_binned', aggfunc='mean')
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
# Rows are passed in reverse index order; the colour map is centred on 14.7.
sea.heatmap(table.iloc[::-1], center=14.7, ax=ax)
<AxesSubplot:xlabel='rpm_binned', ylabel='Load_binned'>
# Heatmap of mean VE1 per (Load bin, RPM bin), using only rows whose CLT is
# above min_clt.
data = get_data()
# .copy() makes the filtered frame independent, so adding the bin columns
# below cannot raise pandas' SettingWithCopyWarning.
data = data[data["CLT"] > min_clt].copy()
# RPM is truncated to multiples of 100, Load to multiples of 10.
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['Load_binned'] = (data['Load']/10).apply(int) * 10
# The string 'mean' is the same aggregation as np.mean, without the
# deprecation warning newer pandas emits for NumPy callables.
table = data.pivot_table('VE1', index="Load_binned", columns='rpm_binned', aggfunc='mean')
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
# Rows are passed in reverse index order; the colour map is centred on the
# mean VE1 of the filtered rows.
sea.heatmap(table.iloc[::-1], center=data['VE1'].mean(), ax=ax)
<AxesSubplot:xlabel='rpm_binned', ylabel='Load_binned'>
# Heatmap of the median 'AFR 1 Error' per (Load bin, RPM bin), using only
# rows whose CLT is above min_clt. The table is kept as `errors`.
data = get_data()
# .copy() makes the filtered frame independent, so adding the bin columns
# below cannot raise pandas' SettingWithCopyWarning.
data = data[data["CLT"] > min_clt].copy()
# RPM is truncated to multiples of 100, Load to multiples of 10.
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['Load_binned'] = (data['Load']/10).apply(int) * 10
# The string 'median' is the same aggregation as np.median, without the
# deprecation warning newer pandas emits for NumPy callables.
table = data.pivot_table('AFR 1 Error', index="Load_binned", columns='rpm_binned', aggfunc='median')
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
# Rows are passed in reverse index order; the colour map is centred on 0.
sea.heatmap(table.iloc[::-1], center=0, ax=ax)
errors = table.copy()
# Per-cell confidence weight: the number of samples in each (Load bin, RPM
# bin) divided by 100 and capped at 1, so cells with 100 or more samples get
# full weight. The table is kept as `conf`.
data = get_data()
data['count'] = 1
# .copy() makes the filtered frame independent, so adding the bin columns
# below cannot raise pandas' SettingWithCopyWarning.
data = data[data["CLT"] > min_clt].copy()
# RPM is truncated to multiples of 100, Load to multiples of 10.
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['Load_binned'] = (data['Load']/10).apply(int) * 10
# The string 'sum' is the same aggregation as np.sum, without the
# deprecation warning newer pandas emits for NumPy callables.
table = data.pivot_table('count', index="Load_binned", columns='rpm_binned', aggfunc='sum')
# clip() caps every cell at 1 in one step and leaves empty (NaN) cells empty.
table = (table / 100).clip(upper=1)
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
# Rows are passed in reverse index order.
sea.heatmap(table.iloc[::-1], center=0, cmap='Spectral', ax=ax)
conf = table.copy()
# Heatmap of the median AFR error multiplied cell-by-cell by the confidence
# weight, with rows in reverse index order and the colour map centred on 0.
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap((errors * conf).loc[reversed(conf.index)], center = 0)
<AxesSubplot:xlabel='rpm_binned', ylabel='Load_binned'>
# Load the VE table, then display a per-cell factor: the median AFR error
# divided by the largest error in the table, multiplied by confidence / 4,
# plus 1.
# NOTE(review): presumably a multiplier intended for the VE table cells -
# it is only displayed here, never applied; confirm the intent.
ve = load_ve()
((errors/errors.max().max()) * conf/4) + 1
rpm_binned | 700 | 800 | 900 | 1000 | 1100 | 1200 | 1300 | 1400 | 1500 | 1600 | ... | 5400 | 5500 | 5600 | 5700 | 5800 | 5900 | 6000 | 6100 | 6200 | 6300 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Load_binned | |||||||||||||||||||||
10 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 0.998584 | 0.998673 | 0.998584 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
20 | NaN | NaN | NaN | NaN | 1.000288 | 1.006040 | 0.966814 | 0.962389 | 0.960177 | 0.953540 | ... | NaN | 0.999513 | 0.997832 | 0.997345 | NaN | NaN | NaN | NaN | NaN | NaN |
30 | 0.980088 | 0.980088 | 0.977876 | 0.977876 | 0.977876 | 0.975664 | 0.978584 | 1.000885 | 1.000000 | 1.004425 | ... | NaN | NaN | NaN | 0.998938 | 0.999358 | NaN | NaN | NaN | NaN | NaN |
40 | 0.997876 | 0.999602 | 0.999115 | NaN | 1.003186 | 0.997832 | 1.004248 | 1.004779 | 1.004513 | 0.997788 | ... | 0.999447 | NaN | NaN | 0.998938 | NaN | 0.999469 | NaN | NaN | NaN | NaN |
50 | 0.999558 | NaN | 0.999535 | NaN | 1.024159 | 1.005575 | 0.999204 | 0.999646 | 0.997279 | 0.995354 | ... | NaN | 0.999226 | NaN | NaN | 0.998960 | 0.999535 | 0.999535 | NaN | NaN | NaN |
60 | 0.999558 | 0.999447 | 0.997611 | 0.999867 | 1.001858 | 1.003717 | 1.008761 | 1.002633 | 1.003805 | 1.004469 | ... | NaN | NaN | NaN | 0.999292 | 0.998960 | 0.998982 | 0.999535 | NaN | NaN | NaN |
70 | NaN | 0.998916 | 0.998805 | 1.006195 | 1.002478 | 1.001659 | 1.002190 | 1.000996 | 0.999137 | 0.998562 | ... | NaN | NaN | NaN | NaN | 0.999093 | 0.998274 | NaN | NaN | NaN | NaN |
80 | 0.998916 | 0.999071 | 0.999801 | 1.004602 | 0.999668 | 0.999624 | 1.009624 | 1.000575 | 0.999535 | 0.998451 | ... | NaN | NaN | NaN | NaN | 0.999093 | 0.998274 | NaN | NaN | NaN | NaN |
90 | NaN | 0.999889 | NaN | NaN | NaN | NaN | NaN | 1.001858 | 1.001062 | NaN | ... | NaN | NaN | NaN | NaN | 0.999248 | 0.999513 | NaN | 0.999270 | NaN | NaN |
100 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | 0.999071 | NaN | NaN | NaN | NaN |
110 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | 0.999491 | NaN | NaN | 0.999646 | NaN |
120 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | 0.999580 | NaN | NaN | NaN | 0.999757 |
130 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | 0.999801 | NaN | NaN | NaN | 0.999845 |
140 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | 0.999867 | 0.999823 | NaN | NaN | NaN | 0.999889 |
150 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | 0.999956 | 0.999735 | NaN | NaN | 0.999845 | NaN |
160 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | 1.000066 | 0.999912 | 0.999336 | NaN | 0.999558 | NaN | NaN |
170 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 0.999425 | 0.999381 | 0.999823 | 0.999469 | 0.999403 | 0.999867 | 0.999801 | NaN | NaN | NaN |
180 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 1.000088 | 1.000066 | 1.000044 | 1.000000 | 1.000000 | 0.999978 | NaN | NaN | NaN | NaN |
190 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
200 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
20 rows × 57 columns
# Display the table returned by load_ve().
ve
130.0 | 700.0 | 1100.0 | 1500.0 | 2000.0 | 2500.0 | 3000.0 | 3500.0 | 4000.0 | 4500.0 | 5000.0 | 5500.0 | 6000.0 | 6500.0 | 7000.0 | 7500.0 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
18.0 | 61.0 | 61.0 | 55.1 | 41.8 | 42.4 | 40.9 | 36.4 | 37.1 | 35.0 | 23.5 | 16.5 | 13.4 | 35.2 | 68.8 | 69.2 | 69.2 |
25.0 | 61.2 | 54.7 | 50.7 | 43.1 | 45.0 | 49.9 | 48.0 | 45.0 | 41.5 | 22.5 | 23.6 | 33.1 | 47.1 | 70.9 | 71.8 | 71.8 |
35.0 | 62.1 | 50.7 | 49.1 | 52.4 | 54.8 | 57.5 | 55.5 | 50.2 | 42.3 | 29.4 | 41.2 | 48.8 | 62.2 | 73.7 | 74.0 | 74.0 |
45.0 | 61.2 | 50.1 | 47.7 | 59.7 | 60.9 | 63.2 | 62.0 | 60.8 | 52.3 | 41.3 | 54.7 | 62.6 | 74.4 | 78.9 | 79.0 | 79.0 |
60.0 | 68.5 | 56.1 | 50.3 | 64.6 | 64.3 | 65.5 | 65.1 | 66.4 | 70.1 | 52.1 | 58.5 | 69.5 | 82.2 | 82.9 | 82.3 | 82.0 |
75.0 | 81.6 | 80.3 | 56.5 | 65.7 | 66.8 | 67.7 | 67.0 | 66.3 | 71.9 | 60.4 | 59.0 | 63.7 | 76.5 | 85.6 | 85.2 | 85.0 |
90.0 | 84.3 | 83.5 | 64.2 | 64.9 | 67.4 | 68.6 | 68.4 | 68.9 | 70.2 | 63.0 | 62.6 | 65.3 | 70.5 | 85.6 | 88.2 | 88.0 |
105.0 | 82.5 | 85.1 | 68.9 | 61.2 | 68.0 | 73.6 | 72.7 | 74.3 | 77.9 | 68.6 | 73.4 | 75.8 | 73.0 | 82.4 | 92.5 | 92.0 |
120.0 | 81.7 | 82.7 | 83.7 | 84.7 | 82.8 | 77.4 | 74.5 | 77.7 | 83.1 | 77.8 | 77.8 | 78.7 | 75.3 | 79.3 | 95.0 | 95.0 |
135.0 | 81.7 | 82.7 | 83.7 | 84.7 | 84.7 | 85.9 | 83.0 | 79.6 | 82.3 | 82.1 | 81.0 | 78.3 | 77.0 | 79.5 | 96.8 | 96.0 |
150.0 | 81.8 | 82.8 | 83.8 | 84.8 | 84.8 | 81.2 | 87.0 | 82.8 | 82.6 | 81.1 | 83.9 | 79.3 | 77.7 | 83.0 | 95.8 | 96.0 |
165.0 | 83.5 | 84.5 | 85.5 | 86.5 | 86.5 | 85.2 | 77.8 | 82.9 | 86.2 | 86.9 | 84.1 | 80.2 | 79.3 | 80.2 | 95.2 | 94.1 |
180.0 | 84.9 | 85.9 | 86.9 | 87.9 | 87.9 | 87.9 | 81.7 | 77.2 | 85.2 | 87.2 | 84.4 | 80.9 | 80.8 | 87.0 | 95.2 | 93.5 |
195.0 | 85.6 | 86.6 | 87.6 | 88.6 | 88.6 | 89.2 | 88.6 | 83.6 | 85.2 | 86.4 | 87.1 | 84.2 | 94.0 | 97.4 | 94.3 | 92.1 |
210.0 | 86.4 | 87.4 | 88.4 | 89.4 | 89.4 | 89.5 | 89.5 | 92.7 | 97.4 | 93.0 | 95.7 | 99.4 | 99.0 | 97.5 | 94.3 | 92.1 |
225.0 | 87.3 | 88.3 | 89.3 | 90.3 | 90.3 | 90.4 | 90.4 | 92.8 | 99.3 | 100.7 | 103.1 | 101.8 | 99.5 | 98.0 | 94.8 | 92.5 |
# Heatmap of the VE table with its rows passed in reverse index order.
sea.heatmap(ve.loc[reversed(ve.index)])
<AxesSubplot:>
def linreg(series_x, series_y):
    """Fit ``series_y = a + b * series_x`` by ordinary least squares and plot it.

    A new figure shows the raw points as a scatter with the fitted line drawn
    in red over the observed range of ``series_x``. The axes are labelled
    with the series names; if ``series_y`` has no name, 'Y values' is used.

    Args:
        series_x: pandas Series holding the explanatory variable.
        series_y: pandas Series holding the response variable.

    Returns:
        The statsmodels summary of the fitted OLS model.
    """
    x = series_x.values
    y = series_y.values

    # add_constant prepends the intercept column, so params is (a, b).
    model = regression.linear_model.OLS(y, sm.add_constant(x)).fit()
    intercept = model.params[0]
    slope = model.params[1]

    line_x = np.linspace(x.min(), x.max(), 100)
    line_y = line_x * slope + intercept

    fig, ax = plt.subplots()
    fig.set_size_inches(8 * phi, 8)
    ax.scatter(x, y, alpha=0.3)  # raw data
    ax.plot(line_x, line_y, 'r', alpha=0.9)  # regression line
    ax.set_xlabel(series_x.name)
    ax.set_ylabel(series_y.name if series_y.name is not None else 'Y values')
    return model.summary()
# Regress AFR on Load, leaving out rows flagged as overrun, idling or
# decelerating slowly (flag value 1).
data = get_data()
data = data[
    (data['Engine in overrun'] != 1)
    & (data["Engine idling"] != 1)
    & (data['Engine decelerating slowly'] != 1)
]
linreg(data['Load'], data['AFR'])
Dep. Variable: | y | R-squared: | 0.210 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.210 |
Method: | Least Squares | F-statistic: | 3970. |
Date: | Tue, 26 Oct 2021 | Prob (F-statistic): | 0.00 |
Time: | 14:46:18 | Log-Likelihood: | -25020. |
No. Observations: | 14957 | AIC: | 5.004e+04 |
Df Residuals: | 14955 | BIC: | 5.006e+04 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 16.0487 | 0.022 | 728.303 | 0.000 | 16.006 | 16.092 |
x1 | -0.0239 | 0.000 | -63.009 | 0.000 | -0.025 | -0.023 |
Omnibus: | 12718.343 | Durbin-Watson: | 0.117 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 470888.140 |
Skew: | 3.954 | Prob(JB): | 0.00 |
Kurtosis: | 29.326 | Cond. No. | 122. |
# Compare the mean of every column between two groups of rows:
#   x     - Load below 60 and AFR above 15
#   not_x - Load of 60 or more and AFR of 15 or less
# NOTE(review): not_x is not the logical complement of x (rows meeting only
# one of the two conditions are in neither group) - confirm this is intended.
x = data[(data['Load'] < 60) & (data['AFR'] > 15)]
not_x = data[(data['Load'] >= 60) & (data['AFR'] <= 15)]
# One row per column of the log, holding the mean in each group.
probs = pd.DataFrame({'x': x.mean(), 'not_x': not_x.mean()})
probs['difference'] = probs['x'] - probs['not_x']
probs['pct_error'] = probs['difference'] / probs['not_x']
probs = probs.sort_values('pct_error').dropna()
# Infinite ratios (a not_x mean of zero) cannot be drawn as bars, so they
# are pulled to just beyond the most extreme finite value on each side.
finite = probs.loc[np.isfinite(probs['pct_error']), 'pct_error']
probs = probs.replace({np.inf: finite.max() + 1, -np.inf: finite.min() - 1})
fig, ax = plt.subplots()
fig.set_size_inches(8, 8 * phi)
# x/y are passed by keyword: positional use is deprecated in seaborn and
# rejected from version 0.12 on.
sea.barplot(x=probs['pct_error'], y=probs.index, ax=ax)
sea.despine(ax=ax)
C:\Users\bruce\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Estimate the time spent in group x as (number of x rows) * (mean step of
# the Time column), then report it against the largest Time value.
# NOTE(review): if the cells ran in file order, `data` here no longer holds
# the overrun / idling / decelerating rows, so the mean Time step spans the
# gaps they left - confirm this is the intended sample interval.
time_spent = len(x) * data['Time'].diff().mean()
'{:,.2f} seconds in a {:.0f} minute drive - % {:,.2f} of driving time'.format(time_spent, data['Time'].max()/60,100* time_spent/data["Time"].max())
'136.06 seconds in a 10 minute drive - % 22.32 of driving time'
import os
# Run nbconvert through the system shell to export runNotebook.ipynb as HTML.
# os.system returns the command's exit status; it is not checked here.
os.system('jupyter nbconvert --to html runNotebook.ipynb')