In [85]:
import pandas as pd
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sea
from IPython.display import display,Markdown, HTML
import inflection
from statsmodels import regression
import statsmodels.api as sm
import math


def printmd(text, fmts = {}, all_fmt = '{:,.2f}', humanize = True):
    """Render a string as Markdown, or a Series/DataFrame as an HTML table
    with numeric columns formatted (per-column format overrides via `fmts`)."""
    if isinstance(text, pd.Series):
        text = pd.DataFrame(text)

    if isinstance(text, pd.DataFrame):
        text = text.copy()
        for col in text.columns:
            try:
                text[col] = pd.to_numeric(text[col])
                fmt = fmts.get(col, all_fmt)
                text[col] = text[col].apply(lambda x: fmt.format(x))
            except Exception:
                # leave non-numeric columns untouched
                pass
        if humanize:
            text.columns = text.columns.map(inflection.humanize)
        display(HTML(text.to_html()))
        return

    display(Markdown(text))
    
phi = 1.6180339887498948420  # golden ratio, used for figure aspect ratios
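A quick usage sketch for the printmd helper above (the DataFrame and column names here are made up for illustration): numeric columns get the default format unless overridden per column through fmts, and plain strings render as Markdown.

example = pd.DataFrame({'rpm_bin': [800, 1600], 'afr': [12.51, 13.02]})
printmd(example, fmts = {'afr': '{:.3f}'})   # 'afr' gets 3 decimals, 'rpm_bin' the default format
printmd('**Drive summary**')                 # plain strings are rendered as Markdown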
In [2]:
import xmltodict

def conv_text(text):
    # Convert a newline-separated block of numbers (an axis from the .table XML) into floats
    ans = [float(x.strip()) for x in text.split('\n')]
    return ans

def load_ve():
    """Load the exported VE table (ve1.table) into a DataFrame with
    load breakpoints as the index and RPM breakpoints as the columns."""
    with open('ve1.table') as f:
        x = f.read()
        x = xmltodict.parse(x)
        x = x['tableData']['table']
        x_axis = conv_text(x['xAxis']['#text'])   # RPM breakpoints
        y_axis = conv_text(x['yAxis']['#text'])   # load breakpoints
        z_values = x['zValues']['#text']
        rows = z_values.split('\n')

        data = []
        for row in rows:
            data.append(row.strip().split(' '))

        data = pd.DataFrame(data, index = y_axis, columns=x_axis)
        for col in data.columns:
            data[col] = pd.to_numeric(data[col])
    return data
In [3]:
files = glob('*.msl')   # log files in the working directory
dtypes = False          # filled with the units row once a log is loaded
cache = {}
def get_data():
    """Prompt for one or more .msl log files (e.g. '3-7' loads files 3 and 7),
    parse the tab-separated log body, and cache the resulting numeric DataFrame."""
    global cache
    global dtypes
    if 'data' in cache.keys():
        return cache['data'].copy()
    file_list = dict(zip(list(range(len(files))), files))
    print(file_list)
    num = input('File number?')
    datas = []

    for i in num.split('-'):
        with open(file_list[int(i)]) as f:
            raw_data = f.read()
            lines = raw_data.split('\n')
            columns = lines[2].split('\t')   # channel names
            dtypes = lines[3].split('\t')    # units row
            data = lines[4:-1]
            data = pd.DataFrame(data = [x.split('\t') for x in data])
            data.columns = columns
            dtypes = dict(zip(columns, dtypes))
            datas.append(data)

    data = pd.concat(datas)

    for col in tqdm(data.columns):
        # blank out literal "NaN" strings before converting each channel to numeric
        data[col] = pd.to_numeric(data[col].str.replace("NaN", ""))
    cache['data'] = data.copy()
    return data
    
In [4]:
data = get_data()
{0: '2019-11-17_19.07.09.msl', 1: '2019-11-18_11.11.59.msl', 2: '2019-11-18_18.03.44_drive_home.msl', 3: '2019-11-19_08.04.02_morning.msl', 4: '2019-11-19_12.00.12_puppy_walk.msl', 5: '2019-11-19_12.00.12_puppy_walk_b.msl', 6: '2019-11-19_16.16.04_Drive_home.msl', 7: '2019-11-21_08_to_work.51.34.msl'}
File number?7
100%|██████████████████████| 141/141 [00:07<00:00, 18.54it/s]
In [9]:
data = get_data()

' , '.join(data.columns)
Out[9]:
'Time , SecL , RPM , MAP , Boost psi , TPS , TPSADC , MAF , MAFload , MAF volts , MAF Freq , AFR , MAT , CLT , Engine , Batt V , EGO cor1 , Fuel: Air cor , Fuel: Warmup cor , Fuel: Baro cor , Fuel: Total cor , Fuel: Accel enrich , Fuel: Accel PW , VE1 , PW , Duty Cycle1 , VE2 , PW2 , Duty Cycle2 , VE1 raw , VE2 raw , VE3 raw , VE4 raw , Seq PW1 , Seq PW2 , Seq PW3 , Seq PW4 , Seq PW5 , Seq PW6 , Seq PW7 , Seq PW8 , Seq PW9 , Seq PW10 , Seq PW11 , Seq PW12 , SPK: Spark Advance , SPK: Knock retard , Knock in , SPK: Cold advance , SPK: Traction retard , SPK: CEL retard , SPK: Fuel cut retard , SPK: External advance , SPK: Base Spark Advance , SPK: Idle Correction Advance , SPK: MAT Retard , SPK: Flex Advance , SPK: Spark Table 1 , SPK: Spark Table 2 , SPK: Spark Table 3 , SPK: Spark Table 4 , SPK: Revlim Retard , SPK: ALS Timing , SPK: Launch Timing , SPK: 3-step Timing , SPK: Launch VSS Retard , Dwell , Barometer , PWM Idle duty , Closed-loop idle target RPM , Closed-loop idle RPM error , AFR 1 Target , AFR 2 Target , AFR 1 Error , AFR 2 Error , TPSdot , MAPdot , RPMdot , Load , Secondary load , Ign load , Secondary ign load , AFR load , Injector timing pri , Injector timing sec , ECU Temperature , canin1_8 , canout1_8 , canout9_16 , Timing err , Lost sync count , Lost sync reason , Fuel flow cc/min , VSS1 , VSS1 ms-1 , MPG(USA) , MPG(UK) , VSS1dot , Status1 , Status2 , Status3 , Status4 , Status5 , Status5s , Status6 , Status7 , Status8 , Status9 , TPS accel , MAP accel , Total accel , porta , portb , porteh , portk , portmj , portp , portt , CEL status , CEL status2 , CEL error code , Fuel temperature cor , Fuel pressure cor , Engine in cruise state , Engine accelerating slowly , Engine decelerating slowly , Engine in overrun , Engine idling , Engine WOT , TC slip * time , Loop , CAN error bits , CAN error count , Mainloop time , Fuel Flow cc , Fuel Flow lph , Trip Meter Miles , Odometer Miles , MPH , Power , Torque'
In [210]:
dtypes['MPH']
Out[210]:
'MPH'
In [10]:
print("Drive Length")
print(round(data['Time'].max()/60), 'Minutes')

print("Average AFR")
print(data['AFR'].mean())

print("Average AFR under  Positive TPSDOT Acceleration")
data[data['TPSdot'] > 0]['AFR'].mean()
Drive Length
16 Minutes
Average AFR
12.518368918446006
Average AFR under positive TPSdot acceleration
Out[10]:
12.547374938088177
In [11]:
def engine_pic(data):
    # Operating points hit during the drive: RPM vs load, colored by AFR
    fig, ax = plt.subplots()
    fig.set_size_inches(8*1.6, 8)
    
    sc = ax.scatter(data['RPM'], data['Load'], alpha = .3, c = data['AFR'])
    plt.colorbar(sc)
    ax.set_xlabel("RPM")
    ax.set_ylabel("Load")
    plt.show()

Engine Pic

In [12]:
data = get_data()
engine_pic(data)

Color is AFR

Coolant Temp (CLT) Over Time

In [235]:
data = get_data()
data.set_index("Time")['CLT'].plot()
Out[235]:
<matplotlib.axes._subplots.AxesSubplot at 0x2518edff780>
In [14]:
min_clt = 60

Average AFRs across different load values (CLT > 60)

In [15]:
data = get_data()
data = data[data["CLT"] > min_clt]
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['load_binned'] = (data['Load']/10).apply(int) * 10
table = data.pivot_table('AFR', index = "load_binned", columns='rpm_binned', aggfunc= np.mean)
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap(table.loc[reversed(table.index)], center = 14.7)
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x25190ae0438>

We're running pretty rich down low.
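The RPM/load binning in the cell above is repeated in several cells below; a small helper like this (hypothetical, not part of the original notebook) could consolidate it:

def add_bins(df, rpm_step = 100, load_step = 10):
    # Floor RPM and Load down to their bin edges, matching the inline binning used in these cells
    df = df.copy()
    df['rpm_binned'] = (df['RPM']/rpm_step).apply(int) * rpm_step
    df['load_binned'] = (df['Load']/load_step).apply(int) * load_step
    return df

# e.g. table = add_bins(data).pivot_table('AFR', index='load_binned', columns='rpm_binned', aggfunc=np.mean)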

VE

In [16]:
data = get_data()
data = data[data["CLT"] > min_clt]
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['load_binned'] = (data['Load']/10).apply(int) * 10
table = data.pivot_table('VE1', index = "load_binned", columns='rpm_binned', aggfunc= np.mean)
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap(table.loc[reversed(table.index)], center = data['VE1'].mean())
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x2518efd6160>

VE suggested by computer

AFR ERRORS

vs target AFR

In [17]:
data = get_data()
data = data[data["CLT"] > min_clt]
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['load_binned'] = (data['Load']/10).apply(int) * 10
table = data.pivot_table('AFR 1 Error', index = "load_binned", columns='rpm_binned', aggfunc= np.median)
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap(table.loc[reversed(table.index)], center =  0)
errors = table.copy()

I originally thought I would need to do lag-time calculations on this, but averaging the data in each bin should do the trick. I can see a few weird spots, but I don't know if that is due to a low number of observations.

Confidence of estimates

I just count the number of times I have an observation in each bin; that way I can weight my adjustments to avoid over-adjusting an area based on very few observations.

In [243]:
data = get_data()

data['count'] = 1
data = data[data["CLT"] > min_clt]
data['rpm_binned'] = (data['RPM']/100).apply(int) * 100
data['load_binned'] = (data['Load']/10).apply(int) * 10
table = data.pivot_table('count', index = "load_binned", columns='rpm_binned', aggfunc= np.sum)
table = table/100
for col in table.columns:
    table[col] = table[col].apply(lambda x: min(x, 1))
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap(table.loc[reversed(table.index)], center = 0, cmap = 'Spectral')
conf = table.copy()

Recommended changes, combining error and estimate confidence

In [244]:
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap((errors  * conf).loc[reversed(conf.index)], center =  0)
Out[244]:
<matplotlib.axes._subplots.AxesSubplot at 0x2518bf28208>
In [21]:
ve = load_ve()
In [211]:
((errors/errors.max().max()) * conf/4) + 1
Out[211]:
rpm_binned 800 900 1000 1100 1200 1300 1400 1500 1600 1700 ... 5600 5700 5800 5900 6000 6100 6200 6300 6400 6500
load_binned
10 NaN NaN NaN NaN NaN NaN NaN 0.996094 0.986719 0.950417 ... 0.995417 0.995417 0.990833 0.993125 NaN NaN NaN NaN NaN NaN
20 NaN 1.044271 1.010417 0.822917 0.798906 0.833594 0.776042 0.765625 0.770833 0.786458 ... 0.998073 0.997708 NaN 0.997708 0.988542 NaN NaN NaN NaN NaN
30 1.004740 1.062500 1.052083 1.036094 0.968281 0.985625 1.005156 0.976250 0.957812 0.962500 ... 0.997656 NaN 0.998385 NaN 0.998021 NaN NaN NaN NaN NaN
40 NaN 1.004687 1.026354 0.997448 0.994167 0.998125 0.991615 0.990156 0.985104 0.973125 ... 0.997604 NaN NaN NaN 0.997812 NaN NaN NaN NaN NaN
50 0.999531 1.000625 0.994062 0.994271 1.000000 0.998698 1.006302 1.003906 0.999219 1.014844 ... NaN NaN NaN NaN 0.997448 NaN NaN 0.997500 NaN NaN
60 NaN 1.000625 1.000625 0.999687 0.997969 0.995208 0.999740 NaN 1.014063 1.030677 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
70 NaN NaN NaN 0.999740 1.000938 1.003281 1.006094 1.006250 1.010885 1.007448 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
80 NaN NaN 0.999427 NaN NaN 1.000573 NaN NaN 1.001562 1.012604 ... NaN NaN NaN 0.997604 NaN NaN NaN NaN NaN NaN
90 1.000573 NaN 1.000625 NaN NaN 0.999687 NaN 1.002135 NaN 1.001927 ... NaN NaN NaN 0.997708 NaN NaN NaN NaN NaN NaN
100 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN 0.997865 NaN NaN NaN NaN NaN
110 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
120 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.998125
130 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.999740 NaN NaN NaN NaN NaN NaN NaN NaN NaN
140 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.998177 0.999896 0.999062 0.997500 0.998333 0.997552 0.998437 0.998646 NaN NaN
150 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.997656 0.990521 0.993542 0.987187 0.986250 0.982396 0.994583 0.992188 0.998281 NaN

15 rows × 58 columns

Adjust based on something like this? Each cell is 1 + (error / max error) × (confidence / 4), i.e. a per-bin multiplicative correction that is scaled down where there are few observations. TODO: figure out how to apply it (a rough sketch follows the current VE table below).

Current VE Table

In [245]:
ve
Out[245]:
500.0 800.0 1100.0 1400.0 2000.0 2600.0 3100.0 3700.0 4300.0 4900.0 5400.0 6000.0 6500.0 7000.0 7200.0 7500.0
15.0 31.0 31.5 22.9 23.6 23.4 19.6 17.2 9.6 8.3 9.6 10.6 13.5 19.8 23.0 23.0 23.0
20.0 30.5 33.0 25.0 23.5 16.1 11.9 11.4 10.6 15.2 26.9 19.8 16.3 20.2 37.7 37.9 38.0
25.0 29.8 20.9 20.9 29.6 17.7 16.0 16.6 26.3 36.2 41.6 41.0 40.5 36.8 44.6 45.3 45.5
30.0 26.0 20.1 19.5 25.8 16.7 17.9 29.6 36.8 46.2 52.3 51.1 50.8 51.6 52.3 52.6 53.0
35.0 21.9 21.3 21.3 28.6 28.8 30.0 36.3 47.7 57.6 61.0 60.0 58.5 57.5 56.5 56.0 55.0
40.0 26.0 26.3 26.8 37.6 30.0 31.4 34.6 49.2 58.1 65.0 65.0 64.0 62.0 60.0 58.0 57.0
45.0 24.9 49.1 52.2 49.4 45.4 42.8 44.1 53.6 62.5 68.5 75.0 73.0 71.0 69.0 67.0 67.0
50.0 25.6 53.1 57.3 50.7 48.9 50.1 52.5 58.2 66.3 73.4 77.1 76.0 74.0 72.0 71.0 70.0
60.0 27.4 57.7 58.6 57.6 60.8 61.3 61.9 62.0 69.7 76.0 78.7 80.8 78.4 76.7 75.0 74.0
70.0 30.5 60.4 65.7 64.6 66.1 65.2 69.3 69.7 75.8 78.6 83.5 82.3 79.2 77.7 78.2 78.0
80.0 34.6 65.5 69.1 67.5 74.4 77.2 79.8 72.1 75.4 78.5 84.1 82.9 80.0 77.6 78.6 78.8
90.0 38.0 59.1 61.6 65.1 70.8 77.3 79.6 79.4 88.4 86.9 88.0 85.1 83.2 87.1 87.8 92.5
100.0 62.3 64.0 67.8 69.9 72.5 80.3 80.1 77.2 89.4 90.1 90.9 89.2 86.9 89.7 91.5 97.3
110.0 60.4 65.5 68.4 69.0 72.0 80.8 83.0 80.2 90.8 92.9 88.2 93.2 93.7 102.0 95.5 95.0
150.0 67.9 70.4 70.9 70.0 73.0 77.2 91.3 87.4 90.7 95.1 90.5 94.9 96.9 106.0 98.8 98.0
220.0 81.0 79.6 77.8 76.0 79.0 85.8 97.5 92.9 93.7 98.8 94.1 101.3 109.7 118.0 113.7 114.8
In [24]:
sea.heatmap(ve.loc[reversed(ve.index)])
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x2519134d080>

Need to translate bins... the adjustment table is on 100 RPM × 10 load bins, while the VE table uses its own RPM and load breakpoints.

TODO add code to adjust the VE based on error and confidence of estimates
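A rough sketch of one possible approach (an assumption, not the final method): for each VE breakpoint, look up the nearest log-derived bin and apply the multiplicative factor from the adjustment table computed earlier, leaving cells with no observations untouched.

def apply_adjustments(ve, adjustment):
    # ve: VE table from load_ve() (rows = load breakpoints, columns = RPM breakpoints)
    # adjustment: the ((errors / errors.max().max()) * conf / 4) + 1 table from above
    new_ve = ve.copy()
    for load in new_ve.index:
        for rpm in new_ve.columns:
            # nearest binned load/RPM cell in the log-derived adjustment table
            load_bin = min(adjustment.index, key = lambda b: abs(b - load))
            rpm_bin = min(adjustment.columns, key = lambda b: abs(b - rpm))
            factor = adjustment.loc[load_bin, rpm_bin]
            if pd.notna(factor):
                new_ve.loc[load, rpm] = ve.loc[load, rpm] * factor
    return new_ve

# adjustment = ((errors/errors.max().max()) * conf/4) + 1
# new_ve = apply_adjustments(ve, adjustment)
# sea.heatmap(new_ve - ve, center = 0)   # visualize the proposed changes

Two caveats: nearest-bin lookup also extrapolates to VE breakpoints outside the logged range, and the sign convention of AFR 1 Error determines whether the factor should be applied directly or inverted, so both are worth checking before writing anything back to the table.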

In [93]:
def linreg(series_x,series_y):
    X = series_x.values
    Y = series_y.values
    # Running the linear regression
    X = sm.add_constant(X)
    model = regression.linear_model.OLS(Y, X).fit()
    a = model.params[0]
    b = model.params[1]
    X = X[:, 1]

    # Return summary of the regression and plot results
    X2 = np.linspace(X.min(), X.max(), 100)
    Y_hat = X2 * b + a
    fig, ax = plt.subplots()
    fig.set_size_inches(8 * phi, 8 )
    plt.scatter(X, Y, alpha=0.3) # Plot the raw data
    plt.plot(X2, Y_hat, 'r', alpha=0.9);  # Add the regression line, colored in red
    plt.xlabel(series_x.name)
    plt.ylabel('Y values')
    return model.summary()

Removing the idle, overrun, and decelerating-slowly states - compare Load vs AFR

In [213]:
data = get_data()

data = data[data['Engine in overrun'] != 1]
data = data[data["Engine idling"] != 1]
data = data[data['Engine decelerating slowly'] != 1]
linreg(data['Load'], data['AFR'])
Out[213]:
OLS Regression Results
Dep. Variable: y R-squared: 0.000
Model: OLS Adj. R-squared: -0.000
Method: Least Squares F-statistic: 0.7021
Date: Thu, 21 Nov 2019 Prob (F-statistic): 0.402
Time: 10:41:12 Log-Likelihood: -12288.
No. Observations: 5816 AIC: 2.458e+04
Df Residuals: 5814 BIC: 2.459e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 12.6487 0.059 212.671 0.000 12.532 12.765
x1 -0.0008 0.001 -0.838 0.402 -0.003 0.001
Omnibus: 16.605 Durbin-Watson: 0.178
Prob(Omnibus): 0.000 Jarque-Bera (JB): 16.711
Skew: 0.131 Prob(JB): 0.000235
Kurtosis: 2.994 Cond. No. 149.


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Questions:

  • Should I lean up the area under high load a little bit?
  • How do I reduce variance in my AFRs under 100 load?
In [215]:
x = data[(data['Load'] < 60) & (data['AFR'] > 15)]        # the lean-at-low-load "problem" region
not_x = data[(data['Load'] >= 60) & (data['AFR'] <= 15)]  # note: not the strict complement of x
In [216]:
probs = pd.DataFrame()
for column in x.columns:
    probs.loc[column, 'x' ] = x[column].mean()
    probs.loc[column,  'not_x'] = not_x[column].mean()
    
probs['difference'] = probs['x'] - probs['not_x']
probs['pct_error'] = probs['difference']/probs['not_x']
probs = probs.sort_values('pct_error')
probs = probs.dropna()
probs = probs.replace(np.inf, probs[probs['pct_error'].apply(abs) != np.inf]['pct_error'].max() + 1)

Which variables differ the most in the under-60-load, over-15-AFR region?

In [217]:
fig, ax = plt.subplots()
fig.set_size_inches(8 , 8 * phi)
sea.barplot( probs['pct_error'], probs.index)
sea.despine(ax = ax)

How long do we stay in this state that I consider a "problem"?

Estimated by taking the number of records with AFR above 15 and load below 60 and multiplying it by the average time step in the dataset.

In [231]:
time_spent = len(x) * data['Time'].diff().mean()
'{:,.2f} seconds in a {:.0f} minute drive - {:,.2f}% of driving time'.format(time_spent, data['Time'].max()/60, 100 * time_spent/data["Time"].max())
Out[231]:
'41.55 seconds in a 16 minute drive - 4.41% of driving time'