import pandas as pd
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sea
from IPython.display import display, Markdown, HTML
import inflection
from statsmodels import regression
import statsmodels.api as sm
import math
def printmd(text, fmts=None, all_fmt='{:,.2f}', humanize=True):
    # Render a Series/DataFrame as an HTML table with number formatting,
    # or render a plain string as Markdown.
    fmts = fmts or {}
    if type(text) == pd.Series:
        text = pd.DataFrame(text)
    if type(text) == pd.DataFrame:
        text = text.copy()
        for col in text.columns:
            try:
                text[col] = pd.to_numeric(text[col])
                fmt = fmts.get(col, all_fmt)  # per-column override, else the default
                text[col] = text[col].apply(lambda x: fmt.format(x))
            except Exception:
                pass  # leave non-numeric columns as-is
        if humanize:
            text.columns = text.columns.map(inflection.humanize)
        display(HTML(text.to_html()))
        return
    display(Markdown(text))
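printmd isn't exercised below, but for reference, a quick hypothetical usage with made-up values:
printmd(pd.DataFrame({'rpm': [3500.0], 'afr': [14.6832]}))  # HTML table: 3,500.00 / 14.68
printmd('**bold** text')  # rendered as Markdown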
phi = 1.6180339887498948  # golden ratio, used for figure aspect ratios
import xmltodict
def conv_text(text):
    # Parse a newline-separated block of numbers into a list of floats
    return [float(x.strip()) for x in text.split('\n')]
def load_ve():
    # Parse the exported VE table (XML .table file) into a DataFrame:
    # rows = load (y axis), columns = RPM (x axis)
    with open('ve1.table') as f:
        x = f.read()
    x = xmltodict.parse(x)
    x = x['tableData']['table']
    x_axis = conv_text(x['xAxis']['#text'])
    y_axis = conv_text(x['yAxis']['#text'])
    z_values = x['zValues']['#text']
    rows = z_values.strip().split('\n')
    data = [row.split() for row in rows]  # split() tolerates runs of whitespace
    data = pd.DataFrame(data, index=y_axis, columns=x_axis)
    for col in data.columns:
        data[col] = pd.to_numeric(data[col])
    return data
files = glob('*.msl')  # datalog files in the working directory
dtypes = None  # filled in by get_data() with the units row from the log header
cache = {}
def get_data():
    # Load one or more .msl datalogs (tab-separated; line 2 holds column
    # names, line 3 their units) into a single DataFrame, caching the result.
    global cache
    global dtypes
    if 'data' in cache.keys():
        return cache['data'].copy()
    file_list = dict(zip(list(range(len(files))), files))
    print(file_list)
    num = input('File number?')  # e.g. '0', or '0-2' to combine files 0 and 2
    datas = []
    for i in num.split('-'):
        with open(file_list[int(i)]) as f:
            raw_data = f.read()
        lines = raw_data.split('\n')
        columns = lines[2].split('\t')
        dtypes = lines[3].split('\t')
        data = lines[4:-1]
        data = pd.DataFrame(data=[x.split('\t') for x in data])
        data.columns = columns
        dtypes = dict(zip(columns, dtypes))
        datas.append(data)
    data = pd.concat(datas, ignore_index=True)
    for col in tqdm(data.columns):
        # The logs use the literal string "NaN" for missing samples
        data[col] = pd.to_numeric(data[col].str.replace("NaN", ""))
    cache['data'] = data.copy()
    return data
data = get_data()
data = get_data()
' , '.join(data.columns)  # all logged channels
dtypes['MPH']  # units recorded for the MPH channel
print("Drive Length")
print(round(data['Time'].max()/60), 'Minutes')
print("Average AFR")
print(data['AFR'].mean())
print("Average AFR under Positive TPSDOT Acceleration")
print(data[data['TPSdot'] > 0]['AFR'].mean())
def engine_pic(data):
    # Scatter of operating points in the RPM/Load plane, colored by AFR
    fig, ax = plt.subplots()
    fig.set_size_inches(8 * 1.6, 8)
    sc = ax.scatter(data['RPM'], data['Load'], alpha=.3, c=data['AFR'])
    plt.colorbar(sc)
    ax.set_xlabel("RPM")
    ax.set_ylabel("Load")
    plt.show()
data = get_data()
engine_pic(data)
Color is AFR
data = get_data()
data.set_index("Time")['CLT'].plot()  # coolant temperature over the drive; the warm-up is visible at the start
min_clt = 60  # only keep samples logged after the coolant passed 60 (warm engine)
data = get_data()
data = data[data["CLT"] > min_clt]
# Bin into 100-RPM by 10-load cells
data['rpm_binned'] = (data['RPM'] / 100).apply(int) * 100
data['load_binned'] = (data['Load'] / 10).apply(int) * 10
table = data.pivot_table('AFR', index="load_binned", columns="rpm_binned", aggfunc='mean')
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap(table.loc[reversed(table.index)], center=14.7)  # centered on stoichiometric
We're running pretty rich down low.
data = get_data()
data = data[data["CLT"] > min_clt]
data['rpm_binned'] = (data['RPM'] / 100).apply(int) * 100
data['load_binned'] = (data['Load'] / 10).apply(int) * 10
table = data.pivot_table('VE1', index="load_binned", columns="rpm_binned", aggfunc='mean')
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
# Centered on the mean VE so above/below-average cells stand out
sea.heatmap(table.loc[reversed(table.index)], center=data['VE1'].mean())
VE suggested by the computer vs. the target AFR.
data = get_data()
data = data[data["CLT"] > min_clt]
data['rpm_binned'] = (data['RPM'] / 100).apply(int) * 100
data['load_binned'] = (data['Load'] / 10).apply(int) * 10
# Median is more robust than the mean to transient spikes in each bin
table = data.pivot_table('AFR 1 Error', index="load_binned", columns="rpm_binned", aggfunc='median')
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap(table.loc[reversed(table.index)], center=0)
errors = table.copy()  # keep the binned error grid for the adjustment step
I originally thought I would need to do lag-time calculations on this, but averaging the data in each bin should do the trick. I can see a few weird spots, but I don't know if that is due to a low number of observations.
I also count the number of times I have an observation in each bin, so that I can weight my adjustments and avoid over-adjusting an area based on very few observations.
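As an aside, the count-then-cap logic in the next cell can be done without the helper column or the loop; a sketch of an equivalent using group sizes and clip (conf_sketch is a hypothetical name):
# Same confidence weight as the cell below: observations per bin,
# scaled by 1/100 and capped at 1 (100+ samples = full confidence)
counts = data.groupby(['load_binned', 'rpm_binned']).size().unstack()
conf_sketch = (counts / 100).clip(upper=1)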
data = get_data()
data['count'] = 1
data = data[data["CLT"] > min_clt]
data['rpm_binned'] = (data['RPM'] / 100).apply(int) * 100
data['load_binned'] = (data['Load'] / 10).apply(int) * 10
table = data.pivot_table('count', index="load_binned", columns="rpm_binned", aggfunc='sum')
# 100+ observations in a bin counts as full confidence
table = table / 100
for col in table.columns:
    table[col] = table[col].apply(lambda x: min(x, 1))
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
sea.heatmap(table.loc[reversed(table.index)], center=0, cmap='Spectral')
conf = table.copy()  # per-bin confidence weights
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8)
# AFR error weighted by confidence, so only well-observed cells stand out
sea.heatmap((errors * conf).loc[reversed(conf.index)], center=0)
ve = load_ve()
# Candidate adjustment factor: error normalized by its largest value,
# scaled by confidence, damped by 4, and centered on 1
((errors / errors.max().max()) * conf / 4) + 1
Adjust based on something like this? TODO: figure out how to apply the adjustment (one possibility is sketched below).
ve
sea.heatmap(ve.loc[reversed(ve.index)])  # the current VE table
Need to translate bins: the error grid lives on 100-RPM by 10-load bins, while the VE table has its own axes.
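A sketch of one way that translation and adjustment could work, under loud assumptions: reindex with method='nearest' snaps each VE-table axis value to the nearest error/confidence bin (both axes must be sorted ascending), and the multiplier assumes 'AFR 1 Error' is measured minus target, so a positive (lean) error raises VE. The 14.7 target and the amount of damping are guesses, not the settled procedure.
target_afr = 14.7  # assumed stoichiometric target
# Snap the binned grids onto the VE table's own axes
aligned_err = (errors.reindex(ve.index, method='nearest')
                     .reindex(columns=ve.columns, method='nearest'))
aligned_conf = (conf.reindex(ve.index, method='nearest')
                    .reindex(columns=ve.columns, method='nearest'))
# Lean (positive) error -> multiplier > 1 -> more fuel; confidence damps
# rarely-visited cells, and never-visited cells fall back to no change
multiplier = 1 + (aligned_err / target_afr) * aligned_conf.fillna(0)
ve_adjusted = ve * multiplier.fillna(1)
sea.heatmap(ve_adjusted.loc[reversed(ve_adjusted.index)])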
def linreg(series_x, series_y):
    # OLS of y on x, plus a scatter of the raw data with the fitted line
    X = series_x.values
    Y = series_y.values
    X = sm.add_constant(X)
    model = regression.linear_model.OLS(Y, X).fit()
    a = model.params[0]  # intercept
    b = model.params[1]  # slope
    X = X[:, 1]
    X2 = np.linspace(X.min(), X.max(), 100)
    Y_hat = X2 * b + a
    fig, ax = plt.subplots()
    fig.set_size_inches(8 * phi, 8)
    plt.scatter(X, Y, alpha=0.3)  # raw data
    plt.plot(X2, Y_hat, 'r', alpha=0.9)  # fitted line in red
    plt.xlabel(series_x.name)
    plt.ylabel(series_y.name)
    return model.summary()
data = get_data()
# Exclude overrun, idle, and slow-deceleration samples
data = data[data['Engine in overrun'] != 1]
data = data[data["Engine idling"] != 1]
data = data[data['Engine decelerating slowly'] != 1]
linreg(data['Load'], data['AFR'])
Question: what is different about the records where we run lean (AFR > 15) at low load (under 60)?
x = data[(data['Load'] < 60) & (data['AFR'] > 15)]
not_x = data[~((data['Load'] < 60) & (data['AFR'] > 15))]  # the complement of x
probs = pd.DataFrame()
for column in x.columns:
    probs.loc[column, 'x'] = x[column].mean()
    probs.loc[column, 'not_x'] = not_x[column].mean()
probs['difference'] = probs['x'] - probs['not_x']
probs['pct_error'] = probs['difference'] / probs['not_x']
probs = probs.sort_values('pct_error')
probs = probs.dropna()
# Cap infinities (from zero means in not_x) just above the largest finite value
probs = probs.replace(np.inf, probs[probs['pct_error'].apply(abs) != np.inf]['pct_error'].max() + 1)
fig, ax = plt.subplots()
fig.set_size_inches(8, 8 * phi)
sea.barplot(x=probs['pct_error'], y=probs.index, ax=ax)
sea.despine(ax=ax)
Estimated by taking the number of records with AFR above 15 and load below 60 and multiplying it by the average time step in the dataset.
time_spent = len(x) * data['Time'].diff().mean()
'{:,.2f} seconds in a {:.0f} minute drive - {:,.2f}% of driving time'.format(time_spent, data['Time'].max() / 60, 100 * time_spent / data["Time"].max())