0 - Minimal Example

This example illustrates the core functionality of Oloren Chem Engine. In-depth tutorials on these features and the core classes are provided in the notebooks prefixed by ‘1’. Advanced usage is covered in the notebooks prefixed by ‘2’. Custom implementations of functionality are covered in the notebooks prefixed by ‘3’.

import olorenchemengine as oce
import pandas as pd

# Download the public VEGFR1 (CHEMBL1868) ChEMBL table into a DataFrame.
df = pd.read_csv(
    "https://storage.googleapis.com/oloren-public-data/CHEMBL%20Datasets/997_2298%20-%20VEGFR1%20(CHEMBL1868).csv"
)

# BaseDataset is handed the table as CSV text, with the "Smiles" column as
# the structure and "pChEMBL Value" as the property to model.
raw_dataset = oce.BaseDataset(
    data=df.to_csv(),
    structure_col="Smiles",
    property_col="pChEMBL Value",
)
# The `+` operator chains the CleanStructures and ScaffoldSplit steps onto
# the dataset, applied left to right.
dataset = raw_dataset + oce.CleanStructures() + oce.ScaffoldSplit()

# Three learners, built in the same order they are passed to BaseBoosting:
# two random forests over different representations, then a ChemProp model.
morgan_forest = oce.RandomForestModel(
    oce.DescriptastorusDescriptor("morgan3counts"),
    n_estimators=1000,
)
checkpoint_forest = oce.RandomForestModel(
    oce.OlorenCheckpoint("default"),
    n_estimators=1000,
)
chemprop_model = oce.ChemPropModel(epochs=20, batch_size=64)
model = oce.BaseBoosting([morgan_forest, checkpoint_forest, chemprop_model])

# Fit on the training split, then score on the test split. The final line is
# left as a bare expression so the notebook displays the returned metrics.
model.fit(*dataset.train_dataset)
model.test(*dataset.test_dataset)
100it [00:00, 386.25it/s]
100%|██████████| 2/2 [00:00<00:00, 43.01it/s]
{'r2': 0.5604408249112198,
 'Spearman': 0.7808815382903245,
 'Explained Variance': 0.6063047803553241,
 'Max Error': 2.547735018266718,
 'Mean Absolute Error': 0.4941828751987506,
 'Mean Squared Error': 0.5353116205186318,
 'Root Mean Squared Error': 0.7316499303072691}
# Cross-validated fit on the training split with a kNNwRMSD1 error model
# (presumably what backs the `return_ci=True` request below -- confirm
# against the Oloren Chem Engine docs).
model.fit_cv(*dataset.train_dataset, error_model=oce.kNNwRMSD1())

# Persist the fitted model to a local file.
oce.save(model, "tmp.oce")

# Predict on the first element of the test split, asking for confidence
# intervals and per-row visualization objects alongside the predictions.
test_inputs = dataset.test_dataset[0]
results = model.predict(test_inputs, return_ci=True, return_vis=True)

# Render the visualization attached to the row at position 32.
row_vis = results["vis"].iloc[32]
row_vis.render_ipynb()

# Render the VisualizeModelSim view of the model on the test set.
vis = oce.VisualizeModelSim(dataset, model, eval_set="test")
vis.render_ipynb()
100it [00:00, 385.68it/s]
100%|██████████| 2/2 [00:00<00:00, 46.87it/s]