# Show where the loaded `med` module lives and, when available, its build info.
print(med.__file__)  # Shows file path
# `version_info` is not guaranteed to exist on every build, so guard the access.
if hasattr(med.Global, 'version_info'):
    # Shows compilation date and git commit hash, when compiled with "version" information
    print(med.Global.version_info)
Inspect Available Functions
Documentation is a work in progress.
To inspect available methods:
## This is not the proper way to work with Python, since Python loops are very slow;
## however, it's nice as a proof of concept.
## Don't do this in real code - use get_sig to retrieve a dataframe instead.
signame = 'Albumin'
rep = med.PidRepository()
# Empty pid list is passed here; only the requested signal is read.
rep.read_all('/home/Repositories/THIN/thin_jun2017/thin.repository', [], [signame])
print(med.cerr())

# The signal id does not change between patients, so resolve it once.
sig_id = rep.sig_id(signame)
for pid in rep.pids[20:30]:
    usv = rep.uget(pid, sig_id)
    if len(usv) > 0:
        print('\n\n')
        for rec in usv:
            # `.2f` (fixed point) rather than `.2`: a bare precision formats floats
            # with 2 significant digits and switches to scientific notation
            # (e.g. 38.0 -> '3.8e+01').
            print("Patient {} had {}={:.2f} at {}".format(pid, signame, rec.val(), rec.date()))
When using the "get_sig" function, there is no need to call "read_all" first; "init" is enough. "get_sig" automatically loads the signal from disk if it is not already loaded
When using the "get_sig" function, there is no need to call "read_all" first; "init" is enough. "get_sig" automatically loads the signal from disk if it is not already loaded.
We can use dictionaries to query specific categorical codes and their hierarchies.
For example, by defining "ICD10_CODE:J00-J99", we'll capture all codes within this group based on the dictionary's definition of ICD10.
This method relies on predefined parent-child pairs for hierarchy and does not use regular expressions.
It is not limited to ICD10, ICD9, or any other specific known coding system, but you will need to define the dictionaries and their hierarchies correctly.
# `readmissions` is a data frame with readmitted patients (defined before this snippet).
rep = med.PidRepository()
rep.read_all(
    FLAGS.rep,
    readmissions.pid.values.astype('int32'),
    ['ADMISSION', 'DIAGNOSIS_IP', 'DIAGNOSIS_OP'],
)

# Align the admission start column name with `readmissions` so the two can be merged.
admissions = rep.get_sig('ADMISSION').rename(columns={'time0': 'outcomeTime'})
readmissions = readmissions.merge(admissions, on=['pid', 'outcomeTime'], how='outer')

# Handle missing admissions: for outcomes 1, 2 or 3 with no `time1`,
# fall back to `outcomeTime`. The mask is computed once and reused on both sides.
missing_admission = readmissions.outcome.isin([1, 2, 3]) & readmissions.time1.isna()
readmissions.loc[missing_admission, 'time1'] = readmissions.loc[missing_admission, 'outcomeTime']

# Read relevant codes (one code per row, no header).
icd9 = pd.read_csv(FLAGS.icd9, header=None, names=['code']).code.values

# Add adverse events for each ICD9 code in `icd9`. The lookup also uses the hierarchy
# defined in the dictionary, for example using "487" includes: 487.0, 487.1, 487.8, etc.
lut = rep.dict.prep_sets_lookup_table(
    rep.dict.section_id('DIAGNOSIS_IP'),
    ['ICD9_CODE:' + str(code) for code in icd9],
)

# translate=False keeps the raw numeric codes in `val0`, which are used to index `lut`.
ip_diagnosis = rep.get_sig('DIAGNOSIS_IP', translate=False)
ip_diagnosis = ip_diagnosis[lut[ip_diagnosis.val0] != 0]

# NOTE(review): `lut` was built from the DIAGNOSIS_IP dictionary section but is applied
# to DIAGNOSIS_OP codes too - this presumably relies on both signals sharing the same
# dictionary section; confirm.
op_diagnosis = rep.get_sig('DIAGNOSIS_OP', translate=False)
op_diagnosis = op_diagnosis[lut[op_diagnosis.val0] != 0]
# Apply an already-trained model to a set of samples and save the scores.
rep_path = ''  # Path of repository
model_file = ''  # Path of MedModel
# Path of samples, or load samples from a DataFrame using:
# samples.from_df(dataframe_object with the right columns)
samples_file = ''

print("Reading basic Repository structure for fitting model")
rep = med.PidRepository()
rep.init(rep_path)  # init the repository structure, needed by "model.fit_for_repository"

print("Reading Model")
model = med.Model()
model.read_from_file(model_file)
model.fit_for_repository(rep)
# Get list of relevant signals the model needs to fetch from the repository
signalNamesSet = model.get_required_signal_names()

print("Reading Samples")
samples = med.Samples()
samples.read_from_file(samples_file)
ids = samples.get_ids()  # Fetch relevant ids from samples to read from repository

print("Reading Repository")
rep.read_all(rep_path, ids, signalNamesSet)  # read needed repository data

# Apply model:
model.apply(rep, samples)
df = samples.to_df()
df.to_csv('output_file')
samples.write_to_file('write_to_samples_file')
# The feature matrix is available via model.features.to_df().
# The "samples" object now has the scores.
# Train a model defined by a json file on a set of samples and save the feature matrix.
rep_path = ''  # Path of repository
json_model = ''  # Path of json
# Path of samples, or load samples from a DataFrame using:
# samples.from_df(dataframe_object with the right columns)
samples_file = ''

print("Reading basic Repository structure for fitting model")
rep = med.PidRepository()
rep.init(rep_path)  # init the repository structure, needed by "model.fit_for_repository"

print("Reading Model")
model = med.Model()
# Use the json path defined above (the name `model_file` is not defined in this snippet).
model.init_from_json_file(json_model)
model.fit_for_repository(rep)
# Get list of relevant signals the model needs to fetch from the repository
signalNamesSet = model.get_required_signal_names()

print("Reading Samples")
samples = med.Samples()
samples.read_from_file(samples_file)
ids = samples.get_ids()  # Fetch relevant ids from samples to read from repository

print("Reading Repository")
rep.read_all(rep_path, ids, signalNamesSet)  # read needed repository data

# Learn model:
model.learn(rep, samples)
# NOTE(review): to_df() is assumed to return a pandas DataFrame; DataFrames have no
# write_to_file method, so the matrix is saved with to_csv instead - confirm.
model.features.to_df().to_csv('write_to_matrix_file')
import pandas as pd

# Load predictions and keep only the label and score columns.
df = pd.read_feather('/nas1/Work/Users/Ilya/Mayo/Feathers/predictions_073.feather')
df = df[['true_V', 'prob_V']]

# Example of analyzing df with bootstrap (scores first, then labels)
bt = med.Bootstrap()
res = bt.bootstrap(df['prob_V'], df['true_V'])
all_measurment_names = res.keys()
print('AUC: %2.3f [%2.3f - %2.3f]' % (res['AUC_Mean'], res['AUC_CI.Lower.95'], res['AUC_CI.Upper.95']))

# Can convert to dataframe with Measurement and Value columns:
res_df = res.to_df()
# Bare expression: shows the AUC rows when run in a notebook/REPL.
res_df[res_df['Measurement'].str.startswith('AUC')]