-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassroom_data_preprocess.py
More file actions
38 lines (29 loc) · 1.85 KB
/
classroom_data_preprocess.py
File metadata and controls
38 lines (29 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
import numpy as np
import pickle
# Load the three input tables from classroom_data/ in one pass. The paths are
# listed in the same order as the names they are unpacked into:
#   drug       <- drug sensitivity (AUC) table
#   expression <- expression table
#   mutation   <- somatic hotspot mutation matrix
drug, expression, mutation = (
    pd.read_csv(csv_path)
    for csv_path in (
        "classroom_data/Drug_sensitivity_AUC_(Sanger_GDSC1)_subsetted.csv",
        "classroom_data/Expression_Public_25Q3_subsetted.csv",
        "classroom_data/OmicsSomaticMutationsMatrixHotspot.csv",
    )
)
# --- Gefitinib response joined with expression ------------------------------
# Keep only the key column ("Unnamed: 0") and the gefitinib column of the drug
# table, and give the drug column a short name.
gefitinib = drug.loc[:, ["Unnamed: 0", "GEFITINIB (GDSC1:1010)"]]
gefitinib = gefitinib.rename(columns={"GEFITINIB (GDSC1:1010)": "GEFITINIB"})

# Name the join key explicitly instead of relying on merge's default of
# joining on every column name the two frames happen to share; this keeps the
# join stable even if the expression table gains a column called "GEFITINIB".
# NOTE(review): assumes "Unnamed: 0" is the row identifier in both tables.
gefitinib_expression = gefitinib.merge(expression, on="Unnamed: 0")

# The key column is not wanted in the saved table, and rows without a
# gefitinib value are discarded.
gefitinib_expression = gefitinib_expression.drop(columns="Unnamed: 0")
gefitinib_expression = gefitinib_expression.dropna(subset=["GEFITINIB"])

with open("classroom_data/GEFITINIB_Expression.pickle", "wb") as file:
    pickle.dump(gefitinib_expression, file)
# --- Gefitinib response joined with hotspot mutations -----------------------
# Drop the bookkeeping columns of the mutation matrix. Its own "Unnamed: 0"
# column goes too, so it cannot clash with the drug table's "Unnamed: 0" key
# in the merge below; "ModelID" is kept as the join key.
mutation = mutation.drop(
    columns=[
        "Unnamed: 0",
        "SequencingID",
        "ModelConditionID",
        "IsDefaultEntryForModel",
        "IsDefaultEntryForMC",
    ]
)

# The drug table's "Unnamed: 0" values are matched against "ModelID".
gefitinib_mutation = gefitinib.merge(
    mutation, left_on="Unnamed: 0", right_on="ModelID"
)
gefitinib_mutation = gefitinib_mutation.dropna(subset=["GEFITINIB"])

# Both key columns have served their purpose once the join is done.
gefitinib_mutation = gefitinib_mutation.drop(columns=["Unnamed: 0", "ModelID"])

with open("classroom_data/GEFITINIB_mutation.pickle", "wb") as file:
    pickle.dump(gefitinib_mutation, file)

# GEFITINIB_Expression.pickle is intentionally not written again here:
# gefitinib_expression has not changed since it was dumped right after being
# built, so a second dump would only rewrite identical bytes.
# --- Docetaxel response joined with expression (subsampled) -----------------
# Keep only the key column ("Unnamed: 0") and the docetaxel column of the drug
# table, and give the drug column a short name.
docetaxel = drug.loc[:, ["Unnamed: 0", "DOCETAXEL (GDSC1:1007)"]]
docetaxel = docetaxel.rename(columns={"DOCETAXEL (GDSC1:1007)": "DOCETAXEL"})

# Name the join key explicitly instead of relying on merge's default of
# joining on every column name the two frames happen to share.
# NOTE(review): assumes "Unnamed: 0" is the row identifier in both tables.
docetaxel_expression = docetaxel.merge(expression, on="Unnamed: 0")
docetaxel_expression = docetaxel_expression.drop(columns="Unnamed: 0")
docetaxel_expression = docetaxel_expression.dropna(subset=["DOCETAXEL"])

# Reproducible subsample of at most 600 rows (fixed seed). Capping n at the
# number of available rows avoids the ValueError that sample() raises when
# asked for more rows than exist; with 600 or more rows the result is exactly
# what sample(n=600, random_state=1) returns.
docetaxel_expression_s = docetaxel_expression.sample(
    n=min(600, len(docetaxel_expression)),
    random_state=1,
)

with open("classroom_data/DOCETAXEL_Expression.pickle", "wb") as file:
    pickle.dump(docetaxel_expression_s, file)