-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadd_refseq_gen_names
More file actions
111 lines (97 loc) · 2.79 KB
/
add_refseq_gen_names
File metadata and controls
111 lines (97 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env ruby
require 'csv'
usage =<<EOF
#{$0} txt bed csv
------------------------------------------------------
* txt :: text_file
* bed :: bed_file
* csv :: csv_file
------------------------------------------------------
- VERSION 5/16/2013 ----------------------------------
------------------------------------------------------
by khayer
EOF

# Header row of the generated CSV. Data rows follow the same layout:
# identifier, looked-up gene name, input columns 1..8, eight
# (fold change, log2 fold change) pairs, then input columns 9..16.
# NOTE(review): "FC:FC:SIBAS|SIBPS" and the "Ucount_" spellings look like
# typos; they are kept byte-for-byte so existing consumers of the CSV that
# key on these names keep working.
CSV_HEADER = ["GeneID","GenName","logRPKM_MUTAS","logRPKM_MUTAS3",
  "logRPKM_MUTPS","logRPKM_MUTPS3","logRPKM_SIBAS",
  "logRPKM_SIBAS3","logRPKM_SIBPS","logRPKM_SIBPS3",
  "FC:MUTAS|SIBAS","log2_FC:MUTAS|SIBAS",
  "FC:MUTAS3|SIBAS3","log2_FC:MUTAS3|SIBAS3",
  "FC:MUTPS|SIBPS","log2_FC:MUTPS|SIBPS",
  "FC:MUTPS3|SIBPS3","log2_FC:MUTPS3|SIBPS3",
  "FC:MUTAS|MUTPS","log2_FC:MUTAS|MUTPS",
  "FC:MUTAS3|MUTPS3","log2_FC:MUTAS3|MUTPS3",
  "FC:FC:SIBAS|SIBPS","log2_FC:SIBAS|SIBPS",
  "FC:SIBAS3|SIBPS3","log2_FC:SIBAS3|SIBPS3",
  "UCount_MUTAS","UCount_MUTAS3","UCount_MUTPS",
  "Ucount_MUTPS3","Ucount_SIBAS","UCount_SIBAS3",
  "UCount_SIBPS","UCount_SIBPS3"].freeze

# Fold-change comparisons, in output order. Each entry is
# [denominator column indices, offset to the numerator column, pseudo-count].
# NOTE(review): the first group substitutes 0.1 for zero values while the
# other two substitute 0.01 - confirm whether that difference is intended.
FOLD_CHANGE_GROUPS = [
  [1..4, 4, 0.1],   # columns 1..4 against columns 5..8
  [1..2, 2, 0.01],  # columns 1..2 against columns 3..4
  [5..6, 2, 0.01]   # columns 5..6 against columns 7..8
].freeze

# Builds the identifier => gene name lookup from the tab-separated
# annotation file: the second column is the key and the fourth column from
# the end is the value. Lines starting with '#' are ignored.
# File.foreach closes the file once iteration finishes.
def load_gene_names(bed_file)
  gene_names = {}
  File.foreach(bed_file) do |line|
    next if line.start_with?("#")
    fields = line.chomp.split("\t")
    gene_names[fields[1]] = fields[-4]
  end
  gene_names
end

# Computes the (fold change, log2 fold change) pairs for one input row.
# For every comparison the fold change is numerator / denominator, with
# zero values replaced by the group's pseudo-count first.
# Returns a flat array of floats, or nil as soon as any fold change is
# exactly 1.0 (such rows are left out of the output).
# Math.log2 raises Math::DomainError for a negative fold change.
def fold_change_columns(fields, groups)
  columns = []
  groups.each do |indices, offset, pseudo_count|
    indices.each do |i|
      denominator = fields[i].to_f
      numerator = fields[i + offset].to_f
      denominator = pseudo_count if denominator == 0.0
      numerator = pseudo_count if numerator == 0.0
      fold_change = numerator / denominator
      return nil if fold_change == 1.0
      columns << fold_change << Math.log2(fold_change)
    end
  end
  columns
end

# Turns one tab-separated input line into an output row, or returns nil
# when the row has to be skipped (see fold_change_columns).
def build_row(line, gene_names)
  fields = line.chomp.split("\t")
  fold_changes = fold_change_columns(fields, FOLD_CHANGE_GROUPS)
  return nil if fold_changes.nil?

  # The lookup key is the first column up to the first '[' with the literal
  # "(refseq)" tag removed. String#delete would strip every single
  # occurrence of the characters ( r e f s q ) instead of the tag itself.
  identifier = fields[0].split("[")[0]
  row = fields[0..8] + fold_changes
  row.insert(1, gene_names[identifier.gsub("(refseq)", "")])
  # Add UCount
  row << fields[9..16]
  row.flatten!
  row
end

# Reads text_file, keeps the lines starting with "NM", annotates each with
# the gene name found in bed_file and writes the result to csv_file.
def add_gene_names(text_file, bed_file, csv_file)
  gene_names = load_gene_names(bed_file)
  CSV.open(csv_file, "wb") do |csv|
    csv << CSV_HEADER
    File.foreach(text_file) do |line|
      next unless line.start_with?("NM")
      row = build_row(line, gene_names)
      csv << row if row
    end
  end
end

# Exactly three arguments are required; otherwise only the usage text is
# printed and the script ends here with status 0.
if ARGV.length == 3
  add_gene_names(ARGV[0], ARGV[1], ARGV[2])
else
  puts usage
end