From b4c6a87c9ed8cc6223091678dd15ca3d42b858b9 Mon Sep 17 00:00:00 2001
From: "Bruno P. Kinoshita" <brunodepaulak@yahoo.com.br>
Date: Sun, 16 Apr 2017 18:27:43 +1200
Subject: [PATCH 1/3] Add gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..38877ea
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*~
+*.pyc
+__pycache__/

From 60bd28cab0f3b8ba67231e5994300997863b7ce1 Mon Sep 17 00:00:00 2001
From: "Bruno P. Kinoshita" <brunodepaulak@yahoo.com.br>
Date: Sun, 16 Apr 2017 18:27:57 +1200
Subject: [PATCH 2/3] Remove temporary files

---
 jv3/study/wolfe/note_create.py~ | 117 --------------------------------
 jv3/study/wolfe/tfidf.py~       |  60 ----------------
 2 files changed, 177 deletions(-)
 delete mode 100644 jv3/study/wolfe/note_create.py~
 delete mode 100644 jv3/study/wolfe/tfidf.py~

diff --git a/jv3/study/wolfe/note_create.py~ b/jv3/study/wolfe/note_create.py~
deleted file mode 100644
index d62f9cf..0000000
--- a/jv3/study/wolfe/note_create.py~
+++ /dev/null
@@ -1,117 +0,0 @@
-import sys
-from django.contrib.auth.models import User
-from jv3.models import *
-from jv3.utils import *
-
-em = User.objects.filter(email="emax@csail.mit.edu")[0]
-emn = em.note_owner.all()
-dk = User.objects.filter(email='karger@mit.edu')[0]
-dkn = dk.note_owner.all()
-ws = User.objects.filter(email='wstyke@gmail.com')[0]
-wsn = ws.note_owner.all()
-kf = User.objects.filter(email='justacopy@gmail.com')[0]
-kfn = kf.note_owner.all()
-brenn = User.objects.filter(email="brennanmoore@gmail.com")[0]
-gv = User.objects.filter(email="gvargas@mit.edu")[0]
-
-def getSaveAge(notes):
-    delays = []
-    i = 0
-    for nn in notes:
-        i += 1
-        print 'Note #', i
-        logs = ActivityLog.objects.filter(owner=nn.owner, noteid=nn.jid, action='note-save')
-        for log in logs:
-            delays.append(log.when - nn.created)
-            pass
-        pass
-    delays.sort()
-    print "Median # Days:", delays[len(delays)/2] / (1000*60*60*24)
-    print 'Ave. # Days:', sum(delays)/(1000*60*60*24*len(delays))
-    return delays
-
-def get_save_age_by_users(users):
-    delays = []
-    i = 0
-    for usr in users:
-        i += 1
-        print 'User #', i
-        logs = ActivityLog.objects.filter(action='note-save', owner=usr)
-        print '# note saves: ', logs.count()
-        if logs.count() == 0:
-            continue
-        for log in logs:
-            note = Note.objects.filter(owner=usr, jid=log.noteid)
-            if len(note) > 0:
-                delays.append(log.when - note[0].created)
-            pass
-        pass
-    print "Median # Days:", delays[len(delays)/2] / (1000*60*60*24)
-    print 'Ave. # Days:', sum(delays)/(1000*60*60*24*len(delays))
-    return delays
-
-def get_save_age_by_users2(users):
-    delays = []
-    i = 0
-    for usr in users:
-        i += 1
-        print 'User #', i
-        for note in Note.objects.filter(owner=usr):
-            for log in ActivityLog.objects.filter(owner=note.owner, action='save-note', noteid=note.jid):
-                delays.append(log.when - note.created)
-                pass
-            pass
-        pass
-    print "Median # Days:", delays[len(delays)/2] / (1000*60*60*24)
-    print 'Ave. # Days:', sum(delays)/(1000*60*60*24*len(delays))
-    return delays
-
-def getEditDelays(users):
-    totalNotes = 0
-    editDelays = []
-    for user in users:
-        notes = Note.objects.filter(owner=user)
-        logs = ActivityLog.objects.filter(owner=user, action='note-save')
-        nc, lc = notes.count(), logs.count()
-        for note in notes:
-            notelogs = logs.filter(noteid=note.jid)
-            saveTimes = [l.when for l in notelogs]
-            if len(saveTimes) != 0:
-                editDelays.append(min(saveTimes) - note.created)
-            totalNotes += 1
-    return {'numNotes': totalNotes,
-            'editDelays': editDelays}
-
-# 39 of 
-
-"""
-def getNoteCreateTimes(info, ownerID, notes):
-    for note in notes:
-        info["%s-%s"%(ownerID, note.jid)] = {'created': note.created}
-    return info
-
-def addSaveTimeArr(info, ownerID):
-    for jid, infoObj in info.items():
-        infoObj['note-save'] = []
-    for log in ActivityLog.objects.filter(action='note-save'):
-        if log.noteid in info:
-            info[log.noteid]['note-save'].append(log.when)
-    return info
-
-def addMinSaveTime(info):
-    for log in ActivityLog.objects.filter(action='note-save'):
-        try:
-            jid = log.noteid
-            if jid in info:
-                if 'note-save-min' not in info[jid]:
-                    info[jid]['note-save-min'] = log.when
-                info[jid]['note-save-min'] = min(
-                    log.when,
-                    info[jid]['note-save-min'])
-                pass
-            pass
-        except:
-            continue
-        pass
-    return info
-"""
diff --git a/jv3/study/wolfe/tfidf.py~ b/jv3/study/wolfe/tfidf.py~
deleted file mode 100644
index 5424ebb..0000000
--- a/jv3/study/wolfe/tfidf.py~
+++ /dev/null
@@ -1,60 +0,0 @@
-import sys, math
-
-
-
-def wordCount(doc):
-    return len(doc.split(None))
-
-
-def freq(word, doc):
-    return doc.split(None).count(word)
-
-
-def tf(word, doc):
-    wc = wordCount(doc)
-    if wc == 0:
-        return 0
-    else:
-        return (freq(word, doc) / float(wc))
-
-
-def numDocsContaining(word, documentList):
-    count = 0
-    for document in documentList:
-       	if freq(word, document) > 0:
-            count += 1
-    return count
-
-
-def idf(word, docList):
-    return math.log(len(docList) / numDocsContaining(word, docList))
-
-cache_idf = {}
-def tfidf(word, document, documentList):
-    global cache_idf
-    if word not in cache_idf:
-        cache_idf[word] = idf(word, documentList)
-    return (tf(word, document) * cache_idf[word])
-
-
-
-
-def getUniqueWords(noteList):
-    uniqueWords = {}
-    for note in noteList:
-        for word in note.contents.split(None):
-            if word not in uniqueWords:
-                uniqueWords[word] = 0
-            uniqueWords[word] += 1
-    return uniqueWords
-
-def getAllTFIDF(uniqueWordDict, noteList):
-    contentList = [n.contents for n in noteList]
-    wordToNoteTFIDF = {}
-    for word in uniqueWordDict.keys():
-        wordToNoteTFIDF[word] = []
-        for note in noteList:
-            wordToNoteTFIDF[word].append((note.id, tfidf(word, note.contents, contentList)))
-    return wordToNoteTFIDF
-
-

From 6be38d3adc352261257aeb44fa6e251c265f3467 Mon Sep 17 00:00:00 2001
From: "Bruno P. Kinoshita" <brunodepaulak@yahoo.com.br>
Date: Sun, 16 Apr 2017 18:28:12 +1200
Subject: [PATCH 3/3] Fix typo (was wedmesday, is wednesday)

---
 jv3/study/#content_analysis.py#    | 2 +-
 jv3/study/content_analysis.py      | 2 +-
 jv3/study/wMaxEntFeatures.py       | 2 +-
 jv3/study/wolfe/wMaxEntFeatures.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/jv3/study/#content_analysis.py# b/jv3/study/#content_analysis.py#
index 5b14e05..fc435bf 100644
--- a/jv3/study/#content_analysis.py#
+++ b/jv3/study/#content_analysis.py#
@@ -128,7 +128,7 @@ note_owner = lambda note: {'note_owner': repr(note["owner"])}
 note_length = lambda x : {'note_length':len(x["contents"])}
 #note_words = lambda x : {'note_words':len(nltk.word_tokenize(eliminate_urls(x["contents"])))}
 
-DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"]
+DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"]
 
 MONTHS=["jan","january",
         "feb","february",
diff --git a/jv3/study/content_analysis.py b/jv3/study/content_analysis.py
index a3a18be..a44293d 100644
--- a/jv3/study/content_analysis.py
+++ b/jv3/study/content_analysis.py
@@ -138,7 +138,7 @@ def time_of_activity(u):
 note_length = lambda x : {'note_length':len(x["contents"].strip())}
 #note_words = lambda x : {'note_words':len(nltk.word_tokenize(eliminate_urls(x["contents"])))}
 
-DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"]
+DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"]
 
 MONTHS=["jan","january",
         "feb","february",
diff --git a/jv3/study/wMaxEntFeatures.py b/jv3/study/wMaxEntFeatures.py
index a0f8279..1b9696a 100644
--- a/jv3/study/wMaxEntFeatures.py
+++ b/jv3/study/wMaxEntFeatures.py
@@ -78,7 +78,7 @@ def makeLambda(f,i):
 contains_features = [contains_verbs,contains_adj]#, contains_url] ## 
 
 # Testing below features
-DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","su\
+DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","su\
 n","sunday"]
 contains_dow = lambda notevals, words: ("1+_day_of_week", sum([word.lower() in DOWS for word in words]) > 0) #ca.daysofweek(notevals)['daysofweek'] > 0)
 contains_VBZ = lambda notevals, words: ("1+_VBZ", count_pos(words, ['VBZ']) > 0)
diff --git a/jv3/study/wolfe/wMaxEntFeatures.py b/jv3/study/wolfe/wMaxEntFeatures.py
index a0f8279..1b9696a 100644
--- a/jv3/study/wolfe/wMaxEntFeatures.py
+++ b/jv3/study/wolfe/wMaxEntFeatures.py
@@ -78,7 +78,7 @@ def makeLambda(f,i):
 contains_features = [contains_verbs,contains_adj]#, contains_url] ## 
 
 # Testing below features
-DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","su\
+DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","su\
 n","sunday"]
 contains_dow = lambda notevals, words: ("1+_day_of_week", sum([word.lower() in DOWS for word in words]) > 0) #ca.daysofweek(notevals)['daysofweek'] > 0)
 contains_VBZ = lambda notevals, words: ("1+_VBZ", count_pos(words, ['VBZ']) > 0)