From b4c6a87c9ed8cc6223091678dd15ca3d42b858b9 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sun, 16 Apr 2017 18:27:43 +1200 Subject: [PATCH 1/3] Add gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..38877ea --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +*.pyc +__pycache__/ From 60bd28cab0f3b8ba67231e5994300997863b7ce1 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sun, 16 Apr 2017 18:27:57 +1200 Subject: [PATCH 2/3] Remove temporary files --- jv3/study/wolfe/note_create.py~ | 117 -------------------------------- jv3/study/wolfe/tfidf.py~ | 60 ---------------- 2 files changed, 177 deletions(-) delete mode 100644 jv3/study/wolfe/note_create.py~ delete mode 100644 jv3/study/wolfe/tfidf.py~ diff --git a/jv3/study/wolfe/note_create.py~ b/jv3/study/wolfe/note_create.py~ deleted file mode 100644 index d62f9cf..0000000 --- a/jv3/study/wolfe/note_create.py~ +++ /dev/null @@ -1,117 +0,0 @@ -import sys -from django.contrib.auth.models import User -from jv3.models import * -from jv3.utils import * - -em = User.objects.filter(email="emax@csail.mit.edu")[0] -emn = em.note_owner.all() -dk = User.objects.filter(email='karger@mit.edu')[0] -dkn = dk.note_owner.all() -ws = User.objects.filter(email='wstyke@gmail.com')[0] -wsn = ws.note_owner.all() -kf = User.objects.filter(email='justacopy@gmail.com')[0] -kfn = kf.note_owner.all() -brenn = User.objects.filter(email="brennanmoore@gmail.com")[0] -gv = User.objects.filter(email="gvargas@mit.edu")[0] - -def getSaveAge(notes): - delays = [] - i = 0 - for nn in notes: - i += 1 - print 'Note #', i - logs = ActivityLog.objects.filter(owner=nn.owner, noteid=nn.jid, action='note-save') - for log in logs: - delays.append(log.when - nn.created) - pass - pass - delays.sort() - print "Median # Days:", delays[len(delays)/2] / (1000*60*60*24) - print 'Ave. # Days:', sum(delays)/(1000*60*60*24*len(delays)) - return delays - -def get_save_age_by_users(users): - delays = [] - i = 0 - for usr in users: - i += 1 - print 'User #', i - logs = ActivityLog.objects.filter(action='note-save', owner=usr) - print '# note saves: ', logs.count() - if logs.count() == 0: - continue - for log in logs: - note = Note.objects.filter(owner=usr, jid=log.noteid) - if len(note) > 0: - delays.append(log.when - note[0].created) - pass - pass - print "Median # Days:", delays[len(delays)/2] / (1000*60*60*24) - print 'Ave. # Days:', sum(delays)/(1000*60*60*24*len(delays)) - return delays - -def get_save_age_by_users2(users): - delays = [] - i = 0 - for usr in users: - i += 1 - print 'User #', i - for note in Note.objects.filter(owner=usr): - for log in ActivityLog.objects.filter(owner=note.owner, action='save-note', noteid=note.jid): - delays.append(log.when - note.created) - pass - pass - pass - print "Median # Days:", delays[len(delays)/2] / (1000*60*60*24) - print 'Ave. # Days:', sum(delays)/(1000*60*60*24*len(delays)) - return delays - -def getEditDelays(users): - totalNotes = 0 - editDelays = [] - for user in users: - notes = Note.objects.filter(owner=user) - logs = ActivityLog.objects.filter(owner=user, action='note-save') - nc, lc = notes.count(), logs.count() - for note in notes: - notelogs = logs.filter(noteid=note.jid) - saveTimes = [l.when for l in notelogs] - if len(saveTimes) != 0: - editDelays.append(min(saveTimes) - note.created) - totalNotes += 1 - return {'numNotes': totalNotes, - 'editDelays': editDelays} - -# 39 of - -""" -def getNoteCreateTimes(info, ownerID, notes): - for note in notes: - info["%s-%s"%(ownerID, note.jid)] = {'created': note.created} - return info - -def addSaveTimeArr(info, ownerID): - for jid, infoObj in info.items(): - infoObj['note-save'] = [] - for log in ActivityLog.objects.filter(action='note-save'): - if log.noteid in info: - info[log.noteid]['note-save'].append(log.when) - return info - -def addMinSaveTime(info): - for log in ActivityLog.objects.filter(action='note-save'): - try: - jid = log.noteid - if jid in info: - if 'note-save-min' not in info[jid]: - info[jid]['note-save-min'] = log.when - info[jid]['note-save-min'] = min( - log.when, - info[jid]['note-save-min']) - pass - pass - except: - continue - pass - return info -""" diff --git a/jv3/study/wolfe/tfidf.py~ b/jv3/study/wolfe/tfidf.py~ deleted file mode 100644 index 5424ebb..0000000 --- a/jv3/study/wolfe/tfidf.py~ +++ /dev/null @@ -1,60 +0,0 @@ -import sys, math - - - -def wordCount(doc): - return len(doc.split(None)) - - -def freq(word, doc): - return doc.split(None).count(word) - - -def tf(word, doc): - wc = wordCount(doc) - if wc == 0: - return 0 - else: - return (freq(word, doc) / float(wc)) - - -def numDocsContaining(word, documentList): - count = 0 - for document in documentList: - if freq(word, document) > 0: - count += 1 - return count - - -def idf(word, docList): - return math.log(len(docList) / numDocsContaining(word, docList)) - -cache_idf = {} -def tfidf(word, document, documentList): - global cache_idf - if word not in cache_idf: - cache_idf[word] = idf(word, documentList) - return (tf(word, document) * cache_idf[word]) - - - - -def getUniqueWords(noteList): - uniqueWords = {} - for note in noteList: - for word in note.contents.split(None): - if word not in uniqueWords: - uniqueWords[word] = 0 - uniqueWords[word] += 1 - return uniqueWords - -def getAllTFIDF(uniqueWordDict, noteList): - contentList = [n.contents for n in noteList] - wordToNoteTFIDF = {} - for word in uniqueWordDict.keys(): - wordToNoteTFIDF[word] = [] - for note in noteList: - wordToNoteTFIDF[word].append((note.id, tfidf(word, note.contents, contentList))) - return wordToNoteTFIDF - - From 6be38d3adc352261257aeb44fa6e251c265f3467 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sun, 16 Apr 2017 18:28:12 +1200 Subject: [PATCH 3/3] Fix typo (was wedmesday, is wednesday) --- jv3/study/#content_analysis.py# | 2 +- jv3/study/content_analysis.py | 2 +- jv3/study/wMaxEntFeatures.py | 2 +- jv3/study/wolfe/wMaxEntFeatures.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jv3/study/#content_analysis.py# b/jv3/study/#content_analysis.py# index 5b14e05..fc435bf 100644 --- a/jv3/study/#content_analysis.py# +++ b/jv3/study/#content_analysis.py# @@ -128,7 +128,7 @@ note_owner = lambda note: {'note_owner': repr(note["owner"])} note_length = lambda x : {'note_length':len(x["contents"])} #note_words = lambda x : {'note_words':len(nltk.word_tokenize(eliminate_urls(x["contents"])))} -DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"] +DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"] MONTHS=["jan","january", "feb","february", diff --git a/jv3/study/content_analysis.py b/jv3/study/content_analysis.py index a3a18be..a44293d 100644 --- a/jv3/study/content_analysis.py +++ b/jv3/study/content_analysis.py @@ -138,7 +138,7 @@ def time_of_activity(u): note_length = lambda x : {'note_length':len(x["contents"].strip())} #note_words = lambda x : {'note_words':len(nltk.word_tokenize(eliminate_urls(x["contents"])))} -DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"] +DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","sun","sunday"] MONTHS=["jan","january", "feb","february", diff --git a/jv3/study/wMaxEntFeatures.py b/jv3/study/wMaxEntFeatures.py index a0f8279..1b9696a 100644 --- a/jv3/study/wMaxEntFeatures.py +++ b/jv3/study/wMaxEntFeatures.py @@ -78,7 +78,7 @@ def makeLambda(f,i): contains_features = [contains_verbs,contains_adj]#, contains_url] ## # Testing below features -DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","su\ +DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","su\ n","sunday"] contains_dow = lambda notevals, words: ("1+_day_of_week", sum([word.lower() in DOWS for word in words]) > 0) #ca.daysofweek(notevals)['daysofweek'] > 0) contains_VBZ = lambda notevals, words: ("1+_VBZ", count_pos(words, ['VBZ']) > 0) diff --git a/jv3/study/wolfe/wMaxEntFeatures.py b/jv3/study/wolfe/wMaxEntFeatures.py index a0f8279..1b9696a 100644 --- a/jv3/study/wolfe/wMaxEntFeatures.py +++ b/jv3/study/wolfe/wMaxEntFeatures.py @@ -78,7 +78,7 @@ def makeLambda(f,i): contains_features = [contains_verbs,contains_adj]#, contains_url] ## # Testing below features -DOWS=["mon","monday","tue","tuesday","wed","wedmesday","thu","thurs","thursday","fri","friday","sat","saturday","su\ +DOWS=["mon","monday","tue","tuesday","wed","wednesday","thu","thurs","thursday","fri","friday","sat","saturday","su\ n","sunday"] contains_dow = lambda notevals, words: ("1+_day_of_week", sum([word.lower() in DOWS for word in words]) > 0) #ca.daysofweek(notevals)['daysofweek'] > 0) contains_VBZ = lambda notevals, words: ("1+_VBZ", count_pos(words, ['VBZ']) > 0)