nmoya
diff --git a/wp_parser/ChatFeatures.py b/wp_parser/ChatFeatures.py
Lines changed: 82 additions & 86 deletions
diff --git a/wp_parser/datelib.py b/wp_parser/datelib.py
Lines changed: 9 additions & 6 deletions
diff --git a/wp_parser/parsers/facebook.py b/wp_parser/parsers/facebook.py
Lines changed: 5 additions & 4 deletions
@@ -1,24 +1,27 @@
 # -*- coding: utf-8 -*-
 from __future__ import division
-import datelib
-import re
+
 import operator
 
-class ChatFeatures():
+import re
+from . import datelib
+
 
+class ChatFeatures:
     def __init__(self):
-        self.root_response_time    = []
+        self.root_response_time = []
         self.contact_response_time = []
-        self.root_burst            = []
-        self.contact_burst         = []
-        self.initiations           = {}
-        self.weekday               = {}
-        self.shifts                = {}
-        self.patterns              = {}
-        self.proportions           = {}
-        self.most_used_words       = {}
-
-    def compute_response_time_and_burst(self, list_of_messages, root_name, senders, initiation_thrs=(60*60*8), burst_thrs=3, response_thrs=(60*60*3)):
+        self.root_burst = []
+        self.contact_burst = []
+        self.initiations = {}
+        self.weekday = {}
+        self.shifts = {}
+        self.patterns = {}
+        self.proportions = {}
+        self.most_used_words = {}
+
+    def compute_response_time_and_burst(self, list_of_messages, root_name, senders, initiation_thrs=(60 * 60 * 8),
+                                        burst_thrs=3, response_thrs=(60 * 60 * 3)):
         # perform the operations that are dependant on multiple messages
         # (response time, bursts)
         self.initiations = {}
@@ -27,30 +30,30 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
         t0 = list_of_messages[0].datetime_obj
         burst_count = 1
         for index, message in enumerate(list_of_messages):
-            #skip the first message since we are looking at differences; note this means we don't count first msg as init
+            # skip the first message since we are looking at differences; note this means we don't count first msg as init
             if index == 0:
                 continue
             t1 = message.datetime_obj
             dt = t1 - t0
             dt.total_seconds()
 
             # print "sender %s delta %s" % ( message.sender, dt.total_seconds() )
-            if (dt.total_seconds() > initiation_thrs):
+            if dt.total_seconds() > initiation_thrs:
                 self.initiations[message.sender] += 1
 
             # is sender the same as the last message?
-            if message.sender != list_of_messages[index-1].sender:
+            if message.sender != list_of_messages[index - 1].sender:
                 # sender changed, store the burst count and reset
-                #print "sender changed: %s" % ( message.sender )
-                #print "burst count: %s" % ( burst_count )
+                # print "sender changed: %s" % ( message.sender )
+                # print "burst count: %s" % ( burst_count )
 
-                #print("response time: %d\n" %(dt.total_seconds()) )
+                # print("response time: %d\n" %(dt.total_seconds()) )
                 # is sender the root?
                 if message.sender == root_name:
                     # store the burst count for the last sender, which is the
                     # opposite of current
                     if burst_count > burst_thrs:
-                        #print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
+                        # print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
                         self.contact_burst.append(burst_count)
                     if dt.total_seconds() < response_thrs:
                         self.root_response_time.append(dt.total_seconds())
@@ -59,24 +62,24 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
                     # store the burst count for the last sender, which is the
                     # opposite of current
                     if burst_count > burst_thrs:
-                        #print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
+                        # print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
                         self.root_burst.append(burst_count)
                     if dt.total_seconds() < response_thrs:
                         self.contact_response_time.append(dt.total_seconds())
-                
+
                 # End of the first burst, restart the counter
                 burst_count = 1
 
             else:
                 # accumulate the number of messages sent in a row
                 burst_count += 1
             t0 = t1
-        if burst_count > burst_thrs: #catch a burst if at end of chat
-            #print "final burst: %s" % ( burst_count )
-            if  message.sender == root_name:
+        if burst_count > burst_thrs:  # catch a burst if at end of chat
+            # print "final burst: %s" % ( burst_count )
+            if message.sender == root_name:
                 self.root_burst.append(burst_count)
             else:
-                self.contact_burst.append(burst_count)                
+                self.contact_burst.append(burst_count)
 
     def compute_messages_per_weekday(self, list_of_messages):
         self.weekday = {
@@ -105,16 +108,16 @@ def compute_messages_per_shift(self, list_of_messages):
         }
         for msg in list_of_messages:
             hour = int(msg.time.split(":")[0])
-            if hour >= 0 and hour <= 6:
+            if 0 <= hour <= 6:
                 self.shifts["latenight"] += 1
 
-            elif hour > 6 and hour <= 11:
+            elif 6 < hour <= 11:
                 self.shifts["morning"] += 1
 
-            elif hour > 11 and hour <= 17:
+            elif 11 < hour <= 17:
                 self.shifts["afternoon"] += 1
 
-            elif hour > 17 and hour <= 23:
+            elif 17 < hour <= 23:
                 self.shifts["evening"] += 1
         return self.shifts
 
@@ -134,7 +137,7 @@ def compute_messages_pattern(self, list_of_messages, senders, pattern_list):
                 if length > 0:
                     if pattern not in self.patterns:
                         self.patterns[pattern][msg.sender] = length
-                        print "This should never happen"
+                        print("This should never happen")
                     else:
                         self.patterns[pattern][msg.sender] += length
         return self.patterns
@@ -149,10 +152,10 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
                 self.proportions[i][s] = 0
         for msg in list_of_messages:
             self.proportions["messages"][msg.sender] += 1
-            self.proportions["words"][msg.sender]    += len(msg.content.split(" "))
-            self.proportions["chars"][msg.sender]    += len(msg.content.strip())
-            self.proportions["qmarks"][msg.sender]   += msg.content.count('?')
-            self.proportions["exclams"][msg.sender]  += msg.content.count('!')
+            self.proportions["words"][msg.sender] += len(msg.content.split(" "))
+            self.proportions["chars"][msg.sender] += len(msg.content.strip())
+            self.proportions["qmarks"][msg.sender] += msg.content.count('?')
+            self.proportions["exclams"][msg.sender] += msg.content.count('!')
             self.proportions["media"][msg.sender] += (
                 msg.content.count('<media omitted>') +
                 msg.content.count('<image omitted>') +
@@ -170,24 +173,24 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
         self.proportions["avg_words"] = {}
         for s in senders:
             self.proportions["avg_words"][s] = self.proportions["words"][s] / self.proportions["messages"][s]
-        self.proportions["avg_words"]["ratio"] = self.proportions["avg_words"][root] / self.proportions["avg_words"][contact]
+        self.proportions["avg_words"]["ratio"] = self.proportions["avg_words"][root] / self.proportions["avg_words"][
+            contact]
 
         for c in categories:
             self.proportions[c]["total"] = 0
             for s in senders:
                 self.proportions[c]["total"] += self.proportions[c][s]
-        
+
         for c in categories:
-         
-            #if a value is 0, replace with a 1 to avoid zero erros in ratio calcs.
+
+            # if a value is 0, replace with a 1 to avoid divide-by-zero errors in ratio calcs.
             if self.proportions[c][contact] == 0:
                 self.proportions[c][contact] = 1
             if self.proportions[c][root] == 0:
-                self.proportions[c][root] = 1                
+                self.proportions[c][root] = 1
 
             self.proportions[c]["ratio"] = self.proportions[c][root] / self.proportions[c][contact]
 
-
         return self.proportions
 
     def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
@@ -204,37 +207,37 @@ def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
                         words_counter[w] = 1
                     else:
                         words_counter[w] += 1
-        sorted_words = sorted(words_counter.iteritems(), key=operator.itemgetter(1), reverse=True)
+        sorted_words = sorted(words_counter.items(), key=operator.itemgetter(1), reverse=True)
         self.most_used_words = sorted_words[:top]
         return self.most_used_words
 
     def compute_avg_root_response_time(self):
-        if (len(self.root_response_time) != 0):
-            return sum(self.root_response_time)/len(self.root_response_time)
+        if len(self.root_response_time) != 0:
+            return sum(self.root_response_time) / len(self.root_response_time)
         return 0
 
     def compute_avg_contact_response_time(self):
-        if (len(self.contact_response_time) != 0):
-            return sum(self.contact_response_time)/len(self.contact_response_time)
+        if len(self.contact_response_time) != 0:
+            return sum(self.contact_response_time) / len(self.contact_response_time)
         return 0
 
     def compute_response_time_ratio(self, root, contact):
         avg_root = self.compute_avg_root_response_time()
         avg_contact = self.compute_avg_contact_response_time()
-        if (avg_contact != 0):
+        if avg_contact != 0:
             return avg_root / avg_contact
         return 0
 
     def compute_bursts_ratio(self, root, contact):
         if (len(self.contact_burst)) == 0:
             return len(self.root_burst) / 1
-        if (len(self.root_burst) == 0):
-            return ( 1/len(self.contact_burst))
-        return len(self.root_burst)/len(self.contact_burst)
+        if len(self.root_burst) == 0:
+            return 1 / len(self.contact_burst)
+        return len(self.root_burst) / len(self.contact_burst)
 
     def compute_nbr_root_burst(self):
         return len(self.root_burst)
-    
+
     def compute_nbr_contact_burst(self):
         return len(self.contact_burst)
 
@@ -244,48 +247,51 @@ def compute_nbr_contact_burst(self):
     #     return 0
 
     def compute_avg_contact_burst(self):
-        if (len(self.contact_burst) != 0):
-            return sum(self.contact_burst)/len(self.contact_burst)
+        if len(self.contact_burst) != 0:
+            return sum(self.contact_burst) / len(self.contact_burst)
         return 0
 
     def compute_root_initation_ratio(self, root, contact):
-        if (self.initiations[contact] == 0):
-            return self.initiations[root]/1
-        if (self.initiations[root] == 0):
-            return 1/self.initiations[contact] 
+        if self.initiations[contact] == 0:
+            return self.initiations[root] / 1
+        if self.initiations[root] == 0:
+            return 1 / self.initiations[contact]
         return self.initiations[root] / self.initiations[contact]
-        
+
     def generate_outcome(self, root, contact, methodology):
-        outcome = 99;
+        """Classify the chat as 0 or 1 using one of two threshold rule sets.
+
+        Methodology 0 tests the initiation ratio and the question-mark ratio
+        stored in ``self.proportions``; methodology 1 tests the initiation
+        ratio and the average root response time. Both read state filled in
+        by the other ``compute_*`` methods, so those must have run first.
+
+        Returns 0 ("just not that into you"), 1 ("definitely into you"), or
+        99 when ``methodology`` is neither 0 nor 1.
+        """
+        outcome = 99
         if methodology == 0:
-            if (self.compute_root_initation_ratio(root, contact) > 0.867):
-                outcome = 0 #"just not that into you"
-                #print "DOESNT INITIATE"
-            elif (self.proportions["qmarks"]["ratio"] > 0.87): #flipped the non-intutitive direction of inequality
-                outcome = 0 #"just not that into you"
-                #print "QUESTIONS FAIL"
+            if self.compute_root_initation_ratio(root, contact) > 0.867:
+                outcome = 0  # "just not that into you"
+                # print "DOESNT INITIATE"
+            elif self.proportions["qmarks"]["ratio"] > 0.87:  # flipped the non-intuitive direction of inequality
+                outcome = 0  # "just not that into you"
+                # print "QUESTIONS FAIL"
             else:
-                outcome = 1 #"definitely into you"
-                #print "ELSE" 
+                outcome = 1  # "definitely into you"
+                # print "ELSE"
         elif methodology == 1:
-            if (self.compute_root_initation_ratio(root, contact) > 0.83):
-                outcome = 0 #"just not that into you"
-                #print "DOESNT INITIATE"
-            elif (self.features.compute_avg_root_response_time() < 0.92): #flipped non-intuitive direction of inequality
-                outcome = 0 #"just not that into you"
-                #print "QUESTIONS FAIL"
+            if self.compute_root_initation_ratio(root, contact) > 0.83:
+                outcome = 0  # "just not that into you"
+                # print "DOESNT INITIATE"
+            elif self.compute_avg_root_response_time() < 0.92:  # flipped non-intuitive direction of inequality
+                outcome = 0  # "just not that into you"
+                # print "QUESTIONS FAIL"
             else:
-                outcome = 1 #"definitely into you"
-                #print "ELSE"
+                outcome = 1  # "definitely into you"
+                # print "ELSE"
 
         else:
-            outcome = 99;
-
-        return outcome         
-                        
-#        qMarksPerRoot = qmarksRoot/messagesRoot
- #       qMarksPerContact = qmarksContact/messagesContact
-        
-        
-        
-        
+            outcome = 99
+
+        return outcome
@@ -1,7 +1,7 @@
+import time
 from datetime import date
 from datetime import datetime
 from datetime import timedelta
-import time
 
 
 # get current ymd
@@ -37,11 +37,13 @@ def valid_date(date_str):
 
     return valid
 
+
 def date_diff(dateobj1, dateobj2):
     import math
     delta = dateobj2 - dateobj1
     return int(math.fabs(delta.days))
 
+
 def datecmp(date1, date2):
     year, month, day = date_split(date1)
     year_t, month_t, day_t = date_split(date2)
@@ -53,8 +55,8 @@ def datecmp(date1, date2):
         else:
             return 1
     except ValueError:
-        #misc.error("Fix me! Invalid date", "datecmp")
-        print "Fix me! Invalid date"
+        # misc.error("Fix me! Invalid date", "datecmp")
+        print("Fix me! Invalid date")
         return False
 
 
@@ -65,7 +67,7 @@ def date_operation(date_str, num):
     return end_date
 
 
 def date_to_str(date_str):
-    return date.strftime('%Y-%m-%d')
+    return date_str.strftime('%Y-%m-%d')  # format the passed-in date object, not the date class
 
 
@@ -119,5 +121,6 @@ def weekday_portuguese_to_english(string):
     elif string == "sab" or string == "sabado":
         return "Saturday"
 
+
 if __name__ == "__main__":
-    print date_diff(datetime(2015, 6, 4), datetime(2015, 07, 7))
+    print(date_diff(datetime(2015, 6, 4), datetime(2015, 7, 7)))
@@ -1,12 +1,13 @@
 from datetime import datetime
-import message
 
-class ParserFacebook():
+from . import message
 
-    ''' A line is a dict object in this format:
+
+class ParserFacebook:
+    """ A line is a dict object in this format:
     {u'message': u'text text', u'from': u'Username One', u'id':
         u'3294659605566648_1432085429', u'datetime': u'2015-05-20T01:30:29+0000'}
-    '''
+    """
 
     def __init__(self, raw_messages):
         self.raw_messages = raw_messages