11# -*- coding: utf-8 -*-
22from __future__ import division
3- import datelib
4- import re
3+
54import operator
65
7- class ChatFeatures ():
6+ import re
7+ from . import datelib
8+
89
10+ class ChatFeatures :
def __init__(self):
    """Create an empty feature container; the compute_* methods fill it in."""
    # Seconds between a contact message and the root's reply, kept only
    # when the reply arrived within the response threshold.
    self.root_response_time = []
    # Same measurement for the contact replying to the root.
    self.contact_response_time = []
    # Lengths of same-sender message runs that exceeded the burst threshold.
    self.root_burst = []
    self.contact_burst = []
    # sender -> number of messages sent after a gap longer than the
    # initiation threshold.
    self.initiations = {}
    # Populated by compute_messages_per_weekday.
    self.weekday = {}
    # Message counts keyed by "latenight", "morning", "afternoon", "evening".
    self.shifts = {}
    # pattern -> sender -> accumulated match count.
    self.patterns = {}
    # category -> per-sender counts, plus "total" and "ratio" entries.
    self.proportions = {}
    # Starts as an empty dict; compute_most_used_words replaces it with a
    # list of (word, count) pairs.
    self.most_used_words = {}
22+
23+ def compute_response_time_and_burst (self , list_of_messages , root_name , senders , initiation_thrs = (60 * 60 * 8 ),
24+ burst_thrs = 3 , response_thrs = (60 * 60 * 3 )):
2225 # perform the operations that are dependant on multiple messages
2326 # (response time, bursts)
2427 self .initiations = {}
@@ -27,30 +30,30 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
2730 t0 = list_of_messages [0 ].datetime_obj
2831 burst_count = 1
2932 for index , message in enumerate (list_of_messages ):
30- #skip the first message since we are looking at differences; note this means we don't count first msg as init
33+ # skip the first message since we are looking at differences; note this means we don't count first msg as init
3134 if index == 0 :
3235 continue
3336 t1 = message .datetime_obj
3437 dt = t1 - t0
3538 dt .total_seconds ()
3639
3740 # print "sender %s delta %s" % ( message.sender, dt.total_seconds() )
38- if ( dt .total_seconds () > initiation_thrs ) :
41+ if dt .total_seconds () > initiation_thrs :
3942 self .initiations [message .sender ] += 1
4043
4144 # is sender the same as the last message?
42- if message .sender != list_of_messages [index - 1 ].sender :
45+ if message .sender != list_of_messages [index - 1 ].sender :
4346 # sender changed, store the burst count and reset
44- #print "sender changed: %s" % ( message.sender )
45- #print "burst count: %s" % ( burst_count )
47+ # print "sender changed: %s" % ( message.sender )
48+ # print "burst count: %s" % ( burst_count )
4649
47- #print("response time: %d\n" %(dt.total_seconds()) )
50+ # print("response time: %d\n" %(dt.total_seconds()) )
4851 # is sender the root?
4952 if message .sender == root_name :
5053 # store the burst count for the last sender, which is the
5154 # opposite of current
5255 if burst_count > burst_thrs :
53- #print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
56+ # print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
5457 self .contact_burst .append (burst_count )
5558 if dt .total_seconds () < response_thrs :
5659 self .root_response_time .append (dt .total_seconds ())
@@ -59,24 +62,24 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
5962 # store the burst count for the last sender, which is the
6063 # opposite of current
6164 if burst_count > burst_thrs :
62- #print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
65+ # print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
6366 self .root_burst .append (burst_count )
6467 if dt .total_seconds () < response_thrs :
6568 self .contact_response_time .append (dt .total_seconds ())
66-
69+
6770 # End of the first burst, restart the counter
6871 burst_count = 1
6972
7073 else :
7174 # accumulate the number of messages sent in a row
7275 burst_count += 1
7376 t0 = t1
74- if burst_count > burst_thrs : # catch a burst if at end of chat
75- #print "final burst: %s" % ( burst_count )
76- if message .sender == root_name :
77+ if burst_count > burst_thrs : # catch a burst if at end of chat
78+ # print "final burst: %s" % ( burst_count )
79+ if message .sender == root_name :
7780 self .root_burst .append (burst_count )
7881 else :
79- self .contact_burst .append (burst_count )
82+ self .contact_burst .append (burst_count )
8083
8184 def compute_messages_per_weekday (self , list_of_messages ):
8285 self .weekday = {
@@ -105,16 +108,16 @@ def compute_messages_per_shift(self, list_of_messages):
105108 }
106109 for msg in list_of_messages :
107110 hour = int (msg .time .split (":" )[0 ])
108- if hour >= 0 and hour <= 6 :
111+ if 0 <= hour <= 6 :
109112 self .shifts ["latenight" ] += 1
110113
111- elif hour > 6 and hour <= 11 :
114+ elif 6 < hour <= 11 :
112115 self .shifts ["morning" ] += 1
113116
114- elif hour > 11 and hour <= 17 :
117+ elif 11 < hour <= 17 :
115118 self .shifts ["afternoon" ] += 1
116119
117- elif hour > 17 and hour <= 23 :
120+ elif 17 < hour <= 23 :
118121 self .shifts ["evening" ] += 1
119122 return self .shifts
120123
@@ -134,7 +137,7 @@ def compute_messages_pattern(self, list_of_messages, senders, pattern_list):
134137 if length > 0 :
135138 if pattern not in self .patterns :
136139 self .patterns [pattern ][msg .sender ] = length
137- print "This should never happen"
140+ print ( "This should never happen" )
138141 else :
139142 self .patterns [pattern ][msg .sender ] += length
140143 return self .patterns
@@ -149,10 +152,10 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
149152 self .proportions [i ][s ] = 0
150153 for msg in list_of_messages :
151154 self .proportions ["messages" ][msg .sender ] += 1
152- self .proportions ["words" ][msg .sender ] += len (msg .content .split (" " ))
153- self .proportions ["chars" ][msg .sender ] += len (msg .content .strip ())
154- self .proportions ["qmarks" ][msg .sender ] += msg .content .count ('?' )
155- self .proportions ["exclams" ][msg .sender ] += msg .content .count ('!' )
155+ self .proportions ["words" ][msg .sender ] += len (msg .content .split (" " ))
156+ self .proportions ["chars" ][msg .sender ] += len (msg .content .strip ())
157+ self .proportions ["qmarks" ][msg .sender ] += msg .content .count ('?' )
158+ self .proportions ["exclams" ][msg .sender ] += msg .content .count ('!' )
156159 self .proportions ["media" ][msg .sender ] += (
157160 msg .content .count ('<media omitted>' ) +
158161 msg .content .count ('<image omitted>' ) +
@@ -170,24 +173,24 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
170173 self .proportions ["avg_words" ] = {}
171174 for s in senders :
172175 self .proportions ["avg_words" ][s ] = self .proportions ["words" ][s ] / self .proportions ["messages" ][s ]
173- self .proportions ["avg_words" ]["ratio" ] = self .proportions ["avg_words" ][root ] / self .proportions ["avg_words" ][contact ]
176+ self .proportions ["avg_words" ]["ratio" ] = self .proportions ["avg_words" ][root ] / self .proportions ["avg_words" ][
177+ contact ]
174178
175179 for c in categories :
176180 self .proportions [c ]["total" ] = 0
177181 for s in senders :
178182 self .proportions [c ]["total" ] += self .proportions [c ][s ]
179-
183+
180184 for c in categories :
181-
182- #if a value is 0, replace with a 1 to avoid zero erros in ratio calcs.
185+
186+ # if a value is 0, replace with a 1 to avoid zero erros in ratio calcs.
183187 if self .proportions [c ][contact ] == 0 :
184188 self .proportions [c ][contact ] = 1
185189 if self .proportions [c ][root ] == 0 :
186- self .proportions [c ][root ] = 1
190+ self .proportions [c ][root ] = 1
187191
188192 self .proportions [c ]["ratio" ] = self .proportions [c ][root ] / self .proportions [c ][contact ]
189193
190-
191194 return self .proportions
192195
193196 def compute_most_used_words (self , list_of_messages , top = 10 , threshold = 3 ):
@@ -204,37 +207,37 @@ def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
204207 words_counter [w ] = 1
205208 else :
206209 words_counter [w ] += 1
207- sorted_words = sorted (words_counter .iteritems (), key = operator .itemgetter (1 ), reverse = True )
210+ sorted_words = sorted (words_counter .items (), key = operator .itemgetter (1 ), reverse = True )
208211 self .most_used_words = sorted_words [:top ]
209212 return self .most_used_words
210213
def compute_avg_root_response_time(self):
    """Return the mean of ``self.root_response_time``.

    Returns:
        The arithmetic mean of the recorded values, or 0 when nothing has
        been recorded (avoids dividing by zero).
    """
    if not self.root_response_time:
        return 0
    return sum(self.root_response_time) / len(self.root_response_time)
215218
def compute_avg_contact_response_time(self):
    """Return the mean of ``self.contact_response_time``.

    Returns:
        The arithmetic mean of the recorded values, or 0 when nothing has
        been recorded (avoids dividing by zero).
    """
    if not self.contact_response_time:
        return 0
    return sum(self.contact_response_time) / len(self.contact_response_time)
220223
def compute_response_time_ratio(self, root, contact):
    """Return average root response time divided by the contact's average.

    Args:
        root: Unused; kept so existing callers keep working.
        contact: Unused; kept so existing callers keep working.

    Returns:
        The ratio of the two averages, or 0 when the contact average is 0
        (which is also what the average helper reports for "no data").
    """
    avg_root = self.compute_avg_root_response_time()
    avg_contact = self.compute_avg_contact_response_time()
    if avg_contact == 0:
        return 0
    return avg_root / avg_contact
227230
def compute_bursts_ratio(self, root, contact):
    """Return the number of root bursts divided by the number of contact bursts.

    A count of zero on one side is replaced by 1 so the ratio stays defined.
    The contact side is checked first, so when neither side has any burst
    the result is 0 rather than 1.

    Args:
        root: Unused; kept so existing callers keep working.
        contact: Unused; kept so existing callers keep working.

    Returns:
        The burst-count ratio as described above.
    """
    root_count = len(self.root_burst)
    contact_count = len(self.contact_burst)
    if contact_count == 0:
        return root_count / 1
    if root_count == 0:
        return 1 / contact_count
    return root_count / contact_count
234237
def compute_nbr_root_burst(self):
    """Return how many entries ``self.root_burst`` holds."""
    return len(self.root_burst)
237-
240+
def compute_nbr_contact_burst(self):
    """Return how many entries ``self.contact_burst`` holds."""
    return len(self.contact_burst)
240243
@@ -244,48 +247,41 @@ def compute_nbr_contact_burst(self):
244247 # return 0
245248
def compute_avg_contact_burst(self):
    """Return the mean of ``self.contact_burst``.

    Returns:
        The arithmetic mean of the recorded burst lengths, or 0 when none
        has been recorded (avoids dividing by zero).
    """
    if not self.contact_burst:
        return 0
    return sum(self.contact_burst) / len(self.contact_burst)
250253
def compute_root_initation_ratio(self, root, contact):
    """Return root initiations divided by contact initiations.

    A count of zero on one side is replaced by 1 so the ratio stays defined.
    The contact side is checked first, so when both counts are zero the
    result is 0 rather than 1.

    Args:
        root: Key of the root sender in ``self.initiations``.
        contact: Key of the contact sender in ``self.initiations``.

    Returns:
        The initiation-count ratio as described above.

    Raises:
        KeyError: If either key is missing from ``self.initiations``.
    """
    root_count = self.initiations[root]
    contact_count = self.initiations[contact]
    if contact_count == 0:
        return root_count / 1
    if root_count == 0:
        return 1 / contact_count
    return root_count / contact_count
257-
260+
def generate_outcome(self, root, contact, methodology):
    """Classify the chat with one of two threshold-based decision rules.

    Args:
        root: Key of the root sender, forwarded to
            ``compute_root_initation_ratio``.
        contact: Key of the contact sender, forwarded likewise.
        methodology: 0 checks the initiation ratio and then the
            question-mark ratio; 1 checks the initiation ratio and then the
            average root response time.

    Returns:
        0 ("just not that into you"), 1 ("definitely into you"), or 99 when
        ``methodology`` is neither 0 nor 1.

    Raises:
        KeyError: If a value this rule reads (``self.initiations`` entries,
            or ``self.proportions["qmarks"]["ratio"]`` for methodology 0)
            has not been populated yet.
    """
    outcome = 99  # sentinel for an unrecognised methodology

    if methodology == 0:
        if self.compute_root_initation_ratio(root, contact) > 0.867:
            outcome = 0  # "just not that into you"
        elif self.proportions["qmarks"]["ratio"] > 0.87:
            # Direction of this inequality was deliberately flipped upstream.
            outcome = 0  # "just not that into you"
        else:
            outcome = 1  # "definitely into you"
    elif methodology == 1:
        if self.compute_root_initation_ratio(root, contact) > 0.83:
            outcome = 0  # "just not that into you"
        # The average lives on this object; there is no ``features``
        # attribute here, so the previous ``self.features.`` lookup raised
        # AttributeError.
        # NOTE(review): 0.92 is compared against an average measured in
        # seconds - confirm a ratio was not intended.
        elif self.compute_avg_root_response_time() < 0.92:
            # Direction of this inequality was deliberately flipped upstream.
            outcome = 0  # "just not that into you"
        else:
            outcome = 1  # "definitely into you"

    return outcome
0 commit comments