Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions modules/TwintPool.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@ def __init__(self, fh_job=None, job_name='noname'):
self.config = twint.Config()
self.config.Limit = 100
self.config.Pandas = True
self.config.User_full = True
self.config.Hide_output = True
self.config.Verified = True
self.config.Verified = None
self.config.Username = None
#self.config.User_full = True
self.config.Proxy_host = "tor"
self.self.config.Proxy_port = "9050"
self.config.Proxy_type = "socks5"



def twint_loop(self, since, until, stride_sec=600, limit=None):
def get_unix_time(time_str):
return datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
Expand Down Expand Up @@ -60,13 +59,15 @@ def _get_term(self, Search="IngSoc", Since="1984-04-20 13:00:00", Until="1984-04
yield (df, t0, t1)

def _get_timeline(self, username="lmeyerov"):
    """Run a twint search for tweets authored by *username*.

    The shared ``self.config`` object is updated in place (``Username``,
    ``Retweets`` and ``Search``), so these settings remain set on the
    config after this call returns.

    Args:
        username: Account handle, used both as ``config.Username`` and in
            the ``from:<username>`` search query.

    Returns:
        Whatever ``twint.storage.panda.Tweets_df`` holds after the search
        has run (presumably a pandas DataFrame, since the constructor
        enables ``config.Pandas`` -- confirm against twint).
    """
    cfg = self.config
    cfg.Username = username
    cfg.Retweets = True
    # Restrict the search to tweets sent by this account.
    cfg.Search = "from:" + username
    twint.run.Search(cfg)
    return twint.storage.panda.Tweets_df





def _get_user_info(self, username):
self.config.Username = username
Expand Down Expand Up @@ -100,11 +101,11 @@ def row_to_tweet_type(row):
else:
raise('wat')

def row_to_quoted_status_id(row):
    """Return the quoted tweet's status id taken from ``row['quote_url']``.

    The id is the last path segment of the quote URL.

    Args:
        row: Mapping-like record (e.g. a DataFrame row or a dict) that has
            a ``'quote_url'`` entry.

    Returns:
        The last ``/``-separated segment of the URL as a string, or
        ``None`` when the entry is empty, is not a string (``None`` or a
        NaN missing-value marker), or has no usable last segment.

    Raises:
        KeyError: If ``row`` has no ``'quote_url'`` entry.
    """
    quote_url = row['quote_url']
    # Missing cells show up as None or float NaN; NaN is truthy and has no
    # len(), so check for a real string instead of relying on truthiness.
    if not isinstance(quote_url, str):
        return None
    # Ignore trailing slashes so ".../status/123/" still yields "123".
    status_id = quote_url.rstrip('/').rsplit('/', 1)[-1]
    return status_id or None
#def row_to_quoted_status_id(row):
#if row['quote_url'] and len(row['quote_url']) > 0:
#return row['quote_url'].split('/')[-1]
#else:
#return None

def row_tweet_to_urls(row):
extractor = URLExtract()
Expand All @@ -124,8 +125,8 @@ def row_tweet_to_urls(row):

neo4j_df['created_at'] = (neo4j_df['created_at'] / 1000).apply(lambda n: datetime.fromtimestamp(n))

neo4j_df['quoted_status_id'] = df.apply(row_to_quoted_status_id, axis=1)
neo4j_df['is_quote_status'] = neo4j_df['quoted_status_id'] != None
#neo4j_df['quoted_status_id'] = df.apply(row_to_quoted_status_id, axis=1)
#neo4j_df['is_quote_status'] = neo4j_df['quoted_status_id'] != None
neo4j_df['in_reply_to_status_id'] = False
neo4j_df['urls'] = df.apply(row_tweet_to_urls, axis=1)

Expand Down