Commit 8a092360 authored by Alessandro Melchiorre
Browse files

# index split pushed at the beginning

parent 6d2df439
......@@ -15,10 +15,33 @@ lfm1b_us_pc = pd.read_csv('./data/LFM-1b_users_playcounts_filtered.txt', delimit
# Combine the three per-user tables into a single frame, ordered by 'pc'.
# columns are: uid, username, country, max_ts, pc
lfm1b = pd.merge(pd.merge(lfm1b_users, lfm1b_ts), lfm1b_us_pc)
lfm1b = lfm1b.sort_values('pc')
print(lfm1b.head())
print("Number of users in LFM-1b is: {}".format(lfm1b.shape[0]))

# --- Per-crawler partition -------------------------------------------------
# The users are split between `number_of_crawlers` executions: a row belongs to
# this execution when (row position % number_of_crawlers) == this_crawler.
# this_crawler is included in crawling_settings.py
# Don't change this! (the partition depends on the row order produced above,
# so every crawler has to compute it in exactly the same way)
number_of_crawlers = 3

# Exactly one of the following setting pairs is active at a time.
### troete
this_crawler = 0
api_key_idx = 1
### kara
# this_crawler = 1
# api_key_idx = 2
### dragonforce
# this_crawler = 2
# api_key_idx = 3

# Renumber the rows 0..n-1 after sorting so the modulo test works on positions.
lfm1b = lfm1b.reset_index(drop=True)
lfm1b = lfm1b.loc[(lfm1b.index % number_of_crawlers) == this_crawler]
print(lfm1b.head())
print("Number of users to crawl is: {}".format(lfm1b.shape[0]))

# Unix timestamp (seconds) for 20/03/2020 12:00:00 GMT
to_ts = 1584705600
......@@ -55,26 +78,6 @@ if not work_on_failures:
# Report how many users are listed in failed_users, then drop them from this run.
print("Number of users failed is: {}".format(len(failed_users)))
# Keep only the rows whose username is NOT contained in failed_users.
lfm1b = lfm1b[~lfm1b.username.isin(failed_users)]
# NOTE(review): the partition block below also appears, almost verbatim, earlier in
# this listing (before the failed-user filter). The commit title suggests it was
# moved there -- confirm which copy is live before editing either one.
# number_of_crawlers and this_crawler are used to select only the subset of users
# for the current execution: a row is kept when (index % number_of_crawlers) == this_crawler.
# this_crawler is included in crawling_settings.py
# Don't change this!
number_of_crawlers = 3
# The '###' labels presumably name the machine each setting pair belongs to -- TODO confirm.
### troete
this_crawler = 0
api_key_idx = 1
### kara
# this_crawler = 1
# api_key_idx = 2
### dragonforce
# this_crawler = 2
# api_key_idx = 3
lfm1b = lfm1b.reset_index(drop=True) # Generates a new 0..n-1 index so the modulo below works on row positions
# Keep every number_of_crawlers-th row, starting at offset this_crawler.
lfm1b = lfm1b[lfm1b.index % number_of_crawlers == this_crawler]
print("Number of users to crawl is: {}".format(len(lfm1b)))
# api_key_idx is included in crawling_settings
# Build the crawler with the key/secret pair at position api_key_idx and the to_ts value.
crawler = LastFMCrawler(api_key=API_KEYS[api_key_idx], api_secret=API_SECRETS[api_key_idx], to_ts=to_ts)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment