Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alessandro Melchiorre
last_fm_crawler
Commits
8a092360
Commit
8a092360
authored
Jun 18, 2020
by
Alessandro Melchiorre
Browse files
# index split pushed at the beginning
parent
6d2df439
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/main.py
View file @
8a092360
...
...
@@ -15,10 +15,33 @@ lfm1b_us_pc = pd.read_csv('./data/LFM-1b_users_playcounts_filtered.txt', delimit
# columns are: uid, username, country, max_ts, pc
# Combine the three per-user tables (pandas' default merge joins on the
# column names the frames share) and order the rows by ascending 'pc'.
lfm1b = (
    pd.merge(lfm1b_users, lfm1b_ts)
    .merge(lfm1b_us_pc)
    .sort_values(by='pc')
)
print(lfm1b.head())
print("Number of users in LFM-1b is: {}".format(lfm1b.shape[0]))

# The user list is partitioned across several crawler instances: each
# instance keeps only the rows whose index is congruent to this_crawler
# modulo number_of_crawlers.
# NOTE(review): the earlier comment said this_crawler is included in
# crawling_settings.py, yet it is assigned right here -- confirm which
# location is authoritative.
# Don't change this!
number_of_crawlers = 3

# Exactly one of the per-crawler settings below must be active.
# (troete / kara / dragonforce are presumably machine names -- TODO confirm)
### troete
this_crawler, api_key_idx = 0, 1
### kara
# this_crawler, api_key_idx = 1, 2
### dragonforce
# this_crawler, api_key_idx = 2, 3

# Renumber the rows 0..N-1 in their sorted order first, so the modulo test
# is applied to sorted positions rather than to the pre-sort labels.
lfm1b = lfm1b.reset_index(drop=True)
lfm1b = lfm1b.loc[lfm1b.index % number_of_crawlers == this_crawler]
print(lfm1b.head())
print("Number of users to crawl is: {}".format(lfm1b.shape[0]))

# timestamp for 20/03/2020 12:00:00 GMT
to_ts = 1584705600
...
...
@@ -55,26 +78,6 @@ if not work_on_failures:
print
(
"Number of users failed is: {}"
.
format
(
len
(
failed_users
)))
lfm1b
=
lfm1b
[
~
lfm1b
.
username
.
isin
(
failed_users
)]
# Split the user table across crawler instances: only the rows whose index
# satisfies index % number_of_crawlers == this_crawler are kept for this run.
# NOTE(review): the hunk header above (-55,26 +78,6) together with the commit
# title suggests this split is the pre-commit copy that was moved to the top
# of the script -- confirm before relying on it. If an identical split has
# already run earlier, running this one too would shrink the subset again.
# NOTE(review): the original comment said this_crawler is included in
# crawling_settings.py, yet it is assigned here -- confirm which one applies.
# Don't change this!
number_of_crawlers
=
3
# Settings for the active crawler instance; exactly one group is enabled.
# (troete / kara / dragonforce are presumably machine names -- TODO confirm)
### troete
this_crawler
=
0
api_key_idx
=
1
### kara
# this_crawler = 1
# api_key_idx = 2
### dragonforce
# this_crawler = 2
# api_key_idx = 3
lfm1b
=
lfm1b
.
reset_index
(
drop
=
True
)
# Generates a new index (the old one is dropped), so the modulo test below
# works on the current row order.
lfm1b
=
lfm1b
[
lfm1b
.
index
%
number_of_crawlers
==
this_crawler
]
print
(
"Number of users to crawl is: {}"
.
format
(
len
(
lfm1b
)))
# Build the crawler. The same api_key_idx selects both the key and the
# secret, so the two always come from the same position in their lists.
# NOTE(review): the original comment said api_key_idx is included in
# crawling_settings -- confirm, since it is also assigned above.
crawler
=
LastFMCrawler
(
api_key
=
API_KEYS
[
api_key_idx
],
api_secret
=
API_SECRETS
[
api_key_idx
],
to_ts
=
to_ts
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment