Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alessandro Melchiorre
last_fm_crawler
Commits
38993970
Commit
38993970
authored
Apr 09, 2020
by
Alessandro Melchiorre
Browse files
- new api key
- outside settings
parent
e7a91b70
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/conf.py
View file @
38993970
...
...
@@ -6,3 +6,9 @@ API_SECRET_2 = '59d4c1c67c3fad02f00456dad5df8f22'
API_KEY_3
=
'd6a28f74f0f8a8dc7d01c594a60c716c'
API_SECRET_3
=
'5e051193161130c797cd97eefe3170d7'
API_KEY_4
=
'5f89b409e9b3efad06bd212ca56d5aad'
API_SECRET_4
=
'1998115447a6483c4edb8d8ad75d8670'
API_KEYS
=
[
API_KEY_1
,
API_KEY_2
,
API_KEY_3
,
API_KEY_4
]
API_SECRETS
=
[
API_SECRET_1
,
API_SECRET_2
,
API_SECRET_3
,
API_SECRET_4
]
src/crawling_settings.py
0 → 100644
View file @
38993970
this_crawler
=
1
api_key_idx
=
0
src/main.py
View file @
38993970
...
...
@@ -3,7 +3,8 @@ import os
import
pandas
as
pd
from
LastFMCrawler
import
LastFMCrawler
from
conf
import
API_KEY_2
,
API_SECRET_2
from
conf
import
API_KEYS
,
API_SECRETS
from
crawling_settings
import
api_key_idx
,
this_crawler
lfm1b_users
=
pd
.
read_csv
(
"./data/LFM-1b_users.txt"
,
delimiter
=
"
\t
"
,
header
=
None
,
usecols
=
[
0
,
1
,
3
],
names
=
[
"uid"
,
"username"
,
"country"
])
...
...
@@ -47,13 +48,15 @@ if not work_on_failures:
print
(
"Number of users failed is: {}"
.
format
(
len
(
failed_users
)))
lfm1b
=
lfm1b
[
~
lfm1b
.
username
.
isin
(
failed_users
)]
# number_of_crawlers and this crawlers are used to find only subset of users for the current execution (modulo)sky
# number_of_crawlers and this_crawler are used to select only a subset of users for the current execution (uid modulo number_of_crawlers)
# this_crawler is included in crawling_settings.py
number_of_crawlers
=
3
this_crawler
=
2
lfm1b
=
lfm1b
[
lfm1b
.
uid
%
number_of_crawlers
==
this_crawler
]
print
(
"Number of users to crawl is: {}"
.
format
(
len
(
lfm1b
)))
crawler
=
LastFMCrawler
(
api_key
=
API_KEY_2
,
api_secret
=
API_SECRET_2
,
to_ts
=
to_ts
)
# api_key_idx is included in crawling_settings
crawler
=
LastFMCrawler
(
api_key
=
API_KEYS
[
api_key_idx
],
api_secret
=
API_SECRETS
[
api_key_idx
],
to_ts
=
to_ts
)
if
work_on_failures
:
crawler
.
crawl
(
lfm1b
[[
"username"
,
"max_ts"
]].
to_dict
(
"records"
),
folder_name
,
error_file
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment