Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
TFG-ACM-EmoRec
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
EmoRec
TFG-ACM-EmoRec
Merge requests
!1
Deleted twitter-scrapper.py
Code
Review changes
Check out branch
Open in Workspace
Download
Patches
Plain diff
Expand sidebar
Open
Deleted twitter-scrapper.py
doc
into
master
Overview
0
Commits
23
Pipelines
0
Changes
1
Open
Deleted twitter-scrapper.py
albcalv
requested to merge
doc
into
master
Apr 18, 2020
Overview
0
Commits
23
Pipelines
0
Changes
1
0
0
Merge request reports
Compare
master
version 22
7903639a
Jun 9, 2020
version 21
7b7b7ee4
May 28, 2020
version 20
5d468066
May 28, 2020
version 19
39779139
May 22, 2020
version 18
bdf6932f
May 22, 2020
version 17
17b3c4f0
May 22, 2020
version 16
29165b8d
May 8, 2020
version 15
78f913fb
May 8, 2020
version 14
c88c01f1
May 8, 2020
version 13
c375e16d
May 8, 2020
version 12
0a350c0f
Apr 22, 2020
version 11
5334f2c3
Apr 22, 2020
version 10
eedcd3f0
Apr 21, 2020
version 9
4490e7cc
Apr 21, 2020
version 8
9230c4fd
Apr 21, 2020
version 7
a9f02671
Apr 20, 2020
version 6
ad7020e3
Apr 20, 2020
version 5
1bda9046
Apr 20, 2020
version 4
e14e3458
Apr 18, 2020
version 3
9c8ea3d9
Apr 18, 2020
version 2
eb8d2cfc
Apr 18, 2020
version 1
40701674
Apr 18, 2020
master (base)
and
version 1
latest version
1936fca1
23 commits,
Jun 9, 2020
version 22
7903639a
22 commits,
Jun 9, 2020
version 21
7b7b7ee4
21 commits,
May 28, 2020
version 20
5d468066
20 commits,
May 28, 2020
version 19
39779139
19 commits,
May 22, 2020
version 18
bdf6932f
18 commits,
May 22, 2020
version 17
17b3c4f0
17 commits,
May 22, 2020
version 16
29165b8d
16 commits,
May 8, 2020
version 15
78f913fb
15 commits,
May 8, 2020
version 14
c88c01f1
14 commits,
May 8, 2020
version 13
c375e16d
13 commits,
May 8, 2020
version 12
0a350c0f
12 commits,
Apr 22, 2020
version 11
5334f2c3
11 commits,
Apr 22, 2020
version 10
eedcd3f0
10 commits,
Apr 21, 2020
version 9
4490e7cc
9 commits,
Apr 21, 2020
version 8
9230c4fd
8 commits,
Apr 21, 2020
version 7
a9f02671
7 commits,
Apr 20, 2020
version 6
ad7020e3
6 commits,
Apr 20, 2020
version 5
1bda9046
5 commits,
Apr 20, 2020
version 4
e14e3458
4 commits,
Apr 18, 2020
version 3
9c8ea3d9
3 commits,
Apr 18, 2020
version 2
eb8d2cfc
2 commits,
Apr 18, 2020
version 1
40701674
1 commit,
Apr 18, 2020
1 file
+
0
−
87
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
twitter-scrapper.py deleted
100644 → 0
+
0
−
87
View file @ 4c0529a0
import
json
import
csv
import
tweepy
import
re
import
os
import
pandas
as
pd
import
datetime
import
claves_twitter
as
ct
# Compiled once at import time: remove_emoji() is called for every tweet.
# Each entry is an explicit emoji/pictograph block. A single wide range such
# as U+24C2..U+1F251 must NOT be used here: it spans most of the Basic
# Multilingual Plane and would also delete CJK, Hangul and other real text.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
    "\U0001F200-\U0001F251"  # enclosed ideographic supplement
    "\U00002600-\U000027BF"  # miscellaneous symbols & dingbats
    "\U000024C2"             # circled latin capital letter M
    "\U0000FE0F"             # variation selector-16 (emoji presentation)
    "]+"
)


def remove_emoji(string):
    """Return *string* with emoji and pictographic symbols removed.

    Only characters inside the explicit Unicode blocks listed in
    ``_EMOJI_PATTERN`` are stripped; letters of any script (including
    accented Latin and CJK characters) are left untouched.

    Args:
        string: Text to clean.

    Returns:
        A new string without the matched emoji characters. An empty input
        yields an empty string.
    """
    return _EMOJI_PATTERN.sub('', string)
def clean_tweet(text):
    """Strip emoji and URLs from the text of a tweet.

    Emoji are removed first via ``remove_emoji``; afterwards every token
    that starts with ``http`` (up to the next whitespace) is deleted.
    User mentions (``@name``) are intentionally kept.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned text.
    """
    sin_emoji = remove_emoji(text)
    # Drop URLs: "http" followed by any run of non-whitespace characters.
    return re.sub(r'http\S+', '', sin_emoji)
def busqueda_por_hashtag(consumer_key, consumer_secret, access_token,
                         access_token_secret, hash, lang="es",
                         max_tweets=1000):
    """Search Twitter for a query and store the results in an Excel file.

    Retweets are excluded from the search. For every tweet found, the
    creation date and the cleaned full text (newlines replaced by spaces,
    then passed through ``clean_tweet``) are collected and written to
    ``datos/twitter-<today>.xlsx`` in a sheet named ``hoja``.

    Args:
        consumer_key: Twitter API consumer key.
        consumer_secret: Twitter API consumer secret.
        access_token: Twitter API access token.
        access_token_secret: Twitter API access token secret.
        hash: Search query (hashtags or words, optionally combined with
            OR/AND). The name shadows the ``hash`` builtin but is kept so
            existing callers keep working.
        lang: Language code used to restrict the search.
        max_tweets: Maximum number of tweets to retrieve.

    Returns:
        None. The results are written to disk.
    """
    import warnings

    # Authenticate against the Twitter API.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Tweepy 4 renamed API.search to API.search_tweets; support both.
    buscar = getattr(api, 'search_tweets', None) or api.search

    # Collect rows in a plain list and build the DataFrame once; growing a
    # DataFrame row by row with .loc copies data on every insertion.
    cursor = tweepy.Cursor(
        buscar,
        q=hash + ' -filter:retweets',
        lang=lang,
        tweet_mode='extended',
    )
    filas = []
    for tweet in cursor.items(max_tweets):
        texto = clean_tweet(tweet.full_text.replace('\n', ' '))
        filas.append((tweet.created_at, texto))
    df = pd.DataFrame(filas, columns=['Fecha', 'Mensaje'])

    # Store the collected data, one file per day.
    os.makedirs('datos', exist_ok=True)
    nombre_archivo = 'datos/twitter-' + str(datetime.date.today()) + '.xlsx'

    try:
        # Prefer appending to today's workbook if it can be opened that way.
        with pd.ExcelWriter(nombre_archivo, mode='a') as writer:
            df.to_excel(writer, sheet_name='hoja', index=None)
    except Exception as err:
        # Best-effort fallback kept from the original behavior: appending
        # fails when the file does not exist yet or the Excel engine does
        # not support append mode, so write a fresh workbook instead.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate and reports why the fallback was taken.
        warnings.warn(
            'Could not append to %s (%s); writing a new file instead.'
            % (nombre_archivo, err)
        )
        df.to_excel(nombre_archivo, index=None, header=True,
                    sheet_name='hoja')
def coloca_hashtags(lista):
    """Join search terms into a single query separated by ``OR``.

    The input is not modified. (Popping the first element in place would
    silently alter the caller's list, and would fail on an empty one.)

    Args:
        lista: Iterable of search terms (strings).

    Returns:
        The terms joined with ``' OR '``; a single term is returned as-is
        and an empty input yields an empty string.
    """
    return ' OR '.join(lista)
# Terms to search for (without a leading '#' they are matched as plain
# words). Terms can be combined with OR or AND inside the `q` query parameter.
lista = [
    'alegria',
    'feliz',
    'felicidad',
    'alegre',
    'diversión',
    'divertido',
]

if __name__ == '__main__':
    # Credentials come from the local `claves_twitter` module (imported as ct).
    busqueda_por_hashtag(
        ct.consumer_key,
        ct.consumer_secret,
        ct.access_token,
        ct.access_token_secret,
        coloca_hashtags(lista),
    )
Loading