Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
tap1012
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Mario Chirinos Colunga
tap1012
Commits
f700ccfb
Commit
f700ccfb
authored
Mar 23, 2019
by
geobumac
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
examen
parent
34e70cbc
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
235 additions
and
6 deletions
+235
-6
08-Test2-checkpoint.ipynb
.ipynb_checkpoints/08-Test2-checkpoint.ipynb
+77
-2
08-Test2.ipynb
08-Test2.ipynb
+158
-4
examen2_wordcloud.png
examen2_wordcloud.png
+0
-0
No files found.
.ipynb_checkpoints/08-Test2-checkpoint.ipynb
View file @
f700ccfb
...
...
@@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 1
05
,
"execution_count": 1
33
,
"metadata": {},
"outputs": [
{
...
...
@@ -180,7 +180,7 @@
"Documento3 0.0 "
]
},
"execution_count": 1
05
,
"execution_count": 1
33
,
"metadata": {},
"output_type": "execute_result"
}
...
...
@@ -193,6 +193,11 @@
"\n",
"class MatrizDT:\n",
" def __init__(self, listaDocs):\n",
" \"\"\"Constructor de la clase\n",
"\n",
" Args:\n",
" listaDocs [(string)]: lista de documentos (Textos a interpretar)\n",
" \"\"\"\n",
" self.listaDocs = listaDocs\n",
" self.cleanDocs = self.getArrayCleanDocs()\n",
" self.totalDocs = len(listaDocs)\n",
...
...
@@ -200,11 +205,26 @@
" self.listNameDoc = self.getListNameDoc()\n",
" \n",
" def removerpunt(self, s):\n",
"        \"\"\"Funcion que elimina caracteres no alfabeticos\n",
"\n",
" Args:\n",
" s (string): palabra a la cual se eliminaran los caracteres\n",
"\n",
" Examples:\n",
" >>> self.removerpunt(\"este es un texto con texto que es repetido y repetido, \")\n",
" este es un texto con texto que es repetido y repetido\n",
" \"\"\"\n",
" s = re.sub(r\"[,|\\.|:|?|=|]\", \"\", s)\n",
" s = re.sub(r\"[^\\w\\s]\", '', s)\n",
" return s\n",
" \n",
" def getListNameDoc(self):\n",
" \"\"\"Funcion que devuelve un listado de nombres de documentos tomando de referencia la cantidad de documentos\n",
" \n",
" Examples:\n",
" >>> self.getListNameDoc()\n",
" [\"Documento1\", \"Documento2\", ..., \"DocumentoN\"]\n",
" \"\"\"\n",
" aux = []\n",
" iCont = 1\n",
" for i in self.listaDocs:\n",
...
...
@@ -213,6 +233,8 @@
" return aux\n",
"\n",
" def getArrayCleanDocs(self):\n",
"        \"\"\"Funcion que devuelve el arreglo de documentos al cual se le han eliminado los caracteres no deseados\n",
" \"\"\"\n",
" aux = []\n",
" for i in self.listaDocs:\n",
" texto = self.removerpunt(i)\n",
...
...
@@ -221,6 +243,12 @@
" return np.array(aux)\n",
" \n",
" def getTerminos(self):\n",
"        \"\"\"Funcion que devuelve el listado completo de terminos existentes en los documentos\n",
" \n",
" Examples:\n",
" >>> self.getTerminos()\n",
" [\"este\", \"es\", \"un\", \"ejemplo\"]\n",
" \"\"\"\n",
" listaDocs = self.listaDocs\n",
" texto = ' '.join(listaDocs)\n",
" texto = texto.lower()\n",
...
...
@@ -228,6 +256,14 @@
" return np.array(list(set(arrayTexto)))\n",
" \n",
" def tf(self):\n",
"        \"\"\"Funcion que devuelve una matriz documento-termino en la cual cada celda es la frecuencia del termino\n",
" \n",
" Examples:\n",
"            >>> self.tf()\n",
" array([[0., 0., 1., 0., 1.69314718, 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0.],\n",
" [1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1.],\n",
" [0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0.]])\n",
" \"\"\"\n",
" aux = []\n",
" iCont = 1\n",
" for iDoc in self.cleanDocs:\n",
...
...
@@ -236,6 +272,14 @@
" return np.array(aux)\n",
" \n",
" def idf(self):\n",
"        \"\"\"Funcion que devuelve el listado completo de terminos existentes en los documentos\n",
" \n",
" Examples:\n",
" >>> self.getTerminos()\n",
" array([[0., 0., 1., 0., 1.69314718, 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0.],\n",
" [1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1.],\n",
" [0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0.]])\n",
" \"\"\"\n",
" aux = []\n",
" data = self.tf()\n",
" #print(self.tf())\n",
...
...
@@ -280,6 +324,37 @@
"mdt.getDF(tf)"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0. , 0. , 1. , 0. , 1.69314718,\n",
" 1. , 0. , 0. , 0. , 1. ,\n",
" 0. , 1. , 0. , 1. , 0. ,\n",
" 0. ],\n",
" [1. , 0. , 0. , 1. , 1. ,\n",
" 1. , 0. , 0. , 0. , 0. ,\n",
" 1. , 0. , 0. , 1. , 0. ,\n",
" 1. ],\n",
" [0. , 1. , 0. , 0. , 1. ,\n",
" 1. , 1. , 0. , 1. , 0. ,\n",
" 0. , 0. , 0. , 0. , 1. ,\n",
" 0. ]])"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tf"
]
},
{
"cell_type": "code",
"execution_count": 106,
...
...
08-Test2.ipynb
View file @
f700ccfb
This diff is collapsed.
Click to expand it.
examen2_wordcloud.png
View replaced file @
34e70cbc
View file @
f700ccfb
50.7 KB
|
W:
|
H:
53.2 KB
|
W:
|
H:
2-up
Swipe
Onion skin
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment