Commit f700ccfb authored by geobumac's avatar geobumac

examen

parent 34e70cbc
......@@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 105,
"execution_count": 133,
"metadata": {},
"outputs": [
{
......@@ -180,7 +180,7 @@
"Documento3 0.0 "
]
},
"execution_count": 105,
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
......@@ -193,6 +193,11 @@
"\n",
"class MatrizDT:\n",
" def __init__(self, listaDocs):\n",
" \"\"\"Constructor de la clase\n",
"\n",
" Args:\n",
" listaDocs [(string)]: lista de documentos (Textos a interpretar)\n",
" \"\"\"\n",
" self.listaDocs = listaDocs\n",
" self.cleanDocs = self.getArrayCleanDocs()\n",
" self.totalDocs = len(listaDocs)\n",
......@@ -200,11 +205,26 @@
" self.listNameDoc = self.getListNameDoc()\n",
" \n",
" def removerpunt(self, s):\n",
"        \"\"\"Funcion que elimina los caracteres no alfanumericos (signos de puntuacion)\n",
"\n",
" Args:\n",
"            s (string): texto al cual se le eliminaran los caracteres\n",
"\n",
" Examples:\n",
" >>> self.removerpunt(\"este es un texto con texto que es repetido y repetido, \")\n",
"            'este es un texto con texto que es repetido y repetido '\n",
" \"\"\"\n",
" s = re.sub(r\"[,|\\.|:|?|=|​]\", \"\", s)\n",
" s = re.sub(r\"[^\\w\\s]\", '', s)\n",
" return s\n",
" \n",
" def getListNameDoc(self):\n",
" \"\"\"Funcion que devuelve un listado de nombres de documentos tomando de referencia la cantidad de documentos\n",
" \n",
" Examples:\n",
" >>> self.getListNameDoc()\n",
" [\"Documento1\", \"Documento2\", ..., \"DocumentoN\"]\n",
" \"\"\"\n",
" aux = []\n",
" iCont = 1\n",
" for i in self.listaDocs:\n",
......@@ -213,6 +233,8 @@
" return aux\n",
"\n",
" def getArrayCleanDocs(self):\n",
"        \"\"\"Funcion que devuelve el arreglo de documentos al cual se le han eliminado los caracteres no deseados\n",
" \"\"\"\n",
" aux = []\n",
" for i in self.listaDocs:\n",
" texto = self.removerpunt(i)\n",
......@@ -221,6 +243,12 @@
" return np.array(aux)\n",
" \n",
" def getTerminos(self):\n",
"        \"\"\"Funcion que devuelve el listado completo de terminos existentes en los documentos\n",
" \n",
" Examples:\n",
" >>> self.getTerminos()\n",
" [\"este\", \"es\", \"un\", \"ejemplo\"]\n",
" \"\"\"\n",
" listaDocs = self.listaDocs\n",
" texto = ' '.join(listaDocs)\n",
" texto = texto.lower()\n",
......@@ -228,6 +256,14 @@
" return np.array(list(set(arrayTexto)))\n",
" \n",
" def tf(self):\n",
"        \"\"\"Funcion que devuelve una matriz documento-termino en la cual cada celda es la frecuencia del termino\n",
"        \n",
"        Examples:\n",
"            >>> self.tf()\n",
" array([[0., 0., 1., 0., 1.69314718, 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0.],\n",
" [1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1.],\n",
" [0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0.]])\n",
" \"\"\"\n",
" aux = []\n",
" iCont = 1\n",
" for iDoc in self.cleanDocs:\n",
......@@ -236,6 +272,14 @@
" return np.array(aux)\n",
" \n",
" def idf(self):\n",
"        \"\"\"Funcion que calcula la frecuencia inversa de documento (idf) a partir de la matriz tf\n",
"        \n",
"        Examples:\n",
"            >>> self.tf()  # matriz tf que idf() toma como entrada\n",
" array([[0., 0., 1., 0., 1.69314718, 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0.],\n",
" [1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1.],\n",
" [0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0.]])\n",
" \"\"\"\n",
" aux = []\n",
" data = self.tf()\n",
" #print(self.tf())\n",
......@@ -280,6 +324,37 @@
"mdt.getDF(tf)"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0. , 0. , 1. , 0. , 1.69314718,\n",
" 1. , 0. , 0. , 0. , 1. ,\n",
" 0. , 1. , 0. , 1. , 0. ,\n",
" 0. ],\n",
" [1. , 0. , 0. , 1. , 1. ,\n",
" 1. , 0. , 0. , 0. , 0. ,\n",
" 1. , 0. , 0. , 1. , 0. ,\n",
" 1. ],\n",
" [0. , 1. , 0. , 0. , 1. ,\n",
" 1. , 1. , 0. , 1. , 0. ,\n",
" 0. , 0. , 0. , 0. , 1. ,\n",
" 0. ]])"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tf"
]
},
{
"cell_type": "code",
"execution_count": 106,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
examen2_wordcloud.png

50.7 KB | W: | H:

examen2_wordcloud.png

53.2 KB | W: | H:

examen2_wordcloud.png
examen2_wordcloud.png
examen2_wordcloud.png
examen2_wordcloud.png
  • 2-up
  • Swipe
  • Onion skin
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment