Tarea del periódico interactivo

parent 50e07385
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'arte'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from io import StringIO\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from nltk.corpus import stopwords\n",
"from sklearn.feature_selection import chi2\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.feature_extraction.text import TfidfTransformer\n",
"from sklearn.naive_bayes import MultinomialNB\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.model_selection import cross_val_score\n",
"from IPython.display import display\n",
"from sklearn import metrics\n",
"import seaborn as sns\n",
"from sklearn.metrics import confusion_matrix\n",
"\n",
"class ia:\n",
"    '''\n",
"    News-topic classifier for the interactive newspaper.\n",
"\n",
"    Trains a Multinomial Naive Bayes model on a labelled corpus and offers\n",
"    helpers to inspect the corpus and to compare several scikit-learn models.\n",
"    '''\n",
"\n",
"    def __init__(self,text):\n",
"        '''\n",
"        Train the classifier from an already loaded corpus.\n",
"\n",
"        Args:\n",
"            text: pandas DataFrame (for instance the result of pd.read_csv)\n",
"                with at least the columns 'tipo' (category label) and 'text'\n",
"                (article body). Rows whose 'text' is null are discarded.\n",
"        '''\n",
"        # Work on an explicit copy: adding 'category_id' to a filtered slice\n",
"        # would otherwise raise SettingWithCopyWarning.\n",
"        df = text[['tipo', 'text']]\n",
"        df = df[pd.notnull(df['text'])].copy()\n",
"        df['category_id'] = df['tipo'].factorize()[0]\n",
"        category_id_df = df[['tipo', 'category_id']].drop_duplicates().sort_values('category_id')\n",
"        category_to_id = dict(category_id_df.values)\n",
"        id_to_category = dict(category_id_df[['category_id', 'tipo']].values)\n",
"\n",
"        # Model behind predict(): word counts -> tf-idf weights -> Naive Bayes.\n",
"        X_train, X_test, y_train, y_test = train_test_split(df['text'], df['tipo'], random_state = 0)\n",
"        count_vect = CountVectorizer()\n",
"        X_train_counts = count_vect.fit_transform(X_train)\n",
"        tfidf_transformer = TfidfTransformer()\n",
"        X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)\n",
"        clf = MultinomialNB().fit(X_train_tfidf, y_train)\n",
"\n",
"        # Features over the whole corpus (unigrams and bigrams), used by the\n",
"        # reporting methods ngramas(), diamodelos() and matrizconf().\n",
"        tfidf = TfidfVectorizer (sublinear_tf = True, min_df = 5, norm = 'l2', encoding = 'latin-1', ngram_range = (1, 2), stop_words = stopwords.words('spanish'))\n",
"        features = tfidf.fit_transform(df.text).toarray()\n",
"        labels = df.category_id\n",
"\n",
"        # Candidate models compared by diamodelos().\n",
"        models = [\n",
"            RandomForestClassifier(\n",
"                n_estimators=200,\n",
"                max_depth=3,\n",
"                random_state=0\n",
"            ),\n",
"            LinearSVC(),\n",
"            MultinomialNB(),\n",
"            LogisticRegression(random_state=0),\n",
"        ]\n",
"        CV = 5\n",
"        entries = []\n",
"        cv_df = pd.DataFrame(\n",
"            entries,\n",
"            columns=['model_name', 'fold_idx', 'accuracy'])\n",
"\n",
"        self.df=df\n",
"        self.category_id_df=category_id_df\n",
"        self.category_to_id=category_to_id\n",
"        self.id_to_category=id_to_category\n",
"        self.X_train=X_train\n",
"        self.X_test=X_test\n",
"        self.y_train=y_train\n",
"        self.y_test=y_test\n",
"        self.count_vect=count_vect\n",
"        self.X_train_counts=X_train_counts\n",
"        self.tfidf_transformer=tfidf_transformer\n",
"        self.X_train_tfidf=X_train_tfidf\n",
"        self.clf=clf\n",
"        self.tfidf=tfidf\n",
"        self.features=features\n",
"        self.labels=labels\n",
"        self.models=models\n",
"        self.CV=CV\n",
"        self.entries=entries\n",
"        self.cv_df=cv_df\n",
"\n",
"    def predict(self,texto):\n",
"        '''\n",
"        Predict the category of a news article.\n",
"\n",
"        The text goes through the same steps used for training (word counts\n",
"        followed by the fitted tf-idf transform) before reaching the classifier.\n",
"\n",
"        Args:\n",
"            texto (str): body of the article.\n",
"\n",
"        Returns:\n",
"            The predicted 'tipo' label, one of the labels seen during training.\n",
"\n",
"        Example:\n",
"            >>>text=pd.read_csv('../data/filtrados15/todojunto.csv',encoding=\"latin\")\n",
"            >>>c=ia(text)\n",
"            >>>c.predict('Maravillas del septimo arte')\n",
"            'arte'\n",
"        '''\n",
"        counts = self.count_vect.transform([texto])\n",
"        # The classifier was fitted on tf-idf weights, so the raw counts must\n",
"        # be re-weighted with the fitted transformer before predicting.\n",
"        weighted = self.tfidf_transformer.transform(counts)\n",
"        return self.clf.predict(weighted)[0]\n",
"\n",
"    def totaldata(self):\n",
"        '''\n",
"        Show a bar chart with the number of loaded articles per category.\n",
"\n",
"        Example:\n",
"            >>>text=pd.read_csv('../data/filtrados15/todojunto.csv',encoding=\"latin\")\n",
"            >>>c=ia(text)\n",
"            >>>c.totaldata()\n",
"\n",
"            *matplotlib bar chart*\n",
"        '''\n",
"        fig, ax = plt.subplots(figsize=(8,6))\n",
"        self.df.groupby('tipo').text.count().plot.bar(ylim=0, ax=ax)\n",
"        plt.show()\n",
"\n",
"    def numtotaldata(self):\n",
"        '''\n",
"        Return the number of articles that were loaded.\n",
"\n",
"        Example:\n",
"            >>>text=pd.read_csv('../data/filtrados15/todojunto.csv',encoding=\"latin\")\n",
"            >>>c=ia(text)\n",
"            >>>c.numtotaldata()\n",
"            75\n",
"        '''\n",
"        # One row of the feature matrix per article kept by __init__.\n",
"        return self.features.shape[0]\n",
"\n",
"    def ngramas(self):\n",
"        '''\n",
"        Print, for every category, the two unigrams and the two bigrams with\n",
"        the highest chi-squared score against that category.\n",
"\n",
"        Example:\n",
"            >>>text=pd.read_csv('../data/filtrados15/todojunto.csv',encoding=\"latin\")\n",
"            >>>c=ia(text)\n",
"            >>>c.ngramas()\n",
"            # 'arte':\n",
"             . Most correlated unigrams:\n",
"            . artes\n",
"            . arte\n",
"             . Most correlated bigrams:\n",
"            . secretaría cultura\n",
"            . bellas artes\n",
"            (one such section per category)\n",
"        '''\n",
"        features = self.features\n",
"        labels = self.labels\n",
"        tfidf = self.tfidf\n",
"        N = 2\n",
"\n",
"        # get_feature_names() was removed in scikit-learn 1.2; use its\n",
"        # replacement when the installed version offers it. The vocabulary is\n",
"        # the same for every category, so it is built once outside the loop.\n",
"        if hasattr(tfidf, 'get_feature_names_out'):\n",
"            vocabulary = np.array(tfidf.get_feature_names_out())\n",
"        else:\n",
"            vocabulary = np.array(tfidf.get_feature_names())\n",
"\n",
"        for tipo, category_id in sorted(self.category_to_id.items()):\n",
"            # One-vs-rest test: how strongly each term depends on this category.\n",
"            features_chi2 = chi2(features, labels == category_id)\n",
"            indices = np.argsort(features_chi2[0])\n",
"            feature_names = vocabulary[indices]\n",
"            unigrams = [v for v in feature_names if len(v.split(' ')) == 1]\n",
"            bigrams = [v for v in feature_names if len(v.split(' ')) == 2]\n",
"            print(\"# '{}':\".format(tipo))\n",
"            # Scores are sorted ascending, so the best terms are the last N.\n",
"            print(\" . Most correlated unigrams:\\n. {}\".format('\\n. '.join(unigrams[-N:])))\n",
"            print(\" . Most correlated bigrams:\\n. {}\".format('\\n. '.join(bigrams[-N:])))\n",
"\n",
"    def diamodelos(self):\n",
"        '''\n",
"        Cross-validate every candidate model, plot the per-fold accuracies and\n",
"        return the mean accuracy of each model.\n",
"\n",
"        The per-fold results of the latest call are kept in self.entries and\n",
"        self.cv_df; calling the method again replaces them instead of piling\n",
"        duplicate rows onto the previous run.\n",
"\n",
"        Returns:\n",
"            pandas Series indexed by model name with the mean accuracy.\n",
"\n",
"        Example:\n",
"            >>>text=pd.read_csv('../data/filtrados15/todojunto.csv',encoding=\"latin\")\n",
"            >>>c=ia(text)\n",
"            >>>c.diamodelos()\n",
"\n",
"            *matplotlib box plot*\n",
"\n",
"            model_name\n",
"            LinearSVC                 0.853333\n",
"            LogisticRegression        0.880000\n",
"            MultinomialNB             0.853333\n",
"            RandomForestClassifier    0.933333\n",
"            Name: accuracy, dtype: float64\n",
"        '''\n",
"        # Fresh list on every call so repeated calls do not duplicate rows.\n",
"        entries = []\n",
"        for model in self.models:\n",
"            model_name = model.__class__.__name__\n",
"            accuracies = cross_val_score(model, self.features, self.labels, scoring='accuracy', cv=self.CV)\n",
"            for fold_idx, accuracy in enumerate(accuracies):\n",
"                entries.append((model_name, fold_idx, accuracy))\n",
"\n",
"        cv_df = pd.DataFrame(entries, columns=['model_name', 'fold_idx', 'accuracy'])\n",
"        self.entries = entries\n",
"        self.cv_df = cv_df\n",
"\n",
"        sns.boxplot(x='model_name', y='accuracy', data=cv_df)\n",
"        sns.stripplot(\n",
"            x='model_name',\n",
"            y='accuracy',\n",
"            data=cv_df,\n",
"            size=8,\n",
"            jitter=True,\n",
"            edgecolor=\"gray\",\n",
"            linewidth=2\n",
"        )\n",
"        plt.show()\n",
"        return cv_df.groupby('model_name').accuracy.mean()\n",
"\n",
"    def matrizconf(self,num=0):\n",
"        '''\n",
"        Fit a LinearSVC on two thirds of the corpus and report how it performs\n",
"        on the remaining third.\n",
"\n",
"        Args:\n",
"            num: report to produce.\n",
"                0 - heat map of the confusion matrix.\n",
"                1 - for every confused pair of categories, the number of\n",
"                    misclassified articles followed by the articles themselves.\n",
"                2 - printed classification report (precision, recall,\n",
"                    f1-score and support per category).\n",
"                Any other value produces no output.\n",
"\n",
"        Example:\n",
"            >>>text=pd.read_csv('../data/filtrados15/todojunto.csv',encoding=\"latin\")\n",
"            >>>c=ia(text)\n",
"            >>>c.matrizconf(0)\n",
"            *matplotlib heat map of the confusion matrix*\n",
"\n",
"            >>>c.matrizconf(1)\n",
"            'turismo' predicted as 'arte' : 1 examples.\n",
"                   tipo                                               text\n",
"            60  turismo  El Museo de la Luz es un proyecto impulsado de...\n",
"\n",
"            >>>c.matrizconf(2)\n",
"                          precision    recall  f1-score   support\n",
"\n",
"                    arte       0.67      0.67      0.67         3\n",
"                 deporte       1.00      1.00      1.00         4\n",
"                     ...\n",
"        '''\n",
"        df = self.df\n",
"        category_id_df = self.category_id_df\n",
"        id_to_category = self.id_to_category\n",
"        # category_id_df is sorted by category_id, so position i in these two\n",
"        # arrays refers to the same category.\n",
"        category_ids = category_id_df.category_id.values\n",
"        category_names = category_id_df.tipo.values\n",
"\n",
"        model = LinearSVC()\n",
"        X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(self.features, self.labels, df.index, test_size=0.33, random_state=0)\n",
"        model.fit(X_train, y_train)\n",
"        y_pred = model.predict(X_test)\n",
"        # Pin the label order so that row/column i always means category id i,\n",
"        # even when a category does not occur in the test split.\n",
"        conf_mat = confusion_matrix(y_test, y_pred, labels=category_ids)\n",
"\n",
"        if num == 0:\n",
"            fig, ax = plt.subplots(figsize=(10,10))\n",
"            sns.heatmap(conf_mat, annot=True, fmt='d',\n",
"                        xticklabels=category_names,\n",
"                        yticklabels=category_names,\n",
"                        ax=ax)\n",
"            plt.ylabel('Actual')\n",
"            plt.xlabel('Predicted')\n",
"            plt.show()\n",
"        elif num == 1:\n",
"            # Plain array, so the mask below is positional and lines up with indices_test.\n",
"            actual_ids = np.asarray(y_test)\n",
"            for predicted in category_ids:\n",
"                for actual in category_ids:\n",
"                    if predicted != actual and conf_mat[actual, predicted] >= 1:\n",
"                        print(\"'{}' predicted as '{}' : {} examples.\".format(id_to_category[actual], id_to_category[predicted], conf_mat[actual, predicted]))\n",
"                        mask = (actual_ids == actual) & (y_pred == predicted)\n",
"                        display(df.loc[indices_test[mask]][['tipo', 'text']])\n",
"                        print('')\n",
"        elif num == 2:\n",
"            print(metrics.classification_report(y_test, y_pred, labels=category_ids, target_names=category_names))\n",
" \n",
"text=pd.read_csv('data/todojunto.csv',encoding=\"latin\")\n",
"c=ia(text)\n",
"c.predict('Textiles, pintura popular, alfarería, cerámica, vidrio y otras artesanías se reúnen en la exhibición "Orgullo de mi país", con materiales de toda la República Mexicana y que está montada en el Museo de Arte Popular de Yucatán, ubicado en el centro de Mérida."Por fin tengo una blusa en una exposición, me da mucha alegría exponer mi trabajo. La blusa me tardó un mes, porque es la primera vez que realizo corazones, mayormente solo he bordado flores", comentó María Rita Con Uc, artesana originaria del municipio Subincancab, Yucatán, quien aprendió a "costurar" desde pequeña, con solo observar a su abuela.Su bordado es una de las integrantes de la muestra, compuesta por 59 piezas de diferentes estados de la República."En esta exhibición como (Mérida) es ciudad anfitriona, hay varias artesanías, representando Muna, Yaxuná, y Dzitya. La idea es que se incorporen a la exhibición y van a seguir itinerando, representando a Yucatán", comentó Judith Figueroa, encargada de la propuesta museográfica.Asimismo, esta convocatoria lanzada por Ricardo Salinas Pliego, presidente de Grupo Salinas, tiene el propósito de mostrar la pasión que se tiene por México, por eso el eje de las piezas es el corazón y cómo representarlo a través del arte."Está muy bien plasmado, esta creatividad, esta originalidad de todos los artesanos de cómo tal vez no es un elemento que usan normalmente en su artesanía, pero la manera en que lo trabajan y lo llevan aquí terminan siendo trabajos muy bellos", comentó Ana Méndez Patterson, directora general de museos y patrimonio de la Secretaría de la Cultura y las Artes de Yucatán. 
"Orgullo de mi país", que permanecerá del 26 de marzo al 26 de mayo, es la primera de un nuevo ciclo de exhibiciones del Museo de Arte Popular de Yucatán, con lo que se busca promocionarlo para atraer nuevos visitantes y turistas, añadió Méndez Patterson.La planta baja del recinto se dedicará a las exposiciones temporales, mientras que en la parte alta presentará una muestra permanente de arte popular de todo el país, conformada por colecciones del Fomento Cultural Banamex y del Instituto Nacional de los Pueblos Indígenas, anteriormente Comisión Nacional para el Desarrollo de los Pueblos Indígenas (CDI), comentó Ana Mendez Patterson.') #Adjuntar texto de publicación\n",
"#c.totaldata()\n",
"#c.numtotaldata()\n",
"#c.ngramas()\n",
"#c.diamodelos()\n",
"#c.matrizconf(2)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 576x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"c.totaldata()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"75"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"c.numtotaldata()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# 'arte':\n",
" . Most correlated unigrams:\n",
". artes\n",
". arte\n",
" . Most correlated bigrams:\n",
". secretaría cultura\n",
". bellas artes\n",
"# 'deporte':\n",
" . Most correlated unigrams:\n",
". futbol\n",
". deporte\n",
" . Most correlated bigrams:\n",
". 30 años\n",
". comisión nacional\n",
"# 'politica':\n",
" . Most correlated unigrams:\n",
". político\n",
". política\n",
" . Most correlated bigrams:\n",
". cada vez\n",
". banco méxico\n",
"# 'seguridad':\n",
" . Most correlated unigrams:\n",
". seguridad\n",
". víctimas\n",
" . Most correlated bigrams:\n",
". subsecretario derechos\n",
". derechos humanos\n",
"# 'turismo':\n",
" . Most correlated unigrams:\n",
". roo\n",
". turismo\n",
" . Most correlated bigrams:\n",
". secretaría turismo\n",
". quintana roo\n"
]
}
],
"source": [
"c.ngramas()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"model_name\n",
"LinearSVC 0.853333\n",
"LogisticRegression 0.880000\n",
"MultinomialNB 0.853333\n",
"RandomForestClassifier 0.933333\n",
"Name: accuracy, dtype: float64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"c.diamodelos()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"C:\\Users\\victo\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n"
]
},
{
"data": {
"text/plain": [
"model_name\n",
"LinearSVC 0.853333\n",
"LogisticRegression 0.880000\n",
"MultinomialNB 0.853333\n",
"RandomForestClassifier 0.933333\n",
"Name: accuracy, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x720 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"c.matrizconf(0)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"'turismo' predicted as 'arte' : 1 examples.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tipo</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>turismo</td>\n",
" <td>El Museo de la Luz es un proyecto impulsado de...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tipo text\n",
"60 turismo El Museo de la Luz es un proyecto impulsado de..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"'arte' predicted as 'politica' : 1 examples.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tipo</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>arte</td>\n",
" <td>El diputado local de Morena por Tabasco, Charl...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tipo text\n",
"6 arte El diputado local de Morena por Tabasco, Charl..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"'politica' predicted as 'seguridad' : 1 examples.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tipo</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>politica</td>\n",
" <td>Representantes de la sociedad civil en el cons...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tipo text\n",
"41 politica Representantes de la sociedad civil en el cons..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"'seguridad' predicted as 'turismo' : 1 examples.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tipo</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>seguridad</td>\n",
" <td>Mérida es una ciudad en crecimiento, con más d...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tipo text\n",
"56 seguridad Mérida es una ciudad en crecimiento, con más d..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"c.matrizconf(1)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" arte 0.67 0.67 0.67 3\n",
" deporte 1.00 1.00 1.00 4\n",
" politica 0.80 0.80 0.80 5\n",
" seguridad 0.86 0.86 0.86 7\n",
" turismo 0.83 0.83 0.83 6\n",
"\n",
" micro avg 0.84 0.84 0.84 25\n",
" macro avg 0.83 0.83 0.83 25\n",
"weighted avg 0.84 0.84 0.84 25\n",
"\n"
]
}
],
"source": [
"c.matrizconf(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment