Commit 41591c1d authored by geobumac

Proyecto final

parent 84448156
This diff is collapsed.
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
This diff is collapsed.
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pickle\n",
"\n",
"from sklearn import datasets\n",
"from sklearn import metrics\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# Category names; the integer value of a record's \"iclass\" / predicted label\n",
"# is used as an index into this list.\n",
"categories = [\n",
"    'cine',\n",
"    'deportes'\n",
"]\n",
"\n",
"\n",
"def load_records(path):\n",
"    \"\"\"Return the JSON value parsed from the last line of `path`.\n",
"\n",
"    Every line is parsed with json.loads and the last result wins, so a file\n",
"    holding one single-line JSON array yields that whole array.\n",
"\n",
"    Raises:\n",
"        ValueError: if no line produced a value.\n",
"    \"\"\"\n",
"    records = None\n",
"    with open(path) as f:\n",
"        for line in f:\n",
"            records = json.loads(line)\n",
"    if records is None:\n",
"        raise ValueError(\"no JSON data found in \" + path)\n",
"    return records\n",
"\n",
"\n",
"def split_text_and_target(records):\n",
"    \"\"\"Return two parallel lists: the \"text\" and the \"iclass\" of each record.\"\"\"\n",
"    texts = [record[\"text\"] for record in records]\n",
"    targets = [record[\"iclass\"] for record in records]\n",
"    return texts, targets\n",
"\n",
"\n",
"# --- Train: TF-IDF features + 3-nearest-neighbours classifier ---\n",
"data_train = load_records(\"data/datatrain.json\")\n",
"documentos_train, target_train = split_text_and_target(data_train)\n",
"\n",
"vectorizer = TfidfVectorizer()\n",
"X_train = vectorizer.fit_transform(documentos_train)\n",
"y_train = target_train\n",
"\n",
"clf, name = (KNeighborsClassifier(n_neighbors=3), \"kNN\")\n",
"clf.fit(X_train, y_train)\n",
"\n",
"# --- Predict on the test set, reusing the vocabulary fitted above ---\n",
"data_test = load_records(\"data/datatest.json\")\n",
"documentos_test, target_test = split_text_and_target(data_test)\n",
"\n",
"X_test = vectorizer.transform(documentos_test)\n",
"y_test = target_test\n",
"\n",
"pred = clf.predict(X_test)\n",
"\n",
"# get_feature_names() was removed in scikit-learn 1.2; use the replacement\n",
"# when the installed version provides it and fall back otherwise.\n",
"get_names = getattr(vectorizer, \"get_feature_names_out\", None) or vectorizer.get_feature_names\n",
"features_names = get_names()\n",
"\n",
"# Overwrite each test record's \"class\" with the predicted category name.\n",
"for record, label in zip(data_test, pred):\n",
"    record[\"class\"] = categories[int(label)]\n",
"\n",
"# Save the records that carry the predictions (data_test), not a variable\n",
"# left over in the kernel from another cell.\n",
"with open(\"data/data.json\", 'w') as outfile:\n",
"    json.dump(data_test, outfile)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"\n",
"extension = '.json'\n",
"file_path = 'timeline'\n",
"\n",
"# Records [0, TRAIN_COUNT) of every source go to the training set and records\n",
"# [TRAIN_COUNT, TEST_STOP) go to the test set; anything after that is ignored.\n",
"# TEST_STOP is 42 because the original loop still appended index 41 before\n",
"# breaking.\n",
"TRAIN_COUNT = 20\n",
"TEST_STOP = 42\n",
"\n",
"OUTPUT_DIR = \"data\"\n",
"\n",
"# (source file, class name, class index) for each category. Forward slashes\n",
"# are used so the Windows paths contain no backslash escape sequences.\n",
"# NOTE(review): these are machine-specific absolute paths; adjust as needed.\n",
"SOURCES = [\n",
"    (\"D:/descargas/cine/data.json\", \"cine\", \"0\"),\n",
"    (\"D:/descargas/deportes/data.json\", \"deportes\", \"1\"),\n",
"]\n",
"\n",
"\n",
"def label_records(records, class_name, class_index):\n",
"    \"\"\"Set \"class\" and \"iclass\" on every record in place and return `records`.\"\"\"\n",
"    for record in records:\n",
"        record[\"class\"] = class_name\n",
"        record[\"iclass\"] = class_index\n",
"    return records\n",
"\n",
"\n",
"datatrain = []\n",
"datatest = []\n",
"for path, class_name, class_index in SOURCES:\n",
"    # Assumes each source file holds one JSON array on a single line; if\n",
"    # there are several lines, the last parsed one is used. TODO confirm.\n",
"    records = []\n",
"    with open(path) as f:\n",
"        for line in f:\n",
"            records = json.loads(line)\n",
"    datatrain.extend(label_records(records[:TRAIN_COUNT], class_name, class_index))\n",
"    datatest.extend(label_records(records[TRAIN_COUNT:TEST_STOP], class_name, class_index))\n",
"\n",
"# Make sure the output directory exists before writing into it.\n",
"os.makedirs(OUTPUT_DIR, exist_ok=True)\n",
"\n",
"with open(\"data/datatrain.json\", 'w') as outfile:\n",
"    json.dump(datatrain, outfile)\n",
"\n",
"with open(\"data/datatest.json\", 'w') as outfile:\n",
"    json.dump(datatest, outfile)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<a href=\"http://example.com\">link</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# HTML is imported from the public IPython.display module rather than the\n",
"# internal IPython.core.display one.\n",
"from IPython.display import HTML\n",
"\n",
"# The HTML object is the cell's last expression, so it is shown as rich output.\n",
"HTML('<a href=\"http://example.com\">link</a>')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment