Archivo de pruebas: se logró transferir información a Mongo con archivos...

Archivo de pruebas: se logró transferir información a Mongo con archivos .json; sin embargo, para el análisis se utilizaron los archivos .csv.
parent 59b16af1
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pymongo import MongoClient\n",
"import csv\n",
"import json\n",
"\n",
"# Connection settings for the local MongoDB server\n",
"MONGO_URI = 'mongodb://localhost:27017/'\n",
"DB_NAME = 'PeriodicoInteligente'\n",
"\n",
"# Open a client and select the database\n",
"client = MongoClient(MONGO_URI)\n",
"db = client[DB_NAME]\n",
"\n",
"# Handles for the train and test collections\n",
"collection_train = db['noticias_train']\n",
"collection_test = db['noticias_test']\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Convert a CSV file to a JSON Lines file (one JSON object per line)\n",
"import csv\n",
"import json\n",
"\n",
"\n",
"def csv_to_json_lines(csv_path, json_path):\n",
"    \"\"\"Write every row of csv_path to json_path, one JSON object per line.\n",
"\n",
"    Returns the number of rows written.\n",
"    \"\"\"\n",
"    count = 0\n",
"    # newline='' is what the csv module expects so that line breaks inside\n",
"    # quoted fields are parsed correctly. The with-block closes (and flushes)\n",
"    # both files, so the .json file is complete before a later cell reads it.\n",
"    with open(csv_path, 'r', newline='') as csvfile, open(json_path, 'w') as jsonfile:\n",
"        for row in csv.DictReader(csvfile):\n",
"            json.dump(row, jsonfile)\n",
"            jsonfile.write('\\n')\n",
"            count += 1\n",
"    return count\n",
"\n",
"\n",
"count = csv_to_json_lines('noticias_test.csv', 'noticias_test.json')\n",
"print('Se convirtieron ', count, 'registros, archivo .JSON creado ')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"######## Insert the TEST records into the collection #########\n",
"file_name = './noticias_test.json'\n",
"test_fields = ('FECHA', 'PERIODICO', 'TITULO', 'NOTA', 'URL')\n",
"cont = 0\n",
"# The with-block closes the file; no explicit f.close() is needed\n",
"with open(file_name) as f:\n",
"    for line in f:\n",
"        if not line.strip():\n",
"            continue  # skip blank lines instead of failing in json.loads\n",
"        j_content = json.loads(line)\n",
"        # Keep only the expected fields (a missing field raises KeyError)\n",
"        mydict = {key: j_content[key] for key in test_fields}\n",
"        # Insert into collection_test from the setup cell; the bare name\n",
"        # `collection` is not defined anywhere in this notebook\n",
"        collection_test.insert_one(mydict)\n",
"        cont += 1\n",
"\n",
"print(\"Se ingresaron \"+str(cont)+\" registros\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"######## Insert the TRAIN records into the collection #########\n",
"\n",
"file_name = './noticias_train.json'\n",
"train_fields = ('data_news', 'target_news', 'target_names_news')\n",
"cont = 0\n",
"# The with-block closes the file; no explicit f.close() is needed\n",
"with open(file_name) as f:\n",
"    for line in f:\n",
"        if not line.strip():\n",
"            continue  # skip blank lines instead of failing in json.loads\n",
"        j_content = json.loads(line)\n",
"        # Keep only the expected fields (a missing field raises KeyError)\n",
"        mydict = {key: j_content[key] for key in train_fields}\n",
"        # Insert into collection_train from the setup cell; the bare name\n",
"        # `collection` is not defined anywhere in this notebook\n",
"        collection_train.insert_one(mydict)\n",
"        cont += 1\n",
"\n",
"print(\"Se ingresaron \"+str(cont)+\" registros\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment