Commit b4fbac6a authored by geobumac's avatar geobumac

ad

parent f700ccfb
......@@ -793,7 +793,7 @@
},
{
"cell_type": "code",
"execution_count": 185,
"execution_count": 190,
"metadata": {},
"outputs": [
{
......@@ -824,6 +824,7 @@
" s = s.replace(\"<START:location>\", \"\")\n",
" s = s.replace(\"<END>\", \"\")\n",
" s = re.sub(r\"[^\\w\\s]\", '', s)\n",
" s = re.sub(r\"[^\\w\\s]\", '', s)\n",
" #s = re.sub(r\"[<START:location>|<END>]\", '', s)\n",
" return s\n",
"f = open(\"./data/named_entity_recognition_sp_MX_locations.JSON\", \"r\", encoding=\"utf8\")\n",
......@@ -833,6 +834,7 @@
"jsonDoc = json.loads(jsonDoc)\n",
"for i in jsonDoc:\n",
" texto = removerpunt(i[\"doc_locations\"])\n",
" ##texto = re.findall(r'([A-Za-z][a-z]{2,15})', texto)\n",
" texto = texto.split(\" \")\n",
" listaPalabras.extend(list(set(texto)))\n",
"\n",
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment