Commit b4fbac6a authored by geobumac

ad

parent f700ccfb
...@@ -793,7 +793,7 @@ ...@@ -793,7 +793,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 185, "execution_count": 190,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -824,6 +824,7 @@ ...@@ -824,6 +824,7 @@
" s = s.replace(\"<START:location>\", \"\")\n", " s = s.replace(\"<START:location>\", \"\")\n",
" s = s.replace(\"<END>\", \"\")\n", " s = s.replace(\"<END>\", \"\")\n",
" s = re.sub(r\"[^\\w\\s]\", '', s)\n", " s = re.sub(r\"[^\\w\\s]\", '', s)\n",
" s = re.sub(r\"[^\\w\\s]\", '', s)\n",
" #s = re.sub(r\"[<START:location>|<END>]\", '', s)\n", " #s = re.sub(r\"[<START:location>|<END>]\", '', s)\n",
" return s\n", " return s\n",
"f = open(\"./data/named_entity_recognition_sp_MX_locations.JSON\", \"r\", encoding=\"utf8\")\n", "f = open(\"./data/named_entity_recognition_sp_MX_locations.JSON\", \"r\", encoding=\"utf8\")\n",
...@@ -833,6 +834,7 @@ ...@@ -833,6 +834,7 @@
"jsonDoc = json.loads(jsonDoc)\n", "jsonDoc = json.loads(jsonDoc)\n",
"for i in jsonDoc:\n", "for i in jsonDoc:\n",
" texto = removerpunt(i[\"doc_locations\"])\n", " texto = removerpunt(i[\"doc_locations\"])\n",
" ##texto = re.findall(r'([A-Za-z][a-z]{2,15})', texto)\n",
" texto = texto.split(\" \")\n", " texto = texto.split(\" \")\n",
" listaPalabras.extend(list(set(texto)))\n", " listaPalabras.extend(list(set(texto)))\n",
"\n", "\n",
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment