Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "import langdetect"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [],
- "source": [
- "string_test = \"\"\"import sudomin\n",
- "\n",
- "def funcionAProbar:\n",
- " a = 2313\n",
- " return a*5\n",
- "\n",
- "thisIsAFunction(2)\n",
- "draw_me_a_sheep\n",
- "dibujame_una_cabra\n",
- "\n",
- "\"\"\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [],
- "source": [
- "import nltk"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 57,
- "metadata": {},
- "outputs": [],
- "source": [
- "code_tokens = nltk.tokenize.word_tokenize(string_test)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 58,
- "metadata": {},
- "outputs": [],
- "source": [
- "def try_detection(token):\n",
- "    \"\"\"Return the language code langdetect guesses for `token`, or None.\n",
- "\n",
- "    None is returned when langdetect raises LangDetectException for the\n",
- "    given text instead of producing a guess.\n",
- "    \"\"\"\n",
- "    try:\n",
- "        return langdetect.detect(token)\n",
- "    except langdetect.LangDetectException:\n",
- "        # Catch only the detector's own failure; a bare except would also\n",
- "        # swallow KeyboardInterrupt and genuine bugs such as NameError.\n",
- "        return None"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 59,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('sudomin', 'lt'),\n",
- " ('funcionAProbar', 'es'),\n",
- " ('thisIsAFunction', 'en'),\n",
- " ('draw_me_a_sheep', 'en'),\n",
- " ('dibujame_una_cabra', 'es')]"
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# NOTE(review): consider_token is defined in a later cell of this notebook, so on a\n",
- "# fresh kernel this cell raises NameError unless that cell has been run first.\n",
- "# Each token is detected exactly once: calling try_detection twice doubles the work\n",
- "# and, since langdetect is not deterministic unless seeded, could filter on one\n",
- "# guess while reporting another.\n",
- "[(token, lang)\n",
- " for token, lang in ((tok, try_detection(tok)) for tok in code_tokens if consider_token(tok))\n",
- " if lang]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 60,
- "metadata": {},
- "outputs": [],
- "source": [
- "reserved_words = [\"False\",\"class\",\"finally\",\"is\",\"return\",\"None\",\"continue\",\"for\",\"lambda\",\"try\",\"True\",\"def\",\"from\",\"nonlocal\",\"while\",\"and\",\"del\",\"global\",\"not\",\"with\",\"as\",\"elif\",\"if\",\"or\",\"yield\",\"assert\",\"else\",\"import\",\"pass\",\"break\",\"except\",\"in\",\"raise\"]\n",
- "mathematical_ops = [\"+\", \"-\", \"*\", \"/\", \"=\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 61,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 61,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "\"*\" in \"a*2\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 62,
- "metadata": {},
- "outputs": [],
- "source": [
- "def consider_token(token):\n",
- "    \"\"\"Decide whether `token` should be passed on to language detection.\n",
- "\n",
- "    A token is rejected when it contains any entry of mathematical_ops as a\n",
- "    substring, when it is listed in reserved_words, or when it is exactly one\n",
- "    character long. Every other token is accepted.\n",
- "    \"\"\"\n",
- "    rejected = (\n",
- "        any(symbol in token for symbol in mathematical_ops)\n",
- "        or token in reserved_words\n",
- "        or len(token) == 1\n",
- "    )\n",
- "    return not rejected"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 63,
- "metadata": {},
- "outputs": [],
- "source": [
- "# The trick here is detecting which tokens actually need to be checked\n",
- "# Knowing a little about the context can help (such as an import, or the file the expression comes from)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment