{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%reload_ext autoreload\n", | |
"%autoreload 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<style>\n", | |
"/*\n", | |
"Placeholder for custom user CSS\n", | |
"\n", | |
"mainly to be overridden in profile/static/custom/custom.css\n", | |
"\n", | |
"This will always be an empty file in IPython\n", | |
"*/\n", | |
"\n", | |
"@import url('http://fonts.googleapis.com/css?family=Crimson+Text');\n", | |
"@import url('http://fonts.googleapis.com/css?family=Kameron');\n", | |
"@import url('http://fonts.googleapis.com/css?family=Lato:200');\n", | |
"@import url('http://fonts.googleapis.com/css?family=Lato:300');\n", | |
"@import url('http://fonts.googleapis.com/css?family=Lato:400');\n", | |
"\n", | |
"\n", | |
"/* Change code font */\n", | |
".CodeMirror pre {\n", | |
" font-family: Monaco;\n", | |
" font-size: 14pt;\n", | |
"}\n", | |
"\n", | |
"div.output pre{\n", | |
" font-family: Monaco;\n", | |
" font-size: 14pt;\n", | |
"}\n", | |
"\n", | |
"div.output_html td{\n", | |
" font-family: Monaco;\n", | |
" font-size: 10pt;\n", | |
"}\n", | |
"\n", | |
"div.prompt{\n", | |
" font-family: Monaco;\n", | |
" font-size: 10pt;\n", | |
"}\n", | |
"\n", | |
"div.completions select{\n", | |
" font-family: Monaco;\n", | |
" font-size: 12pt;\n", | |
"}\n", | |
"\n", | |
"div.container pre{\n", | |
" font-family: Monaco;\n", | |
" font-size: 12pt;\n", | |
"}\n", | |
"\n", | |
"div.tooltiptext pre{\n", | |
" font-family: Monaco;\n", | |
" font-size: 10pt;\n", | |
"}\n", | |
"\n", | |
"div.input_area {\n", | |
" border-color: rgba(0,0,0,0.10);\n", | |
" background: rbga(0,0,0,0.5);\n", | |
"}\n", | |
"\n", | |
"div.text_cell {\n", | |
" max-width: 105ex; /* instead of 100%, */\n", | |
"}\n", | |
"\n", | |
"div.text_cell_render {\n", | |
" font-family: lato;\n", | |
" font-size: 14pt;\n", | |
" line-height: 145%; /* added for some line spacing of text. */\n", | |
"}\n", | |
"\n", | |
"div.text_cell_render code{\n", | |
" font-family: Monaco;\n", | |
" font-size: 2pt;\n", | |
"}\n", | |
"\n", | |
"div.text_cell_render h1,\n", | |
"div.text_cell_render h2,\n", | |
"div.text_cell_render h3,\n", | |
"div.text_cell_render h4,\n", | |
"div.text_cell_render h5,\n", | |
"div.text_cell_render h6 {\n", | |
" font-family: lato, 'HelveticaNeue-Light';\n", | |
" font-weight: 300;\n", | |
"}\n", | |
"\n", | |
"div.text_cell_render h1 {\n", | |
" font-size: 30pt;\n", | |
"}\n", | |
"\n", | |
"div.text_cell_render h2 {\n", | |
" font-size: 24pt;\n", | |
"}\n", | |
"\n", | |
"div.text_cell_render h3 {\n", | |
" font-size: 28pt;\n", | |
"}\n", | |
"\n", | |
".rendered_html pre,\n", | |
".rendered_html code {\n", | |
" font-size: medium;\n", | |
"}\n", | |
"\n", | |
".rendered_html ol {\n", | |
" list-style:decimal;\n", | |
" margin: 1em 2em;\n", | |
"}\n", | |
"\n", | |
".prompt.input_prompt {\n", | |
" color: rgba(0,0,0,0.5);\n", | |
"}\n", | |
"\n", | |
".cell.command_mode.selected {\n", | |
" border-color: rgba(0,0,0,0.1);\n", | |
"}\n", | |
"\n", | |
".cell.edit_mode.selected {\n", | |
" border-color: rgba(0,0,0,0.15);\n", | |
" box-shadow: 0px 0px 5px #f0f0f0;\n", | |
" -webkit-box-shadow: 0px 0px 5px #f0f0f0;\n", | |
"}\n", | |
"\n", | |
"div.output_scroll {\n", | |
" -webkit-box-shadow: inset 0 2px 8px rgba(0,0,0,0.1);\n", | |
" box-shadow: inset 0 2px 8px rgba(0,0,0,0.1);\n", | |
" border-radious: 2px;\n", | |
"}\n", | |
"\n", | |
"#menubar .navbar-inner {\n", | |
" background: #fff;\n", | |
" -webkit-box-shadow: none;\n", | |
" box-shadow: none;\n", | |
" border-radius: 0;\n", | |
" border: none;\n", | |
" font-family: lato;\n", | |
" font-weight: 400;\n", | |
"}\n", | |
"\n", | |
".navbar-fixed-top .navbar-inner,\n", | |
".navbar-static-top .navbar-inner {\n", | |
" box-shadow: none;\n", | |
" -webkit-box-shadow: none;\n", | |
" border: none;\n", | |
"}\n", | |
"\n", | |
"div#notebook_panel {\n", | |
" box-shadow: none;\n", | |
" -webkit-box-shadow: none;\n", | |
" border-top: none;\n", | |
"}\n", | |
"\n", | |
"div#notebook {\n", | |
" border-top: 1px solid rgba(0,0,0,0.15);\n", | |
"}\n", | |
"\n", | |
"#menubar .navbar .navbar-inner,\n", | |
".toolbar-inner {\n", | |
" padding-left: 0;\n", | |
" padding-right: 0;\n", | |
"}\n", | |
"\n", | |
"#checkpoint_status,\n", | |
"#autosave_status {\n", | |
" color: rgba(0,0,0,0.5);\n", | |
"}\n", | |
"\n", | |
"#header {\n", | |
" font-family: lato;\n", | |
"}\n", | |
"\n", | |
"#notebook_name {\n", | |
" font-weight: 200;\n", | |
"}\n", | |
"</style>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from IPython import utils \n", | |
"from IPython.core.display import HTML \n", | |
"import os\n", | |
"config_filename = \"custom.css\"\n", | |
"styles = \"<style>\\n%s\\n</style>\" % (open(config_filename,'r').read())\n", | |
"HTML(styles)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# WikiTrivia code" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Some init" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import time\n", | |
"from time import sleep\n", | |
"import json\n", | |
"from random import shuffle\n", | |
"import codecs\n", | |
"import os\n", | |
"import urllib\n", | |
"import dill\n", | |
"import errno\n", | |
"\n", | |
"import credentials # python module containing the definition API_KEY = \"your_own_api_key\"\n", | |
"\n", | |
"import networkx as nx" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# some initial data\n", | |
"\n", | |
"# qid of the series, folder where stuff is saved, path where analysis is saved\n", | |
"SERIES_QID, RESULTS_FOLDER, SAVED_ANALYSES, SAVED_GRAPH = (\n", | |
" \"Q886\", \"simpsons\", \"analyses/simpsons_syntax.pickle\", \"graph/simpsons_graph.pickle\"\n", | |
")\n", | |
"#SERIES_QID, RESULTS_FOLDER, SAVED_ANALYSES, SAVED_GRAPH = (\"Q8539\", \"bigbangtheory\", \"analyses/bbt_syntax.pickle\", \"graph/bbt_graph.pickle\")\n", | |
"#SERIES_QID, RESULTS_FOLDER, SAVED_ANALYSES = (\"Q5930\", \"familyguy\", \"analyses/familyguy_syntax.pickle\")\n", | |
"\n", | |
"EPISODE_SERIES_PREDICATE = \"P179\"\n", | |
"CHARACTER_SERIES_PREDICATE = \"P1441\"\n", | |
"\n", | |
"GENDER_PREDICATE = \"P21\"\n", | |
"MALE_GENDER = \"Q6581097\"\n", | |
"FEMALE_GENDER = \"Q6581072\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def read_resource(path):\n", | |
" with open(path, \"rb\") as file_in:\n", | |
" objj = dill.load(file_in)\n", | |
" return objj\n", | |
"\n", | |
"def write_resource(resource, path):\n", | |
" try:\n", | |
" os.makedirs(os.path.dirname(path))\n", | |
" except OSError as exc: # in case the fodler already exists\n", | |
" if exc.errno != errno.EEXIST:\n", | |
" raise\n", | |
" with open(path, \"wb\") as file_out:\n", | |
" dill.dump(resource, file_out)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# classes to store and manipulate analysis results\n", | |
"\n", | |
"class AnalysisResult(object):\n", | |
" def __init__(self, sentences, references=None):\n", | |
" self.sentences = sentences\n", | |
" if not references:\n", | |
" self.references = [None] * len(self.sentences)\n", | |
" else:\n", | |
" self.references = references\n", | |
" \n", | |
" @staticmethod\n", | |
" def merge(one, two):\n", | |
" return AnalysisResult(one.sentences + two.sentences, one.references + two.references)\n", | |
"\n", | |
" \n", | |
"class AnalysisNode(object):\n", | |
" def __init__(self, token_analysis):\n", | |
" self.text = token_analysis[\"text\"][\"content\"]\n", | |
" self.lemma = token_analysis[\"lemma\"]\n", | |
" self.pos = token_analysis[\"partOfSpeech\"][\"tag\"]\n", | |
" self.match = token_analysis[\"text\"][\"beginOffset\"]\n", | |
" self.data = token_analysis[\"partOfSpeech\"]\n", | |
" self.father = None\n", | |
" self.left_children = []\n", | |
" self.right_children = []\n", | |
" self.entity = None\n", | |
"\n", | |
" def traverse(self, block_nodes=frozenset()):\n", | |
" left_nodes = [node for link in self.left_children for node in link.node.traverse(block_nodes=block_nodes)]\n", | |
" right_nodes = [node for link in self.right_children for node in link.node.traverse(block_nodes=block_nodes)]\n", | |
" current = [self] if self not in block_nodes else []\n", | |
" return left_nodes + current + right_nodes\n", | |
" \n", | |
" def string(self, block_nodes=frozenset()):\n", | |
" return \" \".join(self.strings(block_nodes=block_nodes))\n", | |
" \n", | |
" def strings(self, block_nodes=frozenset()):\n", | |
" return [node.text for node in self.traverse(block_nodes=block_nodes)]\n", | |
" \n", | |
" def __str__(self):\n", | |
" return \"[%s, (%s), %s, %s]\" % (self.text, self.lemma, self.pos, self.entity)\n", | |
"\n", | |
" \n", | |
"class AnalysisLink(object):\n", | |
" def __init__(self, node, link_type):\n", | |
" self.node = node\n", | |
" self.link_type = link_type\n", | |
"\n", | |
"\n", | |
"class NEREntity(object):\n", | |
" def __init__(self, name, typee, mentions=[], link=None):\n", | |
" self._name = name\n", | |
" self._typee = typee\n", | |
" self._mentions = []\n", | |
" self._link = link\n", | |
" \n", | |
" def __str__(self):\n", | |
" return \"{%s / %s / %s }\" % (self._name, self._typee, self._link)\n", | |
"\n", | |
"class Mention(object):\n", | |
" def __init__(self, text, match):\n", | |
" self._text = text\n", | |
" self._match = match" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# function to retrieve relations for a given \"predicate\", knowing the start or end node\n", | |
"def get_linked_nodes(subject_id, predicate_id, object_id, raw_json=False, with_articles=False, gender=None):\n", | |
" query = 'SELECT ?cid ?article WHERE {'\n", | |
" if not subject_id and object_id is not None:\n", | |
" query += ' ?cid wdt:%s wd:%s .' % (predicate_id, object_id)\n", | |
" elif subject_id is not None and not object_id:\n", | |
" query += ' wd:%s wdt:%s ?cid .' % (subject_id, predicate_id)\n", | |
" else:\n", | |
" raise ValueError(\"no parameters on which to perform the query\")\n", | |
" if gender:\n", | |
" query += ' ?cid wdt:%s wd:%s .' % (\"P21\", gender)\n", | |
" query += '''\n", | |
" OPTIONAL {\n", | |
" ?article schema:about ?cid .\n", | |
" ?article schema:inLanguage \"en\" .\n", | |
" FILTER (SUBSTR(str(?article), 1, 25) = \"https://en.wikipedia.org/\")\n", | |
" }\n", | |
" }\n", | |
" '''\n", | |
" headers = {\"Accept\": \"application/sparql-results+json\"}\n", | |
" response = requests.get(\"https://query.wikidata.org/bigdata/namespace/wdq/sparql?query=%s\" % query, headers=headers)\n", | |
" if raw_json:\n", | |
" return response.json()\n", | |
" jsonn = response.json()\n", | |
" if \"results\" not in jsonn or \"bindings\" not in jsonn[\"results\"]:\n", | |
" return []\n", | |
" if with_articles:\n", | |
" return (\n", | |
" [item[\"cid\"][\"value\"].split(\"/\")[-1] for item in response.json()[\"results\"][\"bindings\"]],\n", | |
" [item[\"article\"][\"value\"] for item in response.json()[\"results\"][\"bindings\"] if \"article\" in item]\n", | |
" )\n", | |
" return [item[\"cid\"][\"value\"].split(\"/\")[-1] for item in response.json()[\"results\"][\"bindings\"]]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# functions to generate and manipulate local wikidata subgraph\n", | |
"\n", | |
"def generate_graph_for_father_and_predicate(father, predicate):\n", | |
" G = nx.DiGraph()\n", | |
" male_qids, male_names = get_linked_nodes(None, predicate, father, with_articles=True, gender=MALE_GENDER)\n", | |
" female_qids, female_names = get_linked_nodes(None, predicate, father, with_articles=True, gender=FEMALE_GENDER)\n", | |
" qids, names = get_linked_nodes(None, predicate, father, with_articles=True)\n", | |
" for qid, name in zip(qids, names):\n", | |
"\n", | |
" mod_name = urllib.unquote(name).replace(\" \", \"_\").replace(\"https\", \"http\")\n", | |
"\n", | |
" G.add_edge(qid, father, p=predicate)\n", | |
" G.add_edge(father, qid, p=\"-\"+predicate)\n", | |
" G.add_edge(qid, mod_name, p=\"name\")\n", | |
" G.add_edge(mod_name, qid, p=\"qid\")\n", | |
" \n", | |
" if qid in female_qids:\n", | |
" G.add_edge(qid, \"gender_female\", p=\"gender\")\n", | |
" elif qid in male_qids:\n", | |
" G.add_edge(qid, \"gender_male\", p=\"gender\")\n", | |
" \n", | |
" return G\n", | |
"\n", | |
"def neighbors_for_type_for_node(kgraph, link_type, node_name):\n", | |
" if not node_name in kgraph.nodes():\n", | |
" return None\n", | |
" neighbors = [node \n", | |
" for node, props in kgraph[node_name].iteritems() \n", | |
" if props.get(\"p\") == link_type]\n", | |
" return neighbors if neighbors else None\n", | |
"\n", | |
"def neighbor_for_type_for_node(kgraph, link_type, node_name):\n", | |
" neighs = neighbors_for_type_for_node(kgraph, link_type, node_name)\n", | |
" if neighs:\n", | |
" return neighs[0]\n", | |
" return None\n", | |
"\n", | |
"def get_qid_from_name(kgraph, name):\n", | |
" if not name in kgraph.nodes():\n", | |
" return None\n", | |
" qids = [node \n", | |
" for node, props in kgraph[name].iteritems() \n", | |
" if props.get(\"p\") == \"qid\"]\n", | |
" return qids[0] if qids else None\n", | |
"\n", | |
"def get_name_from_qid(kgraph, qid):\n", | |
" if not qid in kgraph.nodes():\n", | |
" return None\n", | |
" names = [node \n", | |
" for node, props in kgraph[qid].iteritems() \n", | |
" if props.get(\"p\") == \"name\"]\n", | |
" return names[0] if names else None" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def generate_entities_structures(entities):\n", | |
" entity_map = {}\n", | |
" entity_matches = {}\n", | |
" for entity in entities[\"entities\"]:\n", | |
" for mention in entity[\"mentions\"]:\n", | |
" if not entity[\"name\"] in entity_map:\n", | |
" link = None\n", | |
" if \"metadata\" in entity and \"wikipedia_url\" in entity[\"metadata\"]:\n", | |
" link = entity[\"metadata\"][\"wikipedia_url\"]\n", | |
" entity_map[entity[\"name\"]] = NEREntity(entity[\"name\"], entity[\"type\"], mentions=[Mention(mention[\"text\"][\"content\"], mention[\"text\"][\"beginOffset\"])], link=link)\n", | |
" entity_matches[mention[\"text\"][\"beginOffset\"]] = NEREntity(entity[\"name\"], entity[\"type\"], mentions=[Mention(mention[\"text\"][\"content\"], mention[\"text\"][\"beginOffset\"])], link=link)\n", | |
" return entity_matches, entity_map" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# function to retrieve NLP analysis for a given text from remote API \n", | |
"\n", | |
"# document = {\"text\": \"A simple example.\", \"link\": \"Something\"}\n", | |
"def analyze_text(document):\n", | |
" base = \"https://language.googleapis.com\"\n", | |
" \n", | |
" doc = {\"type\": \"PLAIN_TEXT\", \"content\": document[\"text\"]}\n", | |
" request_data = {\"document\": doc, \"encodingType\": \"UTF8\"}\n", | |
"\n", | |
" analysis_endpoint = \"/v1/documents:analyzeSyntax\"\n", | |
" analysis_url = base + analysis_endpoint + \"?key=\" + credentials.NLP_API_KEY\n", | |
" \n", | |
" entities_endpoint = \"/v1/documents:analyzeEntities\"\n", | |
" entities_url = base + entities_endpoint + \"?key=\" + credentials.NLP_API_KEY\n", | |
" \n", | |
" \n", | |
" # syntax\n", | |
" response = requests.post(analysis_url, data=json.dumps(request_data))\n", | |
" analysis_results = json.loads(response.text)\n", | |
" \n", | |
" nodes = [AnalysisNode(token) for token in analysis_results.get(\"tokens\")]\n", | |
"\n", | |
" roots = []\n", | |
" references = []\n", | |
" \n", | |
" # collect the syntax links\n", | |
" for index, token in enumerate(nodes):\n", | |
" data = analysis_results.get(\"tokens\")[index]\n", | |
" father_node = nodes[data.get(\"dependencyEdge\").get(\"headTokenIndex\")]\n", | |
" token.father = AnalysisLink(father_node, data.get(\"dependencyEdge\").get(\"label\"))\n", | |
" if data.get(\"dependencyEdge\").get(\"label\") != \"ROOT\":\n", | |
" if token.match < father_node.match:\n", | |
" father_node.left_children.append(AnalysisLink(token, data.get(\"dependencyEdge\").get(\"label\")))\n", | |
" else:\n", | |
" father_node.right_children.append(AnalysisLink(token, data.get(\"dependencyEdge\").get(\"label\")))\n", | |
" else:\n", | |
" roots.append(token)\n", | |
" if document.get(\"link\"):\n", | |
" references.append(document.get(\"link\"))\n", | |
" else:\n", | |
" None\n", | |
" \n", | |
" # entities\n", | |
" entities_response = requests.post(entities_url, data=json.dumps(request_data))\n", | |
" entities = json.loads(entities_response.text)\n", | |
" entity_matches, _ = generate_entities_structures(entities)\n", | |
" \n", | |
" for node in nodes:\n", | |
" if node.match in entity_matches:\n", | |
" node.entity = entity_matches.get(node.match)\n", | |
"\n", | |
" return AnalysisResult(roots, references)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Quick example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[Homer, (Homer), NOUN, {Homer Simpson / PERSON / http://en.wikipedia.org/wiki/Homer_Simpson }] NN\n", | |
"[Simpson, (Simpson), NOUN, None] NSUBJ\n", | |
"[stole, (steal), VERB, None] ROOT\n", | |
"[Ned, (Ned), NOUN, {Ned / PERSON / http://en.wikipedia.org/wiki/Ned_Flanders }] POSS\n", | |
"['s, ('s), PRT, None] PS\n", | |
"[air, (air), NOUN, {air conditioner / OTHER / None }] NN\n", | |
"[conditioner, (conditioner), NOUN, None] DOBJ\n", | |
"[., (.), PUNCT, None] P\n", | |
"[Homer, (Homer), NOUN, {Homer Simpson / PERSON / http://en.wikipedia.org/wiki/Homer_Simpson }] NSUBJ\n", | |
"[did, (do), VERB, None] ROOT\n", | |
"[it, (it), PRON, None] DOBJ\n", | |
"[again, (again), ADV, None] ADVMOD\n", | |
"[., (.), PUNCT, None] P\n" | |
] | |
} | |
], | |
"source": [ | |
"document = {\n", | |
" \"text\": \"Homer Simpson stole Ned's air conditioner. Homer did it again.\", \n", | |
" \"link\": \"Some/path/ola\",\n", | |
"}\n", | |
"result = analyze_text(document)\n", | |
"for sentence in result.sentences:\n", | |
" for node in sentence.traverse():\n", | |
" print node, node.father.link_type" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{u'article': {u'type': u'uri',\n", | |
" u'value': u'https://en.wikipedia.org/wiki/Homer%20Goes%20to%20College'},\n", | |
" u'cid': {u'type': u'uri', u'value': u'http://www.wikidata.org/entity/Q94318'}}" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# retrieve list of qids that are episodes of the series, together with their wikipedia link\n", | |
"predicate = EPISODE_SERIES_PREDICATE\n", | |
"supertype = SERIES_QID\n", | |
"\n", | |
"query = \"\"\"\n", | |
"SELECT ?cid ?article WHERE {\n", | |
" ?cid wdt:%s wd:%s .\n", | |
" OPTIONAL {\n", | |
" ?article schema:about ?cid .\n", | |
" ?article schema:inLanguage \"en\" .\n", | |
" FILTER (SUBSTR(str(?article), 1, 25) = \"https://en.wikipedia.org/\")\n", | |
" }\n", | |
"} \n", | |
"\"\"\" % (predicate, supertype)\n", | |
"\n", | |
"headers = {\"Accept\": \"application/sparql-results+json\"}\n", | |
"response = requests.get(\"https://query.wikidata.org/bigdata/namespace/wdq/sparql?query=%s\" % query, headers=headers)\n", | |
"res_obj = response.json()\n", | |
"\n", | |
"res_obj.get(\"results\").get(\"bindings\")[0]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Retrieve needed data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### to dump texts of wikipedia xml exports obtained via api GET request, parsed by using the wikiextractor lib USING wikiextractor" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"export_api = \"https://en.wikipedia.org/wiki/Special:Export/\"\n", | |
"FILE_NAME = \"test.page\"\n", | |
"\n", | |
"for result in res_obj[\"results\"][\"bindings\"][:20]:\n", | |
" # pass\n", | |
" try:\n", | |
" print result[\"cid\"][\"value\"], result[\"article\"][\"value\"]\n", | |
" page_name = result[\"article\"][\"value\"].split(\"/\")[-1]\n", | |
" print \" \", export_api + page_name\n", | |
" # print result[\"item\"][\"value\"], result[\"itemLabel\"][\"value\"]\n", | |
"\n", | |
" result_text = requests.get(export_api + result[\"article\"][\"value\"].split(\"/\")[-1]).text\n", | |
" with codecs.open(FILE_NAME, \"w\", encoding=\"utf-8\") as file_out:\n", | |
" file_out.write(result_text)\n", | |
" os.system(\"python wikiextractor/WikiExtractor.py test.page -s -o %s/%s\" % (RESULTS_FOLDER, page_name))\n", | |
" except Exception as e:\n", | |
" print str(e)\n", | |
" sleep(1)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### extra processing step to get a section of the document, here set to retrieve \"plot\" and \"cultural references\" for selected pages" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"PLOT = \"Plot.\"\n", | |
"CULTURAL_REFERENCES = \"Cultural references.\\n\"\n", | |
"\n", | |
"PRODUCTION = \"Production.\"\n", | |
"RECEPTION = \"Reception.\"\n", | |
"CRITICAL_RECEPTION = \"Critical reception.\"\n", | |
"\n", | |
"documents_to_analyze = []\n", | |
"for folder in os.listdir(RESULTS_FOLDER):\n", | |
" for filee in os.listdir(\"%s/%s/AA\" % (RESULTS_FOLDER, folder)):\n", | |
" with codecs.open(\"%s/%s/AA/%s\" % (RESULTS_FOLDER, folder, filee)) as file_in:\n", | |
" total_text = \"\"\n", | |
" \n", | |
" text = file_in.read()\n", | |
" print folder\n", | |
" beginning1 = text.find(PLOT) + len(PLOT)\n", | |
" end1 = min([ndx for ndx in (\n", | |
" text.find(PRODUCTION),\n", | |
" text.find(RECEPTION),\n", | |
" text.find(CRITICAL_RECEPTION),\n", | |
" text.find(CULTURAL_REFERENCES),\n", | |
" text.find(\"</doc>\"),\n", | |
" len(text),\n", | |
" ) if ndx > beginning1])\n", | |
" beginning2 = text.find(CULTURAL_REFERENCES) + len(CULTURAL_REFERENCES)\n", | |
" end2 = min([ndx for ndx in (\n", | |
" text.find(PRODUCTION),\n", | |
" text.find(RECEPTION),\n", | |
" text.find(CRITICAL_RECEPTION),\n", | |
" text.find(\"</doc>\"),\n", | |
" len(text),\n", | |
" ) if ndx > beginning2])\n", | |
" total_text += text[beginning1:end1] + \"\\n\\n\"\n", | |
" if text.find(CULTURAL_REFERENCES) > 0:\n", | |
" print \"----------\"\n", | |
" print text[beginning2:end2]\n", | |
" total_text += text[beginning2:end2] + \"\\n\\n\"\n", | |
" \n", | |
" documents_to_analyze.append({\"text\": total_text, \"link\": \"https://en.wikipedia.org/wiki/%s\" % folder})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# price estimate\n", | |
"units = 0\n", | |
"for doc in documents_to_analyze:\n", | |
" units += len(doc[\"text\"]) / 1000\n", | |
"print float(units) / 1000 * 1.5" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# launch analysis of preprocessed documents\n", | |
"analyzed_documents = [analyze_text(document) for document in documents_to_analyze]\n", | |
"merged = analyzed_documents[0]\n", | |
"for ad in analyzed_documents[1:]:\n", | |
" merged = AnalysisResult.merge(merged, ad)\n", | |
"\n", | |
"write_resource(merged, SAVED_ANALYSES)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# generate knowledge subgraph for the chosen topic\n", | |
"kgraph = generate_graph_for_father_and_predicate(SERIES_QID, CHARACTER_SERIES_PREDICATE)\n", | |
"write_resource(kgraph, SAVED_GRAPH)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Pre-analyzed quiz generation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# reload saved analyses\n", | |
"merged_analyses = read_resource(SAVED_ANALYSES)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# generate knowledge subgraph for the chosen topic\n", | |
"kgraph = read_resource(SAVED_GRAPH)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"class Quiz(object):\n", | |
" def __init__(self):\n", | |
" self.explanation = None\n", | |
" self.question = None\n", | |
" self.correct_answers = []\n", | |
" self.wrong_answers = []\n", | |
" \n", | |
" def __unicode__(self):\n", | |
" return \"S: %s\\nQ: %s\\nA: %s\\nW: %s\" % (\n", | |
" self.explanation,\n", | |
" self.question,\n", | |
" \", \".join(self.correct_answers),\n", | |
" \", \".join(self.wrong_answers),\n", | |
" )\n", | |
" \n", | |
" def __str__(self):\n", | |
" return unicode(self).encode(\"utf-8\")\n", | |
" \n", | |
"\n", | |
"class PersonSubjectQuestioner(object):\n", | |
" TO_BE_REMOVED_KEYWORDS = {\"also\", \"then\", \"meanwhile\", \"later\", \"however\", \"soon\", \"afterwards\"}\n", | |
" \n", | |
" def __init__(self):\n", | |
" pass\n", | |
" \n", | |
" # TODO extra parameters here should be moved in context or something else\n", | |
" def process(self, analysis, kgraph):\n", | |
" questioner_results = []\n", | |
" for ndx in range(len(analysis.sentences)):\n", | |
" quiz = Quiz()\n", | |
"\n", | |
" sentence_root = analysis.sentences[ndx]\n", | |
"\n", | |
" if sentence_root.pos != \"VERB\":\n", | |
" continue\n", | |
"\n", | |
" # get the subject\n", | |
" subj = None\n", | |
" listt = [node \n", | |
" for node in sentence_root.traverse()\n", | |
" if node.father and (node.father.link_type == \"NSUBJ\" or node.father.link_type == \"NSUBJPASS\")]\n", | |
"\n", | |
" if not listt:\n", | |
" continue\n", | |
" \n", | |
" objs = [node\n", | |
" for node in sentence_root.traverse()\n", | |
" if node.father and (node.father.link_type == \"DOBJ\")]\n", | |
" bad_dobj = False\n", | |
" if objs:\n", | |
" for obj in objs:\n", | |
" if obj.father.node == sentence_root and obj.text.lower() in {\"him\", \"her\", \"them\"}:\n", | |
" bad_dobj = True\n", | |
" if obj.father.node == sentence_root and \"the\" in obj.strings():\n", | |
" bad_dobj = True\n", | |
" if bad_dobj:\n", | |
" continue\n", | |
"\n", | |
" for subjj in listt:\n", | |
" if subjj.father.node == sentence_root:\n", | |
" subj = subjj\n", | |
" if not subj:\n", | |
" continue\n", | |
" if subj.text.lower() in {\"you\", \"we\"}:\n", | |
" continue\n", | |
"\n", | |
" # collect entities in the subject nodes\n", | |
" subject_nodes = subj.traverse()\n", | |
" if any(node.father.link_type not in {\"NSUBJ\", \"NN\", \"NSUBJPASS\"} for node in subject_nodes):\n", | |
" continue\n", | |
" \n", | |
" the_entities = [node.entity for node in subj.traverse() if node.entity]\n", | |
"\n", | |
" if the_entities and the_entities[0]._typee == \"PERSON\" and the_entities[0]._link:\n", | |
"\n", | |
" quiz.explanation = sentence_root.string()\n", | |
"\n", | |
" lemmas_to_be_filtered = list(\n", | |
" filter(lambda nodee: nodee.lemma.lower() in PersonSubjectQuestioner.TO_BE_REMOVED_KEYWORDS,\n", | |
" sentence_root.traverse()))\n", | |
"\n", | |
" # add punctuation just-after-a-lemma-to-be-removed to list of stuff to be removed\n", | |
" # e.g. \"He is clever, too, and very smart.\" -> I want to remove \"too,\"\n", | |
" lemmas_to_be_filtered += list(\n", | |
" map(\n", | |
" lambda nodee: nodee[1],\n", | |
" filter(\n", | |
" lambda nodee: nodee[0] in lemmas_to_be_filtered and nodee[1].pos == \"PUNCT\",\n", | |
" zip(sentence_root.traverse()[:-1], sentence_root.traverse()[1:])\n", | |
" )\n", | |
" )\n", | |
" )\n", | |
"\n", | |
" parataxis_and_conj_roots = [\n", | |
" node \n", | |
" for root in sentence_root.traverse()\n", | |
" for node in root.traverse()\n", | |
" if (\n", | |
" root.father.link_type == \"PARATAXIS\" \n", | |
" or root.father.link_type == \"CONJ\") \n", | |
" and root.pos == \"VERB\" and \"NSUBJ\" in set(\n", | |
" [link.link_type for link in root.left_children]\n", | |
" )\n", | |
" ]\n", | |
"\n", | |
" post_who_string = sentence_root.string(\n", | |
" block_nodes=set(subject_nodes + lemmas_to_be_filtered + parataxis_and_conj_roots),\n", | |
" )\n", | |
"\n", | |
" # generate the question and clean it\n", | |
" if any(link.link_type not in {\"NSUBJ\", \"NSUBJPASS\"} for link in sentence_root.left_children):\n", | |
" qtext = \" \" + \"who, \" + \" \" + post_who_string[0].lower() + post_who_string[1:]\n", | |
" else:\n", | |
" qtext = \" \" + \"who\" + \" \" + post_who_string\n", | |
"\n", | |
" if qtext[-1] in {\".\", \",\", \";\"}:\n", | |
" qtext = qtext[:-1].strip()\n", | |
" while qtext[-1:] in {\".\", \",\", \";\"}:\n", | |
" qtext = qtext[:-1].strip() # assuming space-separated tokens\n", | |
" for particle in [\" and\", \" so\", \" but\"]:\n", | |
" if qtext.endswith(particle):\n", | |
" qtext = qtext[:-len(particle)].strip()\n", | |
" while qtext[-1:] in {\".\", \",\", \";\"}:\n", | |
" qtext = qtext[:-1].strip() # assuming space-separated tokens\n", | |
" \n", | |
" episode_context = 'In the episode \"%s\",' % urllib.unquote(analysis.references[ndx].split(\"/\")[-1])#.replace(\"%20\", \" \")\n", | |
" qtext = episode_context + \" \" + qtext + \"?\"\n", | |
" quiz.question = qtext\n", | |
"\n", | |
" # generate the correct answer\n", | |
" correct_answer_link = the_entities[0]._link\n", | |
" correct_answer = correct_answer_link.split(\"/\")[-1].replace(\"_\", \" \")\n", | |
" quiz.correct_answers.append(correct_answer)\n", | |
"\n", | |
" # start checks over wikidata identity of the correct answer to generate wrong answers\n", | |
" qid = neighbor_for_type_for_node(kgraph, \"qid\", correct_answer_link)\n", | |
" if qid and (\"gender_female\" in kgraph[qid] or \"gender_male\" in kgraph[qid]):\n", | |
" if \"gender_female\" in kgraph[qid]:\n", | |
" gender = \"gender_female\"\n", | |
" else:\n", | |
" gender = \"gender_male\"\n", | |
" \n", | |
" # generate wrong answers (distractors)\n", | |
" predicate = CHARACTER_SERIES_PREDICATE\n", | |
"\n", | |
" type_qid = neighbor_for_type_for_node(kgraph, predicate, qid)\n", | |
"\n", | |
" other_qid_same_type = neighbors_for_type_for_node(\n", | |
" kgraph, \"-\" + predicate, type_qid,\n", | |
" )\n", | |
" shuffle(other_qid_same_type)\n", | |
" other_qid_same_type = filter(lambda x : gender in kgraph[x], other_qid_same_type)\n", | |
" \n", | |
" random_sample = []\n", | |
" for other_qid in other_qid_same_type:\n", | |
" wrong_name = neighbor_for_type_for_node(kgraph, \"name\", other_qid).split(\"/\")[-1].replace(\"_\", \" \")\n", | |
" if \"list of\" in wrong_name.lower():\n", | |
" continue\n", | |
" if \"the simpsons\" in wrong_name.lower():\n", | |
" continue\n", | |
" if \"database\" in wrong_name.lower():\n", | |
" continue\n", | |
" random_sample.append(wrong_name)\n", | |
" for wrong in random_sample[:3]:\n", | |
" if wrong == correct_answer:\n", | |
" quiz.wrong_answers.append(random_sample[-1])\n", | |
" else:\n", | |
" quiz.wrong_answers.append(wrong)\n", | |
"\n", | |
" if len(quiz.wrong_answers) < 3:\n", | |
" continue\n", | |
"\n", | |
" questioner_results.append(quiz)\n", | |
"\n", | |
" return questioner_results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"S: Marge throws a dinner party to escape from the doldrums at the Simpson house , so she decides to invite the Flanders , the Lovejoys , the Hibberts and the Van Houtens .\n", | |
"Q: In the episode \"A Milhouse Divided\", who throws a dinner party to escape from the doldrums at the Simpson house?\n", | |
"A: Marge Simpson\n", | |
"W: Maggie Simpson, Lisa Simpson, Edna Krabappel\n", | |
"\n", | |
"S: Homer tries to perform selfless gestures for Marge , such as making soothing ocean noises to lull her to sleep and cutting her hair , but they only serve to annoy her .\n", | |
"Q: In the episode \"A Milhouse Divided\", who tries to perform selfless gestures for Marge , such as making soothing ocean noises to lull her to sleep and cutting her hair?\n", | |
"A: Homer Simpson\n", | |
"W: Barney Gumble, Grampa Simpson, Lionel Hutz\n", | |
"\n", | |
"S: Deciding at that point that Marge deserves a fresh start , Homer secretly files for a divorce .\n", | |
"Q: In the episode \"A Milhouse Divided\", who, deciding at that point that Marge deserves a fresh start , secretly files for a divorce?\n", | |
"A: Homer Simpson\n", | |
"W: Mayor Quimby, Groundskeeper Willie, Barney Gumble\n", | |
"\n", | |
"S: Lisa starts to video her family doing their everyday activities .\n", | |
"Q: In the episode \"Any Given Sundance\", who starts to video her family doing their everyday activities?\n", | |
"A: Lisa Simpson\n", | |
"W: Marge Simpson, Edna Krabappel, Maggie Simpson\n", | |
"\n", | |
"S: Lisa shows her family in all of its dysfunction .\n", | |
"Q: In the episode \"Any Given Sundance\", who shows her family in all of its dysfunction?\n", | |
"A: Lisa Simpson\n", | |
"W: Maggie Simpson, Edna Krabappel, Marge Simpson\n", | |
"\n", | |
"S: Marge cleans up after them and Lisa wishes her a \" Happy Birthday , \" embarrassing the entire family .\n", | |
"Q: In the episode \"Any Given Sundance\", who cleans up after them?\n", | |
"A: Marge Simpson\n", | |
"W: Edna Krabappel, Maggie Simpson, Lisa Simpson\n", | |
"\n", | |
"S: Lisa feels sorry for what she did to the family , and while deep in thought , Jim Jarmusch approaches her and says he can relate because his movies are also about \" social misfits experiencing the dark side of the American dream \" .\n", | |
"Q: In the episode \"Any Given Sundance\", who feels sorry for what she did to the family?\n", | |
"A: Lisa Simpson\n", | |
"W: Marge Simpson, Maggie Simpson, Edna Krabappel\n", | |
"\n", | |
"S: Lisa however , feels that she may have , deep inside , humiliated her family on purpose .\n", | |
"Q: In the episode \"Any Given Sundance\", who, feels that she may have , deep inside , humiliated her family on purpose?\n", | |
"A: Lisa Simpson\n", | |
"W: Edna Krabappel, Marge Simpson, Maggie Simpson\n", | |
"\n", | |
"S: When his film ends , Lisa learns that although her family may embarrass her , humiliate her , or infuriate her , there are other families with tougher problems .\n", | |
"Q: In the episode \"Any Given Sundance\", who, when his film ends , learns that although her family may embarrass her , humiliate her , or infuriate her , there are other families with tougher problems?\n", | |
"A: Lisa Simpson\n", | |
"W: Edna Krabappel, Marge Simpson, Maggie Simpson\n", | |
"\n", | |
"S: Marge delivers a heart - felt plea to the residents to let the Simpsons live in the one place they call home , but they refuse , with Quimby declaring her the \" worst Simpson \" for always trying to see the family in a positive light .\n", | |
"Q: In the episode \"At Long Last Leave\", who delivers a heart - felt plea to the residents to let the Simpsons live in the one place they call home?\n", | |
"A: Marge Simpson\n", | |
"W: Maggie Simpson, Lisa Simpson, Edna Krabappel\n" | |
] | |
} | |
], | |
"source": [ | |
"questioner = PersonSubjectQuestioner()\n", | |
"quizzes = questioner.process(merged_analyses, kgraph)\n", | |
"for quiz in quizzes[:10]:\n", | |
" print\n", | |
" print quiz" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"# Lambda code for Alexa" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"\"\"\"\n", | |
"Simple Alexa Skill to demonstrate quizzing.\n", | |
"\"\"\"\n", | |
"\n", | |
"from __future__ import print_function\n", | |
"import random\n", | |
"\n", | |
"\n", | |
"quizzes = [\n", | |
" {\n", | |
" \"question\": \"In which city do the Simpsons live?\",\n", | |
" \"correct_answer\": \"Springfield\",\n", | |
" \"wrong_answers\": [\"Chicago\", \"New York\", \"Florence\"]\n", | |
" },\n", | |
" {\n", | |
" \"question\": \"In the episode \\\"Bart the Fink\\\", who kills off his pseudonym in a boating accident in order to collect the life insurance, thus ending his tax woes?\",\n", | |
" \"correct_answer\": \"Krusty the Clown\",\n", | |
" \"wrong_answers\": [\"Reverend Lovejoy\", \"Kang and Kodos\", \"Lionel Hutz\"]\n", | |
" },\n", | |
" {\n", | |
" \"question\": \"In the episode \\\"Brush with Greatness\\\", who, once home, announces to his family that he will go on a diet and exercise more?\",\n", | |
" \"correct_answer\": \"Homer Simpson\",\n", | |
" \"wrong_answers\": [\"Chief Wiggum\", \"Martin Prince\", \"Principal Skinner\"]\n", | |
" }\n", | |
"]\n", | |
"\n", | |
"opening_sentences = [\n", | |
" \"Sure, I have one for you!\",\n", | |
" \"Here you are one!\",\n", | |
" \"I've just found a new one! Here it is ...\"\n", | |
"]\n", | |
"\n", | |
"def build_random_question_sentence():\n", | |
" quiz = random.choice(quizzes)\n", | |
" total_answers = [quiz[\"correct_answer\"]] + quiz[\"wrong_answers\"]\n", | |
" random.shuffle(total_answers)\n", | |
" \n", | |
" answers_sentence = \"\"\n", | |
" correct_number = None\n", | |
" correct_answer = quiz[\"correct_answer\"]\n", | |
" for index, ans in enumerate(total_answers):\n", | |
" answers_sentence += \"%s %s %s \" % (str(index + 1), ans, \"...\")\n", | |
" if ans == correct_answer:\n", | |
" correct_number = str(index + 1)\n", | |
" \n", | |
" return (\n", | |
" \"%s %s %s\" % (\n", | |
" random.choice(opening_sentences),\n", | |
" quiz[\"question\"] + \" ... \",\n", | |
" answers_sentence,\n", | |
" ),\n", | |
" correct_number,\n", | |
" correct_answer,\n", | |
" )\n", | |
" \n", | |
"\n", | |
"\n", | |
"# speech response builder\n", | |
"def build_speechlet_response(title, output, reprompt_text, should_end_session):\n", | |
" return {\n", | |
" 'outputSpeech': {\n", | |
" 'type': 'PlainText',\n", | |
" 'text': output\n", | |
" },\n", | |
" 'card': {\n", | |
" 'type': 'Simple',\n", | |
" 'title': \"SessionSpeechlet - \" + title,\n", | |
" 'content': \"SessionSpeechlet - \" + output\n", | |
" },\n", | |
" 'reprompt': {\n", | |
" 'outputSpeech': {\n", | |
" 'type': 'PlainText',\n", | |
" 'text': reprompt_text\n", | |
" }\n", | |
" },\n", | |
" 'shouldEndSession': should_end_session\n", | |
" }\n", | |
"\n", | |
"\n", | |
"def build_response(session_attributes, speechlet_response):\n", | |
" return {\n", | |
" 'version': '1.0',\n", | |
" 'sessionAttributes': session_attributes,\n", | |
" 'response': speechlet_response\n", | |
" }\n", | |
"\n", | |
"\n", | |
"# --------------- Functions that control the skill's behavior ------------------\n", | |
"\n", | |
"def get_question():\n", | |
" \"\"\" Immediately ask a question, populating session attributes so that a\n", | |
" specific correct answer is expected.\n", | |
" \"\"\"\n", | |
"\n", | |
" card_title = \"Question\"\n", | |
" speech_output, correct_number, correct_answer = build_random_question_sentence()\n", | |
" # If the user either does not reply to the welcome message or says something\n", | |
" # that is not understood, they will be prompted again with this text.\n", | |
" reprompt_text = \"You can answer the question by saying 'number two'.\"\n", | |
" should_end_session = False\n", | |
" session_attributes = {\"correct_number\": correct_number, \"correct_answer\": correct_answer}\n", | |
" return build_response(\n", | |
" session_attributes, \n", | |
" build_speechlet_response(\n", | |
" card_title, \n", | |
" speech_output, \n", | |
" reprompt_text, \n", | |
" should_end_session\n", | |
" ),\n", | |
" )\n", | |
"\n", | |
"\n", | |
"def handle_session_end_request():\n", | |
" card_title = \"Session Ended\"\n", | |
" speech_output = \"Thank you for playing!\"\n", | |
" should_end_session = True\n", | |
" return build_response({}, build_speechlet_response(\n", | |
" card_title, \n", | |
" speech_output, \n", | |
" None, \n", | |
" should_end_session\n", | |
" ))\n", | |
"\n", | |
"\n", | |
"# --------------- Events ------------------\n", | |
"\n", | |
"def on_session_started(session_started_request, session):\n", | |
" \"\"\" Called when the session starts \"\"\"\n", | |
"\n", | |
" print(\"on_session_started requestId=\" + session_started_request['requestId']\n", | |
" + \", sessionId=\" + session['sessionId'])\n", | |
"\n", | |
"\n", | |
"def on_launch(launch_request, session):\n", | |
"\n", | |
" print(\"on_launch requestId=\" + launch_request['requestId'] +\n", | |
" \", sessionId=\" + session['sessionId'])\n", | |
" return get_question()\n", | |
"\n", | |
"\n", | |
"def check_answer_and_conclude(intent, session):\n", | |
" session_attributes = {}\n", | |
" card_title = \"Answer\"\n", | |
" if session.get('attributes', {}) and \"correct_number\" in session.get('attributes', {}) and \"correct_answer\" in session.get('attributes', {}) and 'AnswerNumber' in intent['slots']:\n", | |
" answer_number = intent['slots']['AnswerNumber']['value']\n", | |
" correct_number = session['attributes']['correct_number']\n", | |
" correct_answer = session['attributes']['correct_answer']\n", | |
" if answer_number == correct_number:\n", | |
" speech_output = \"You're right! Great job!\"\n", | |
" else:\n", | |
" speech_output = \"I'm sorry. The correct answer was \" + correct_answer +\".\" \n", | |
" return build_response(session_attributes, build_speechlet_response(\n", | |
" card_title, speech_output, None, True))\n", | |
" else:\n", | |
" speech_output = \"I'm not sure what your answer was. \"\n", | |
" should_end_session = False\n", | |
" return build_response(session_attributes, build_speechlet_response(\n", | |
" card_title, speech_output, None, True))\n", | |
"\n", | |
"\n", | |
"def on_intent(intent_request, session):\n", | |
" \"\"\" called when an intent has been detected \"\"\"\n", | |
"\n", | |
" print(\"on_intent requestId=\" + intent_request['requestId'] +\n", | |
" \", sessionId=\" + session['sessionId'])\n", | |
"\n", | |
" intent = intent_request['intent']\n", | |
" intent_name = intent_request['intent']['name']\n", | |
"\n", | |
" # dispatch based on the intent\n", | |
" if intent_name == \"Answer\":\n", | |
" return check_answer_and_conclude(intent, session)\n", | |
" elif intent_name == \"AMAZON.HelpIntent\":\n", | |
" return get_question()\n", | |
" elif intent_name == \"AMAZON.CancelIntent\" or intent_name == \"AMAZON.StopIntent\":\n", | |
" return handle_session_end_request()\n", | |
" else:\n", | |
" raise ValueError(\"Invalid intent\")\n", | |
"\n", | |
"\n", | |
"def on_session_ended(session_ended_request, session):\n", | |
" \"\"\" called when the user ends the session.\n", | |
" not called when the skill returns should_end_session=true\n", | |
" \"\"\"\n", | |
" print(\"on_session_ended requestId=\" + session_ended_request['requestId'] +\n", | |
" \", sessionId=\" + session['sessionId'])\n", | |
"\n", | |
"\n", | |
"# --------------- Main handler ------------------\n", | |
"\n", | |
"def lambda_handler(event, context):\n", | |
" \"\"\" Route the incoming request based on type (LaunchRequest, IntentRequest,\n", | |
" etc.) The JSON body of the request is provided in the event parameter.\n", | |
" \"\"\"\n", | |
" print(\"event.session.application.applicationId=\" +\n", | |
" event['session']['application']['applicationId'])\n", | |
"\n", | |
" \"\"\"\n", | |
" Uncomment this if statement and populate with your skill's application ID to\n", | |
" prevent someone else from configuring a skill that sends requests to this\n", | |
" function.\n", | |
" \"\"\"\n", | |
" # if (event['session']['application']['applicationId'] !=\n", | |
" # \"amzn1.echo-sdk-ams.app.[unique-value-here]\"):\n", | |
" # raise ValueError(\"Invalid Application ID\")\n", | |
"\n", | |
" if event['session']['new']:\n", | |
" on_session_started({'requestId': event['request']['requestId']},\n", | |
" event['session'])\n", | |
"\n", | |
" if event['request']['type'] == \"LaunchRequest\":\n", | |
" return on_launch(event['request'], event['session'])\n", | |
" elif event['request']['type'] == \"IntentRequest\":\n", | |
" return on_intent(event['request'], event['session'])\n", | |
" elif event['request']['type'] == \"SessionEndedRequest\":\n", | |
" return on_session_ended(event['request'], event['session'])\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
This looks really interesting. I want to include that in https://github.com/cknoll/semantic-python-overview. Is there any follow-up project for this? Or is there more information available?
hi @cknoll, thanks for your interest! Feel free to include it. However, there's currently no public follow-up work on this.