Package rivescript :: Module rivescript
[hide private]
[frames | no frames]

Source Code for Module rivescript.rivescript

   1  #!/usr/bin/env python 
   2   
   3  # The MIT License (MIT) 
   4  # 
   5  # Copyright (c) 2016 Noah Petherbridge 
   6  # 
   7  # Permission is hereby granted, free of charge, to any person obtaining a copy 
   8  # of this software and associated documentation files (the "Software"), to deal 
   9  # in the Software without restriction, including without limitation the rights 
  10  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
  11  # copies of the Software, and to permit persons to whom the Software is 
  12  # furnished to do so, subject to the following conditions: 
  13  # 
  14  # The above copyright notice and this permission notice shall be included in all 
  15  # copies or substantial portions of the Software. 
  16  # 
  17  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  18  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  19  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
  20  # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
  21  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
  22  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
  23  # SOFTWARE. 
  24   
  25  from __future__ import unicode_literals 
  26  from six import text_type 
  27  import sys 
  28  import os 
  29  import re 
  30  import string 
  31  import random 
  32  import pprint 
  33  import copy 
  34  import codecs 
  35   
  36  from . import __version__ 
  37  from . import python 
# Common regular expressions.
class RE(object):
    """Namespace of precompiled regular expressions used by the interpreter.

    Every pattern is written as a raw string so that regex escapes reach the
    ``re`` module untouched instead of first being interpreted (and warned
    about) as invalid string escape sequences.
    """

    # Parsing of definitions, labels and objects.
    equals = re.compile(r'\s*=\s*')
    ws = re.compile(r'\s+')
    objend = re.compile(r'^\s*<\s*object')

    # Trigger tags and trigger clean-up.
    weight = re.compile(r'\{weight=(\d+)\}')
    inherit = re.compile(r'\{inherits=(\d+)\}')
    wilds = re.compile(r'[\s\*\#\_]+')
    nasties = re.compile(r'[^A-Za-z0-9 ]')
    crlf = re.compile(r'<crlf>')
    literal_w = re.compile(r'\\w')
    array = re.compile(r'\@(.+?)\b')

    # Syntax checking.
    def_syntax = re.compile(r'^.+(?:\s+.+|)\s*=\s*.+?$')
    name_syntax = re.compile(r'[^a-z0-9_\-\s]')
    utf8_trig = re.compile(r'[A-Z\\.]')
    trig_syntax = re.compile(r'[^a-z0-9(\|)\[\]*_#@{}<>=\s]')
    cond_syntax = re.compile(r'^.+?\s*(?:==|eq|!=|ne|<>|<|<=|>|>=)\s*.+?=>.+?$')
    utf8_meta = re.compile(r'[\\<>]')
    utf8_punct = re.compile(r'[.?,!;:@#$%^&*()]')

    # Conditions.
    cond_split = re.compile(r'\s*=>\s*')
    cond_parse = re.compile(r'^(.+?)\s+(==|eq|!=|ne|<>|<|<=|>|>=)\s+(.+?)$')

    # Reply tags.
    topic_tag = re.compile(r'\{topic=(.+?)\}')
    set_tag = re.compile(r'<set (.+?)=(.+?)>')
    bot_tag = re.compile(r'<bot (.+?)>')
    get_tag = re.compile(r'<get (.+?)>')
    star_tags = re.compile(r'<star(\d+)>')
    botstars = re.compile(r'<botstar(\d+)>')
    input_tags = re.compile(r'<input([1-9])>')
    reply_tags = re.compile(r'<reply([1-9])>')
    random_tags = re.compile(r'\{random\}(.+?)\{/random\}')
    redir_tag = re.compile(r'\{@(.+?)\}')
    tag_search = re.compile(r'<([^<]+?)>')
    placeholder = re.compile(r'\x00(\d+)\x00')
    zero_star = re.compile(r'^\*$')
    optionals = re.compile(r'\[(.+?)\]')
# Highest version of the RiveScript language this module supports; the
# parser compares "! version" definitions against it.
rs_version = 2.0

# Exportable constants: the error strings this module defines.
RS_ERR_MATCH = "[ERR: No reply matched]"
RS_ERR_REPLY = "[ERR: No reply found]"
RS_ERR_DEEP_RECURSION = "[ERR: Deep recursion detected]"
RS_ERR_OBJECT = "[ERR: Error when executing Python object]"
RS_ERR_OBJECT_HANDLER = "[ERR: No Object Handler]"
RS_ERR_OBJECT_MISSING = "[ERR: Object Not Found]"
class RiveScript(object):
    """A RiveScript interpreter for Python 2 and 3."""

    # Text inserted between a line and each ^Continue extension appended to
    # it, keyed by the name of the concatenation mode.
    _concat_modes = {
        "none": "",
        "space": " ",
        "newline": "\n",
    }

    ############################################################################
    # Initialization and Utility Methods                                       #
    ############################################################################
def __init__(self, debug=False, strict=True, depth=50, log="", utf8=False):
    """Set up a new RiveScript interpreter.

    bool debug:  Turn debug mode on.
    bool strict: Strict mode (RS syntax errors are fatal).
    int depth:   The recursion depth limit.
    str log:     Path of a log file that debug messages are appended to.
    bool utf8:   Enable UTF-8 support.
    """

    # --- Settings supplied by the caller -----------------------------------
    self._debug = debug      # Debug mode
    self._log = log          # Debug log file
    self._strict = strict    # Strict mode
    self._depth = depth      # Recursion depth limit
    self._utf8 = utf8        # UTF-8 mode
    self.unicode_punctuation = re.compile(r'[.,!?;:]')

    # --- Parsed definitions -------------------------------------------------
    self._gvars = {}         # 'global' variables
    self._bvars = {}         # 'bot' variables
    self._subs = {}          # 'sub' variables
    self._person = {}        # 'person' variables
    self._arrays = {}        # 'array' variables

    # --- Per-user state -----------------------------------------------------
    self._users = {}         # 'user' variables
    self._freeze = {}        # frozen 'user' variables
    self._current_user = None  # The current user ID ("current request" state)

    # --- Topics, replies and objects ----------------------------------------
    self._includes = {}      # included topics
    self._lineage = {}       # inherited topics
    self._handlers = {}      # Object handlers
    self._objlangs = {}      # Languages of objects used
    self._topics = {}        # Main reply structure
    self._thats = {}         # %Previous reply structure
    self._sorted = {}        # Sorted buffers
    self._syntax = {}        # Syntax tracking (filenames & line no.'s)

    # Precomputed regexes for speed optimizations.
    self._regexc = {
        "trigger": {},
        "subs": {},
        "person": {},
    }

    # Python object macros are supported out of the box.
    self._handlers["python"] = python.PyRiveObjects()

    self._say("Interpreter initialized.")
@classmethod
def VERSION(self=None):
    """Report the version number of the RiveScript library.

    Being a class method, this works both on the class itself and on an
    instance of it.
    """
    return __version__
164
165 - def _say(self, message):
166 if self._debug: 167 print("[RS] {}".format(message)) 168 if self._log: 169 # Log it to the file. 170 fh = open(self._log, 'a') 171 fh.write("[RS] " + message + "\n") 172 fh.close()
173
174 - def _warn(self, message, fname='', lineno=0):
175 header = "[RS]" 176 if self._debug: 177 header = "[RS::Warning]" 178 if len(fname) and lineno > 0: 179 print(header, message, "at", fname, "line", lineno) 180 else: 181 print(header, message)
182 183 ############################################################################ 184 # Loading and Parsing Methods # 185 ############################################################################ 186
def load_directory(self, directory, ext=None):
    """Load RiveScript documents from a directory.

    directory: Path of the directory to read. Only files directly inside it
               are considered.
    ext:       A list of file extensions to load, or a single extension as
               a string. The default list is `.rive`, `.rs`.

    A warning is printed and nothing is loaded when `directory` is not a
    directory.
    """
    self._say("Loading from directory: " + directory)

    if ext is None:
        # Use the default extensions - .rive is preferable.
        ext = ['.rive', '.rs']
    elif isinstance(ext, (str, text_type)):
        # Backwards compatibility for ext being a string value. The check
        # covers unicode strings too, which this module produces on
        # Python 2 because of unicode_literals.
        ext = [ext]

    if not os.path.isdir(directory):
        self._warn("Error: " + directory + " is not a directory.")
        return

    # str.endswith accepts a tuple, so each file is tested against every
    # extension in one call and is loaded at most once.
    suffixes = tuple(ext)
    for item in os.listdir(directory):
        if item.lower().endswith(suffixes):
            self.load_file(os.path.join(directory, item))
211
def load_file(self, filename):
    """Load and parse a RiveScript document.

    filename: Path of the document; it is read as UTF-8.
    """
    self._say("Loading file: " + filename)

    # The context manager closes the file even if reading fails.
    with codecs.open(filename, 'r', 'utf-8') as fh:
        lines = fh.readlines()

    self._say("Parsing " + str(len(lines)) + " lines of code from " + filename)
    self._parse(filename, lines)
222
def stream(self, code):
    """Stream in RiveScript source code dynamically.

    code: Either a string containing RiveScript code, which is split into
          lines on newline characters, or an array of lines of RiveScript
          code.
    """
    self._say("Streaming code.")
    # isinstance also accepts subclasses of the string types.
    if isinstance(code, (str, text_type)):
        code = code.split("\n")
    self._parse("stream()", code)
232
def _parse(self, fname, code):
    """Parse RiveScript code into memory.

    fname: Name of the source (a file name, or "stream()") used in warnings
           and in the syntax-tracking table.
    code:  Sequence of source lines.

    Raises Exception when a syntax error is found while strict mode is on.
    With strict mode off the error is printed as a warning and parsing of
    this source stops at that line.
    """
    self._say("Parsing code")

    # Track temporary variables.
    topic = 'random'  # Default topic=random
    lineno = 0        # Line numbers for syntax tracking
    comment = False   # In a multi-line comment
    inobj = False     # In an object
    objname = ''      # The name of the object we're in
    objlang = ''      # The programming language of the object
    objbuf = []       # Object contents buffer
    ontrig = ''       # The current trigger
    repcnt = 0        # Reply counter
    concnt = 0        # Condition counter
    isThat = ''       # Is a %Previous trigger

    # Local (file scoped) parser options.
    local_options = dict(
        concat="none",  # Concat mode for ^Continue command
    )

    # Read each line.
    for lp, line in enumerate(code):
        lineno += 1

        self._say("Line: " + line + " (topic: " + topic + ") incomment: " + str(comment))
        if len(line.strip()) == 0:  # Skip blank lines
            continue

        # In an object?
        if inobj:
            if re.match(RE.objend, line):
                # End the object.
                if len(objname):
                    # Call the object's handler.
                    if objlang in self._handlers:
                        self._objlangs[objname] = objlang
                        self._handlers[objlang].load(objname, objbuf)
                    else:
                        self._warn("Object creation failed: no handler for " + objlang, fname, lineno)
                objname = ''
                objlang = ''
                objbuf = []
                inobj = False
            else:
                objbuf.append(line)
            continue

        line = line.strip()  # Trim excess space. We do it down here so we
                             # don't mess up python objects!

        # Look for comments.
        if line[:2] == '//':  # A single-line comment.
            continue
        elif line[0] == '#':
            self._warn("Using the # symbol for comments is deprecated", fname, lineno)
        elif line[:2] == '/*':  # Start of a multi-line comment.
            if '*/' not in line:  # Cancel if the end is here too.
                comment = True
            continue
        elif '*/' in line:
            comment = False
            continue
        if comment:
            continue

        # Separate the command from the data.
        if len(line) < 2:
            self._warn("Weird single-character line '" + line + "' found.", fname, lineno)
            continue
        cmd = line[0]
        line = line[1:].strip()

        # Ignore inline comments if there's a space before and after
        # the // symbols.
        if " // " in line:
            line = line.split(" // ")[0].strip()

        # Run a syntax check on this line.
        syntax_error = self.check_syntax(cmd, line)
        if syntax_error:
            # There was a syntax error! Are we enforcing strict mode?
            syntax_error = "Syntax error in " + fname + " line " + str(lineno) + ": " \
                + syntax_error + " (near: " + cmd + " " + line + ")"
            if self._strict:
                raise Exception(syntax_error)
            else:
                self._warn(syntax_error)
                return  # Don't try to continue

        # Reset the %Previous state if this is a new +Trigger.
        if cmd == '+':
            isThat = ''

        # Do a lookahead for ^Continue and %Previous commands.
        for i in range(lp + 1, len(code)):
            lookahead = code[i].strip()
            if len(lookahead) < 2:
                continue
            lookCmd = lookahead[0]
            lookahead = lookahead[1:].strip()

            # Only continue if the lookahead line has any data.
            if len(lookahead) != 0:
                # The lookahead command has to be either a % or a ^.
                if lookCmd != '^' and lookCmd != '%':
                    break

                # If the current command is a +, see if the following is
                # a %.
                if cmd == '+':
                    if lookCmd == '%':
                        isThat = lookahead
                        break
                    else:
                        isThat = ''

                # If the current command is a ! and the next command(s) are
                # ^, we'll tack each extension on as a line break (which is
                # useful information for arrays).
                if cmd == '!':
                    if lookCmd == '^':
                        line += "<crlf>" + lookahead
                    continue

                # If the current command is not a ^ and the line after is
                # not a %, but the line after IS a ^, then tack it on to the
                # end of the current line.
                if cmd != '^' and lookCmd != '%':
                    if lookCmd == '^':
                        line += self._concat_modes.get(
                            local_options["concat"], ""
                        ) + lookahead
                    else:
                        break

        self._say("Command: " + cmd + "; line: " + line)

        # Handle the types of RiveScript commands.
        if cmd == '!':
            # ! DEFINE
            # Split on the FIRST equals sign only, so that a value which
            # itself contains '=' is kept whole.
            halves = re.split(RE.equals, line, maxsplit=1)
            left = re.split(RE.ws, halves[0].strip(), maxsplit=2)
            value, kind, var = '', '', ''
            if len(halves) == 2:
                value = halves[1].strip()
            if len(left) >= 1:
                kind = left[0].strip()
                if len(left) >= 2:
                    var = ' '.join(left[1:]).strip()

            # Remove 'fake' line breaks unless this is an array.
            if kind != 'array':
                value = re.sub(RE.crlf, '', value)

            # Handle version numbers.
            if kind == 'version':
                # Verify we support it.
                try:
                    version = float(value)
                except ValueError:
                    self._warn("Error parsing RiveScript version number: not a number", fname, lineno)
                    continue
                if version > rs_version:
                    self._warn("Unsupported RiveScript version. We only support " + str(rs_version), fname, lineno)
                    return
                continue

            # All other types of defines require a variable and value name.
            if len(var) == 0:
                self._warn("Undefined variable name", fname, lineno)
                continue
            elif len(value) == 0:
                self._warn("Undefined variable value", fname, lineno)
                continue

            # Handle the rest of the types.
            if kind == 'local':
                # Local file-scoped parser options.
                self._say("\tSet parser option " + var + " = " + value)
                local_options[var] = value
            elif kind == 'global':
                # 'Global' variables
                self._say("\tSet global " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._gvars[var]
                    except KeyError:
                        self._warn("Failed to delete missing global variable", fname, lineno)
                else:
                    self._gvars[var] = value

                # Handle flipping debug and depth vars.
                if var == 'debug':
                    self._debug = value.lower() == 'true'
                elif var == 'depth':
                    try:
                        self._depth = int(value)
                    except ValueError:
                        self._warn("Failed to set 'depth' because the value isn't a number!", fname, lineno)
                elif var == 'strict':
                    self._strict = value.lower() == 'true'
            elif kind == 'var':
                # Bot variables
                self._say("\tSet bot variable " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._bvars[var]
                    except KeyError:
                        self._warn("Failed to delete missing bot variable", fname, lineno)
                else:
                    self._bvars[var] = value
            elif kind == 'array':
                # Arrays
                self._say("\tArray " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._arrays[var]
                    except KeyError:
                        self._warn("Failed to delete missing array", fname, lineno)
                    continue

                # Did this have multiple parts?
                parts = value.split("<crlf>")

                # Process each line of array data.
                fields = []
                for val in parts:
                    if '|' in val:
                        fields.extend(val.split('|'))
                    else:
                        fields.extend(re.split(RE.ws, val))

                # Convert any remaining '\s' escape codes into spaces. A new
                # list is built because rebinding the loop variable would
                # leave the stored fields unchanged.
                self._arrays[var] = [f.replace('\\s', ' ') for f in fields]
            elif kind == 'sub':
                # Substitutions
                self._say("\tSubstitution " + var + " => " + value)

                if value == '<undef>':
                    try:
                        del self._subs[var]
                    except KeyError:
                        self._warn("Failed to delete missing substitution", fname, lineno)
                else:
                    self._subs[var] = value

                # Precompile the regexp.
                self._precompile_substitution("subs", var)
            elif kind == 'person':
                # Person Substitutions
                self._say("\tPerson Substitution " + var + " => " + value)

                if value == '<undef>':
                    try:
                        del self._person[var]
                    except KeyError:
                        self._warn("Failed to delete missing person substitution", fname, lineno)
                else:
                    self._person[var] = value

                # Precompile the regexp.
                self._precompile_substitution("person", var)
            else:
                self._warn("Unknown definition type '" + kind + "'", fname, lineno)
        elif cmd == '>':
            # > LABEL
            temp = re.split(RE.ws, line)
            kind = temp[0]
            name = ''
            fields = []
            if len(temp) >= 2:
                name = temp[1]
            if len(temp) >= 3:
                fields = temp[2:]

            # Handle the label types.
            if kind == 'begin':
                # The BEGIN block is stored as a topic named __begin__.
                self._say("\tFound the BEGIN block.")
                kind = 'topic'
                name = '__begin__'
            if kind == 'topic':
                # Starting a new topic.
                self._say("\tSet topic to " + name)
                ontrig = ''
                topic = name

                # Does this topic include or inherit another one?
                mode = ''  # or 'inherits' or 'includes'
                if len(fields) >= 2:
                    for field in fields:
                        if field == 'includes':
                            mode = 'includes'
                        elif field == 'inherits':
                            mode = 'inherits'
                        elif mode != '':
                            # This topic is either inherited or included.
                            if mode == 'includes':
                                if name not in self._includes:
                                    self._includes[name] = {}
                                self._includes[name][field] = 1
                            else:
                                if name not in self._lineage:
                                    self._lineage[name] = {}
                                self._lineage[name][field] = 1
            elif kind == 'object':
                # If a field was provided, it should be the programming
                # language.
                lang = None
                if len(fields) > 0:
                    lang = fields[0].lower()

                # Only try to parse a language we support.
                ontrig = ''
                if lang is None:
                    self._warn("Trying to parse unknown programming language", fname, lineno)
                    lang = 'python'  # Assume it's Python.

                # See if we have a defined handler for this language.
                if lang in self._handlers:
                    # We have a handler, so start loading the code.
                    objname = name
                    objlang = lang
                    objbuf = []
                    inobj = True
                else:
                    # We don't have a handler, just ignore it. The object's
                    # lines are still consumed up to its end label.
                    objname = ''
                    objlang = ''
                    objbuf = []
                    inobj = True
            else:
                self._warn("Unknown label type '" + kind + "'", fname, lineno)
        elif cmd == '<':
            # < LABEL
            kind = line

            if kind == 'begin' or kind == 'topic':
                self._say("\tEnd topic label.")
                topic = 'random'
                # The trigger of the closed topic must not collect the
                # replies that follow the end label.
                ontrig = ''
            elif kind == 'object':
                self._say("\tEnd object label.")
                inobj = False
        elif cmd == '+':
            # + TRIGGER
            self._say("\tTrigger pattern: " + line)
            if len(isThat):
                self._initTT('thats', topic, isThat, line)
                self._initTT('syntax', topic, line, 'thats')
                self._syntax['thats'][topic][line]['trigger'] = (fname, lineno)
            else:
                self._initTT('topics', topic, line)
                self._initTT('syntax', topic, line, 'topic')
                self._syntax['topic'][topic][line]['trigger'] = (fname, lineno)
            ontrig = line
            repcnt = 0
            concnt = 0

            # Pre-compile the trigger's regexp if possible.
            self._precompile_regexp(ontrig)
        elif cmd == '-':
            # - REPLY
            if ontrig == '':
                self._warn("Response found before trigger", fname, lineno)
                continue
            self._say("\tResponse: " + line)
            if len(isThat):
                self._thats[topic][isThat][ontrig]['reply'][repcnt] = line
                self._syntax['thats'][topic][ontrig]['reply'][repcnt] = (fname, lineno)
            else:
                self._topics[topic][ontrig]['reply'][repcnt] = line
                self._syntax['topic'][topic][ontrig]['reply'][repcnt] = (fname, lineno)
            repcnt += 1
        elif cmd == '%':
            # % PREVIOUS
            pass  # This was handled above.
        elif cmd == '^':
            # ^ CONTINUE
            pass  # This was handled above.
        elif cmd == '@':
            # @ REDIRECT
            if ontrig == '':
                # Without a trigger there is no entry to attach this to.
                self._warn("Redirect found before trigger", fname, lineno)
                continue
            self._say("\tRedirect response to " + line)
            if len(isThat):
                self._thats[topic][isThat][ontrig]['redirect'] = line
                self._syntax['thats'][topic][ontrig]['redirect'] = (fname, lineno)
            else:
                self._topics[topic][ontrig]['redirect'] = line
                self._syntax['topic'][topic][ontrig]['redirect'] = (fname, lineno)
        elif cmd == '*':
            # * CONDITION
            if ontrig == '':
                # Without a trigger there is no entry to attach this to.
                self._warn("Condition found before trigger", fname, lineno)
                continue
            self._say("\tAdding condition: " + line)
            if len(isThat):
                self._thats[topic][isThat][ontrig]['condition'][concnt] = line
                self._syntax['thats'][topic][ontrig]['condition'][concnt] = (fname, lineno)
            else:
                self._topics[topic][ontrig]['condition'][concnt] = line
                self._syntax['topic'][topic][ontrig]['condition'][concnt] = (fname, lineno)
            concnt += 1
        else:
            self._warn("Unrecognized command \"" + cmd + "\"", fname, lineno)
647
def check_syntax(self, cmd, line):
    """Syntax check a RiveScript command and line.

    cmd:  The one-character command symbol that started the line.
    line: The rest of the line, without the command symbol.

    Returns a syntax error string on error; None otherwise.
    """

    # Run syntax checks based on the type of command.
    if cmd == '!':
        # ! Definition
        #   - Must be formatted like this:
        #     ! type name = value
        #     OR
        #     ! type = value
        if not re.match(RE.def_syntax, line):
            return "Invalid format for !Definition line: must be '! type name = value' OR '! type = value'"
    elif cmd == '>':
        # > Label
        #   - The "begin" label must have only one argument ("begin")
        #   - "topic" labels must be lowercased but can inherit other topics (a-z0-9_\s)
        #   - "object" labels must follow the same rules as "topic", but don't need to be lowercase
        parts = line.split(" ", 2)
        if parts[0] == "begin" and len(parts) > 1:
            return "The 'begin' label takes no additional arguments, should be verbatim '> begin'"
        elif parts[0] == "topic":
            # search, not match: the pattern is an unanchored character
            # class, so match() would only look at the first character.
            if re.search(RE.name_syntax, line):
                return "Topics should be lowercased and contain only numbers and letters"
        elif parts[0] == "object":
            # Same character set as topics, but uppercase is allowed.
            if re.search(r'[^A-Za-z0-9_\-\s]', line):
                return "Objects can only contain numbers and letters"
    elif cmd == '+' or cmd == '%' or cmd == '@':
        # + Trigger, % Previous, @ Redirect
        # This one is strict. The triggers are to be run through the regexp engine,
        # therefore it should be acceptable for the regexp engine.
        #   - Entirely lowercase
        #   - No symbols except: ( | ) [ ] * _ # @ { } < > =
        #   - All brackets should be matched

        # Net count of open brackets, keyed by the opening character.
        unclosed = {'(': 0, '[': 0, '{': 0, '<': 0}
        opener_of = {')': '(', ']': '[', '}': '{', '>': '<'}
        for char in line:
            if char in unclosed:
                unclosed[char] += 1
            elif char in opener_of:
                unclosed[opener_of[char]] -= 1

        # Any mismatches?
        if unclosed['('] != 0:
            return "Unmatched parenthesis brackets"
        elif unclosed['['] != 0:
            return "Unmatched square brackets"
        elif unclosed['{'] != 0:
            return "Unmatched curly brackets"
        elif unclosed['<'] != 0:
            return "Unmatched angle brackets"

        # In UTF-8 mode, most symbols are allowed. Both patterns are
        # unanchored character classes, so the whole line has to be
        # searched for an offending character.
        if self._utf8:
            if re.search(RE.utf8_trig, line):
                return "Triggers can't contain uppercase letters, backslashes or dots in UTF-8 mode."
        else:
            if re.search(RE.trig_syntax, line):
                return "Triggers may only contain lowercase letters, numbers, and these symbols: ( | ) [ ] * _ # @ { } < > ="
    elif cmd == '-' or cmd == '^' or cmd == '/':
        # - Trigger, ^ Continue, / Comment
        # These commands take verbatim arguments, so their syntax is loose.
        pass
    elif cmd == '*':
        # * Condition
        #   Syntax for a conditional is as follows:
        #   * value symbol value => response
        if not re.match(RE.cond_syntax, line):
            return "Invalid format for !Condition: should be like '* value symbol value => response'"

    return None
742
def deparse(self):
    """Return the in-memory RiveScript document as a Python data structure.

    This would be useful for developing a user interface for editing
    RiveScript replies without having to edit the RiveScript code
    manually.

    Returns a dict with the keys "begin" (definitions plus the triggers of
    the begin block), "topic", "that", "inherit" and "include". The
    definition dicts are shallow copies of the interpreter's own.
    """

    # Data to return.
    result = {
        "begin": {
            "global": {},
            "var": {},
            "sub": {},
            "person": {},
            "array": {},
            "triggers": {},
            "that": {},
        },
        "topic": {},
        "that": {},
        "inherit": {},
        "include": {},
    }

    # Populate the config fields.
    if self._debug:
        result["begin"]["global"]["debug"] = self._debug
    if self._depth != 50:
        # Report the configured depth, not the default it differs from.
        result["begin"]["global"]["depth"] = self._depth

    # Definitions
    result["begin"]["var"] = self._bvars.copy()
    result["begin"]["sub"] = self._subs.copy()
    result["begin"]["person"] = self._person.copy()
    result["begin"]["array"] = self._arrays.copy()
    result["begin"]["global"].update(self._gvars.copy())

    # Topic Triggers.
    for topic in self._topics:
        if topic == "__begin__":
            # Begin block.
            dest = result["begin"]["triggers"]
        else:
            # Normal topic.
            dest = result["topic"].setdefault(topic, {})

        # Copy the triggers. items() works on both Python 2 and 3.
        for trig, data in self._topics[topic].items():
            dest[trig] = self._copy_trigger(trig, data)

    # %Previous's.
    for topic in self._thats:
        if topic == "__begin__":
            # Begin block.
            dest = result["begin"]["that"]
        else:
            # Normal topic.
            dest = result["that"].setdefault(topic, {})

        # The "that" structure is backwards: bot reply, then trigger, then info.
        for previous, pdata in self._thats[topic].items():
            for trig, data in pdata.items():
                dest[trig] = self._copy_trigger(trig, data, previous)

    # Inherits/Includes.
    for topic, data in self._lineage.items():
        result["inherit"][topic] = list(data)
    for topic, data in self._includes.items():
        result["include"][topic] = list(data)

    return result
826
def write(self, fh, deparsed=None):
    """Write the currently parsed RiveScript data into a file.

    fh:       Either a file name or a file handle object. Anything without
              a `write` method is treated as a file name; that file is
              written as UTF-8 and closed before returning. A handle passed
              in by the caller is left open.
    deparsed: Data in the format returned by `deparse()`. When omitted,
              `deparse()` is called to dump the loaded data. This way you
              can use `deparse()`, edit the data, and use that to write the
              RiveScript document (for example, to be used by a user
              interface for editing RiveScript without writing the code
              directly).

    Returns True.
    """

    # Passed a file name instead of a file handle?
    opened_here = not hasattr(fh, "write")
    if opened_here:
        fh = codecs.open(fh, "w", "utf-8")

    try:
        # Deparse the loaded data.
        if deparsed is None:
            deparsed = self.deparse()
        begin = deparsed["begin"]

        # Start at the beginning.
        fh.write("// Written by rivescript.deparse()\n")
        fh.write("! version = 2.0\n\n")

        # Variables of all sorts!
        for kind in ["global", "var", "sub", "person", "array"]:
            if len(begin[kind]) == 0:
                continue

            for var in sorted(begin[kind].keys()):
                data = begin[kind][var]
                if isinstance(data, bool):
                    # e.g. the "debug" global that deparse() reports.
                    data = "true" if data else "false"
                elif isinstance(data, (list, tuple, set, frozenset)):
                    # Array types need to be separated by either spaces
                    # or pipes.
                    if any(" " in item for item in data):
                        # Word-wrap the result, target width is 78 chars
                        # minus the kind, var, and spaces and equals sign.
                        width = 78 - len(kind) - len(var) - 4
                        data = self._write_wrapped("|".join(data), sep="|", width=width)
                    else:
                        data = " ".join(data)
                # Any other scalar (string, number) is formatted as-is.

                fh.write("! {kind} {var} = {data}\n".format(
                    kind=kind,
                    var=var,
                    data=data,
                ))
            fh.write("\n")

        # Begin block, including its %Previous triggers.
        begin_thats = begin.get("that") or {}
        if len(begin["triggers"]) or len(begin_thats):
            fh.write("> begin\n\n")
            self._write_triggers(fh, begin["triggers"], indent="\t")
            if len(begin_thats):
                self._write_triggers(fh, begin_thats, indent="\t")
            fh.write("< begin\n\n")

        # The topics. Random first! A topic may hold only %Previous
        # triggers, so both tables contribute topic names.
        names = set(deparsed["topic"]) | set(deparsed["that"])
        topics = sorted(name for name in names if name != "random")
        if "random" in names:
            topics.insert(0, "random")

        for topic in topics:
            # The random topic only needs a > topic tag when it includes
            # or inherits something.
            tagged = (
                topic != "random"
                or topic in deparsed["include"]
                or topic in deparsed["inherit"]
            )

            if tagged:
                fh.write("> topic " + topic)

                if topic in deparsed["inherit"]:
                    fh.write(" inherits " + " ".join(deparsed["inherit"][topic]))
                if topic in deparsed["include"]:
                    fh.write(" includes " + " ".join(deparsed["include"][topic]))

                fh.write("\n\n")

            indent = "\t" if tagged else ""
            self._write_triggers(fh, deparsed["topic"].get(topic, {}), indent=indent)

            # Any %Previous's?
            if topic in deparsed["that"]:
                self._write_triggers(fh, deparsed["that"][topic], indent=indent)

            if tagged:
                fh.write("< topic\n\n")
    finally:
        # Only close what this method opened.
        if opened_here:
            fh.close()

    return True
922
923 - def _copy_trigger(self, trig, data, previous=None):
924 """Make copies of all data below a trigger.""" 925 # Copied data. 926 dest = {} 927 928 if previous: 929 dest["previous"] = previous 930 931 if "redirect" in data and data["redirect"]: 932 # @Redirect 933 dest["redirect"] = data["redirect"] 934 935 if "condition" in data and len(data["condition"].keys()): 936 # *Condition 937 dest["condition"] = [] 938 for i in sorted(data["condition"].keys()): 939 dest["condition"].append(data["condition"][i]) 940 941 if "reply" in data and len(data["reply"].keys()): 942 # -Reply 943 dest["reply"] = [] 944 for i in sorted(data["reply"].keys()): 945 dest["reply"].append(data["reply"][i]) 946 947 return dest
948
949 - def _write_triggers(self, fh, triggers, indent=""):
950 """Write triggers to a file handle.""" 951 952 for trig in sorted(triggers.keys()): 953 fh.write(indent + "+ " + self._write_wrapped(trig, indent=indent) + "\n") 954 d = triggers[trig] 955 956 if "previous" in d: 957 fh.write(indent + "% " + self._write_wrapped(d["previous"], indent=indent) + "\n") 958 959 if "condition" in d: 960 for cond in d["condition"]: 961 fh.write(indent + "* " + self._write_wrapped(cond, indent=indent) + "\n") 962 963 if "redirect" in d: 964 fh.write(indent + "@ " + self._write_wrapped(d["redirect"], indent=indent) + "\n") 965 966 if "reply" in d: 967 for reply in d["reply"]: 968 fh.write(indent + "- " + self._write_wrapped(reply, indent=indent) + "\n") 969 970 fh.write("\n")
971
972 - def _write_wrapped(self, line, sep=" ", indent="", width=78):
973 """Word-wrap a line of RiveScript code for being written to a file.""" 974 975 words = line.split(sep) 976 lines = [] 977 line = "" 978 buf = [] 979 980 while len(words): 981 buf.append(words.pop(0)) 982 line = sep.join(buf) 983 if len(line) > width: 984 # Need to word wrap! 985 words.insert(0, buf.pop()) # Undo 986 lines.append(sep.join(buf)) 987 buf = [] 988 line = "" 989 990 # Straggler? 991 if line: 992 lines.append(line) 993 994 # Returned output 995 result = lines.pop(0) 996 if len(lines): 997 eol = "" 998 if sep == " ": 999 eol = "\s" 1000 for item in lines: 1001 result += eol + "\n" + indent + "^ " + item 1002 1003 return result
1004
1005 - def _initTT(self, toplevel, topic, trigger, what=''):
1006 """Initialize a Topic Tree data structure.""" 1007 if toplevel == 'topics': 1008 if topic not in self._topics: 1009 self._topics[topic] = {} 1010 if trigger not in self._topics[topic]: 1011 self._topics[topic][trigger] = {} 1012 self._topics[topic][trigger]['reply'] = {} 1013 self._topics[topic][trigger]['condition'] = {} 1014 self._topics[topic][trigger]['redirect'] = None 1015 elif toplevel == 'thats': 1016 if topic not in self._thats: 1017 self._thats[topic] = {} 1018 if trigger not in self._thats[topic]: 1019 self._thats[topic][trigger] = {} 1020 if what not in self._thats[topic][trigger]: 1021 self._thats[topic][trigger][what] = {} 1022 self._thats[topic][trigger][what]['reply'] = {} 1023 self._thats[topic][trigger][what]['condition'] = {} 1024 self._thats[topic][trigger][what]['redirect'] = {} 1025 elif toplevel == 'syntax': 1026 if what not in self._syntax: 1027 self._syntax[what] = {} 1028 if topic not in self._syntax[what]: 1029 self._syntax[what][topic] = {} 1030 if trigger not in self._syntax[what][topic]: 1031 self._syntax[what][topic][trigger] = {} 1032 self._syntax[what][topic][trigger]['reply'] = {} 1033 self._syntax[what][topic][trigger]['condition'] = {} 1034 self._syntax[what][topic][trigger]['redirect'] = {}
1035 1036 ############################################################################ 1037 # Sorting Methods # 1038 ############################################################################ 1039
def sort_replies(self, thats=False):
    """Sort the loaded triggers.

    Rebuilds the sort cache for every topic. When called with the default
    `thats=False` it sorts the normal triggers, then calls itself for the
    %Previous triggers, sorts the triggers paired with each %Previous, and
    finally sorts both substitution lists.

    :param bool thats: Sort the %Previous tree (`self._thats`) instead of
        the normal topic tree (`self._topics`).
    """
    # This method can sort both triggers and that's.
    if thats:
        source, level = self._thats, 'thats'
    else:
        source, level = self._topics, 'topics'

    # (Re)Initialize the sort cache.
    self._sorted[level] = {}

    self._say("Sorting triggers...")

    for topic in source:
        self._say("Analyzing topic " + topic)

        # Collect every trigger this topic has to care about, including the
        # ones pulled in from other topics. There is a difference between
        # 'includes' and 'inherits': a topic that inherits another one is
        # able to OVERRIDE its triggers, so its own triggers must always end
        # up on top of the stack. _topic_triggers is expected to mark this
        # with {inherits=n} tags, which _sort_trigger_set then honors.
        alltrig = self._topic_triggers(topic, source)

        # Sort these triggers and save this topic's sorted list.
        ordered = self._sort_trigger_set(alltrig)
        self._sorted.setdefault(level, {})[topic] = ordered

    # And do it all again for %Previous!
    if thats:
        return

    # This will sort the %Previous lines to best match the bot's last reply.
    self.sort_replies(True)

    # If any of those %Previous's had more than one +trigger for them,
    # this will sort all those +triggers to pair back the best human
    # interaction.
    self._sort_that_triggers()

    # Also sort both kinds of substitutions.
    self._sort_list('subs', self._subs)
    self._sort_list('person', self._person)
1103
1104 - def _sort_that_triggers(self):
1105 """Make a sorted list of triggers that correspond to %Previous groups.""" 1106 self._say("Sorting reverse triggers for %Previous groups...") 1107 1108 if "that_trig" not in self._sorted: 1109 self._sorted["that_trig"] = {} 1110 1111 for topic in self._thats: 1112 if topic not in self._sorted["that_trig"]: 1113 self._sorted["that_trig"][topic] = {} 1114 1115 for bottrig in self._thats[topic]: 1116 if bottrig not in self._sorted["that_trig"][topic]: 1117 self._sorted["that_trig"][topic][bottrig] = [] 1118 triggers = self._sort_trigger_set(self._thats[topic][bottrig].keys()) 1119 self._sorted["that_trig"][topic][bottrig] = triggers
1120
def _sort_trigger_set(self, triggers):
    """Sort a group of triggers in optimal sorting order.

    Triggers are grouped by their {weight} (highest first), then by their
    {inherits} level (lowest first, triggers without the tag last). Inside
    each group the order is: atomic, with optionals, with `_`, with `#`,
    with `*` (each by word count and then length, descending), followed by
    the triggers made of a single `_`, `#` or `*` wildcard. The {inherits}
    tag is stripped from the triggers that are returned.

    :param triggers: An iterable of trigger strings.
    :return list: The triggers in matching order.
    """
    # Create a priority map. Priority 0 is the default.
    prior = {0: []}
    for trig in triggers:
        found = RE.weight.search(trig)
        weight = int(found.group(1)) if found else 0
        prior.setdefault(weight, []).append(trig)

    # (wildcard symbol, bucket for multi-word triggers, bucket for the rest);
    # the order of this table is the order in which the symbols are tested.
    wildcard_kinds = (
        ('_', 'alpha', 'under'),
        ('#', 'number', 'pound'),
        ('*', 'wild', 'star'),
    )

    # Keep a running list of sorted triggers for this topic.
    running = []

    # Sort them by priority.
    for p in sorted(prior, reverse=True):
        self._say("\tSorting triggers with priority " + str(p))

        # Some of these triggers may include {inherits} tags, if they came
        # from a topic which inherits another topic. Lower inherits values
        # mean higher priority on the stack. -1 means "no {inherits} tag".
        highest_inherits = -1
        track = {-1: self._init_sort_track()}

        # Loop through and categorize these triggers.
        for trig in prior[p]:
            self._say("\t\tLooking at trigger: " + trig)

            # See if it has an inherits tag.
            found = RE.inherit.search(trig)
            if found:
                inherits = int(found.group(1))
                highest_inherits = max(highest_inherits, inherits)
                self._say("\t\t\tTrigger belongs to a topic which inherits other topics: level=" + str(inherits))
                trig = RE.inherit.sub("", trig)
            else:
                inherits = -1

            # First time we see this inheritance level? Set up its buckets.
            if inherits not in track:
                track[inherits] = self._init_sort_track()
            bucket = track[inherits]

            # Start inspecting the trigger's contents.
            cnt = self._word_count(trig)
            for symbol, multi, single in wildcard_kinds:
                if symbol not in trig:
                    continue
                self._say("\t\t\tHas a " + symbol + " wildcard with " + str(cnt) + " words.")
                if cnt > 1:
                    bucket[multi].setdefault(cnt, []).append(trig)
                else:
                    bucket[single].append(trig)
                break
            else:
                if '[' in trig:
                    # Optionals included.
                    self._say("\t\t\tHas optionals and " + str(cnt) + " words.")
                    bucket['option'].setdefault(cnt, []).append(trig)
                else:
                    # Totally atomic.
                    self._say("\t\t\tTotally atomic and " + str(cnt) + " words.")
                    bucket['atomic'].setdefault(cnt, []).append(trig)

        # Move the no-{inherits} triggers to the bottom of the stack.
        track[highest_inherits + 1] = track.pop(-1)

        # Add this group to the sort list.
        for ip in sorted(track):
            self._say("ip=" + str(ip))
            level = track[ip]
            for kind in ['atomic', 'option', 'alpha', 'number', 'wild']:
                for wordcnt in sorted(level[kind], reverse=True):
                    # Triggers with a matching word count should be sorted
                    # by length, descending.
                    running.extend(sorted(level[kind][wordcnt], key=len, reverse=True))
            for kind in ['under', 'pound', 'star']:
                running.extend(sorted(level[kind], key=len, reverse=True))

    return running
1237
1238 - def _sort_list(self, name, items):
1239 """Sort a simple list by number of words and length.""" 1240 1241 def by_length(word1, word2): 1242 return len(word2) - len(word1)
1243 1244 # Initialize the list sort buffer. 1245 if "lists" not in self._sorted: 1246 self._sorted["lists"] = {} 1247 self._sorted["lists"][name] = [] 1248 1249 # Track by number of words. 1250 track = {} 1251 1252 # Loop through each item. 1253 for item in items: 1254 # Count the words. 1255 cword = self._word_count(item, all=True) 1256 if cword not in track: 1257 track[cword] = [] 1258 track[cword].append(item) 1259 1260 # Sort them. 1261 output = [] 1262 for count in sorted(track.keys(), reverse=True): 1263 sort = sorted(track[count], key=len, reverse=True) 1264 output.extend(sort) 1265 1266 self._sorted["lists"][name] = output
1267
1268 - def _init_sort_track(self):
1269 """Returns a new dict for keeping track of triggers for sorting.""" 1270 return { 1271 'atomic': {}, # Sort by number of whole words 1272 'option': {}, # Sort optionals by number of words 1273 'alpha': {}, # Sort alpha wildcards by no. of words 1274 'number': {}, # Sort number wildcards by no. of words 1275 'wild': {}, # Sort wildcards by no. of words 1276 'pound': [], # Triggers of just # 1277 'under': [], # Triggers of just _ 1278 'star': [] # Triggers of just * 1279 }
1280 1281 1282 ############################################################################ 1283 # Public Configuration Methods # 1284 ############################################################################ 1285
def set_handler(self, language, obj):
    """Define a custom language handler for RiveScript objects.

    language: The lowercased name of the programming language,
              e.g. python, javascript, perl
    obj:      An instance of a class object that provides the following interface:

        class MyObjectHandler:
            def __init__(self):
                pass
            def load(self, name, code):
                # name = the name of the object from the RiveScript code
                # code = the source code of the object
            def call(self, rs, name, fields):
                # rs     = the current RiveScript interpreter object
                # name   = the name of the object being called
                # fields = array of arguments passed to the object
                return reply

    Pass in a None value for the object to delete an existing handler (for example,
    to prevent Python code from being able to be run by default).

    Look in the `eg` folder of the rivescript-python distribution for an example
    script that sets up a JavaScript language handler."""
    if obj is not None:
        self._handlers[language] = obj
        return

    # A None handler means "remove it"; removing an unknown one is a no-op.
    self._handlers.pop(language, None)
1317
def set_subroutine(self, name, code):
    """Define a Python object from your program.

    This is equivalent to having an object defined in the RiveScript code, except
    your Python code is defining it instead. `name` is the name of the object, and
    `code` is a Python function (a `def`) that accepts rs,args as its parameters.

    This method is only available if there is a Python handler set up (which there
    is by default, unless you've called set_handler("python", None)). Without one,
    a warning is issued and nothing is registered."""
    if 'python' not in self._handlers:
        self._warn("Can't set_subroutine: no Python object handler!")
        return

    python_handler = self._handlers['python']
    python_handler._objects[name] = code
    self._objlangs[name] = 'python'
1334
def set_global(self, name, value):
    """Set a global variable.

    Equivalent to `! global` in RiveScript code. Set to None to delete.

    :param str name: The name of the global variable.
    :param value: The new value, or None to remove the variable. Removing
        a variable that is not set is a no-op.
    """
    if value is None:
        # Unset the variable (and do not store None in its place).
        self._gvars.pop(name, None)
    else:
        self._gvars[name] = value
1344
def set_variable(self, name, value):
    """Set a bot variable.

    Equivalent to `! var` in RiveScript code. Set to None to delete.

    :param str name: The name of the bot variable.
    :param value: The new value, or None to remove the variable. Removing
        a variable that is not set is a no-op.
    """
    if value is None:
        # Unset the variable (and do not store None in its place).
        self._bvars.pop(name, None)
    else:
        self._bvars[name] = value
1354
def set_substitution(self, what, rep):
    """Set a substitution.

    Equivalent to `! sub` in RiveScript code. Set to None to delete.

    Substitutions are applied from the sorted list that `sort_replies()`
    builds, so call `sort_replies()` again after adding or removing one.

    :param str what: The text to be replaced.
    :param rep: The replacement text, or None to remove the substitution.
        Removing a substitution that is not set is a no-op.
    """
    if rep is None:
        # Unset the substitution (and do not store None in its place).
        self._subs.pop(what, None)
    else:
        self._subs[what] = rep
        # _substitute() looks the compiled patterns up by pattern text, so
        # they have to exist for substitutions defined from code as well.
        self._precompile_substitution("subs", what)
1364
def set_person(self, what, rep):
    """Set a person substitution.

    Equivalent to `! person` in RiveScript code. Set to None to delete.

    Substitutions are applied from the sorted list that `sort_replies()`
    builds, so call `sort_replies()` again after adding or removing one.

    :param str what: The text to be replaced.
    :param rep: The replacement text, or None to remove the substitution.
        Removing a substitution that is not set is a no-op.
    """
    if rep is None:
        # Unset the substitution (and do not store None in its place).
        self._person.pop(what, None)
    else:
        self._person[what] = rep
        # _substitute() looks the compiled patterns up by pattern text, so
        # they have to exist for substitutions defined from code as well.
        self._precompile_substitution("person", what)
1374
def set_uservar(self, user, name, value):
    """Set a variable for a user.

    A user that is not known yet gets a fresh profile whose `topic` is
    `random` before the variable is stored.
    """
    profile = self._users.setdefault(user, {"topic": "random"})
    profile[name] = value
1382
def get_uservar(self, user, name):
    """Get a variable about a user.

    If the user has no data at all, returns None. If the user doesn't have a value
    set for the variable you want, returns the string 'undefined'."""
    if user not in self._users:
        return None
    return self._users[user].get(name, "undefined")
1396
def get_uservars(self, user=None):
    """Get all variables about a user (or all users).

    If no username is passed, returns the entire user database structure. Otherwise,
    only returns the variables for the given user, or None if none exist.

    The returned dictionaries are the live internal structures, not copies."""
    if user is None:
        # All the users!
        return self._users

    # Just this one, or None when there is no info.
    return self._users.get(user)
1412
def clear_uservars(self, user=None):
    """Delete all variables about a user (or all users).

    If no username is passed, deletes all variables about all users. Otherwise, only
    deletes all variables for the given user. An unknown user is ignored.

    A cleared user keeps a profile with the default `topic` of `random`
    (the same profile a brand new user starts with), because fetching a
    reply reads the `topic` of every user that has a profile."""
    if user is None:
        # All the users!
        self._users = {}
    elif user in self._users:
        # Just this one: back to the profile of a new user.
        self._users[user] = {"topic": "random"}
1425
def freeze_uservars(self, user):
    """Freeze the variable state for a user.

    This will clone and preserve a user's entire variable state, so that it can be
    restored later with `thaw_uservars`. A warning is issued for an unknown user."""
    if user not in self._users:
        self._warn("Can't freeze vars for user " + user + ": not found!")
        return

    # Clone the user's data so later changes don't leak into the snapshot.
    self._freeze[user] = copy.deepcopy(self._users[user])
1437
def thaw_uservars(self, user, action="thaw"):
    """Thaw a user's frozen variables.

    The `action` can be one of the following options:

        discard: Don't restore the user's variables, just delete the frozen copy.
        keep:    Keep the frozen copy after restoring the variables.
        thaw:    Restore the variables, then delete the frozen copy (default).

    A warning is issued when the user has no frozen copy, or when the
    action is not one of the above."""
    if user not in self._freeze:
        self._warn("Can't thaw vars for user " + user + ": not found!")
        return

    if action not in ("thaw", "discard", "keep"):
        self._warn("Unsupported thaw action")
        return

    # "thaw" and "keep" both put the frozen variables back...
    if action != "discard":
        self.clear_uservars(user)
        self._users[user] = copy.deepcopy(self._freeze[user])

    # ...and only "keep" holds on to the frozen copy afterward.
    if action != "keep":
        del self._freeze[user]
1465
def last_match(self, user):
    """Get the last trigger matched for the user.

    This will return the raw trigger text that the user's last message matched,
    as stored in the user's `__lastmatch__` variable. That variable is None
    when the last message didn't match anything.

    Because the lookup goes through `get_uservar`, this also returns None for a
    user with no data at all, and the string 'undefined' for a user that has
    data but no `__lastmatch__` variable yet."""
    matched_trigger = self.get_uservar(user, "__lastmatch__")
    return matched_trigger
1472
def trigger_info(self, trigger=None, dump=False):
    """Get information about a trigger.

    Pass in a raw trigger to find out what file name and line number it appeared at.
    This is useful for e.g. tracking down the location of the trigger last matched
    by the user via last_match(). Returns a list of matching triggers, containing
    their topics, filenames and line numbers. Returns None if there weren't
    any matches found.

    The keys in the trigger info is as follows:

    * category: The top-level key of the syntax tracking tree that the
                trigger was found under.
    * topic: The topic name
    * trigger: The raw trigger text
    * filename: The filename the trigger was found in.
    * line: The line number the trigger was found on.

    Pass in a true value for `dump`, and the entire syntax tracking
    tree is returned."""
    if dump:
        return self._syntax

    # Search the syntax tree for the trigger.
    hits = []
    for category, topics in self._syntax.items():
        for topic, triggers in topics.items():
            if trigger not in triggers:
                continue

            # We got a match!
            fname, lineno = triggers[trigger]['trigger']
            hits.append({
                'category': category,
                'topic': topic,
                'trigger': trigger,
                'filename': fname,
                'line': lineno,
            })

    # No match at all is reported as None rather than as an empty list.
    return hits or None
1514
def current_user(self):
    """Retrieve the user ID of the current user talking to your bot.

    This is mostly useful inside of a Python object macro to get the user ID of the
    person who caused the object macro to be invoked (i.e. to set a variable for
    that user from within the object).

    This will return None if used outside of the context of getting a reply (i.e.
    the value is unset at the end of the `reply()` method). A warning is issued
    in that case."""
    active_user = self._current_user
    if active_user is None:
        # They're doing it wrong.
        self._warn("current_user() is meant to be used from within a Python object macro!")
    return active_user
1528 1529 ############################################################################ 1530 # Reply Fetching Methods # 1531 ############################################################################ 1532
def reply(self, user, msg, errors_as_replies=True):
    """Fetch a reply from the RiveScript brain.

    The message is formatted, matched against the loaded triggers (through
    the BEGIN block first, when one exists) and the user's input/reply
    history is updated with the result.

    :param str user: The ID of the user sending the message.
    :param str msg: The user's message.
    :param bool errors_as_replies: When true, a `RiveScriptError` raised
        while fetching the reply to the message is turned into a reply (its
        `error_message`); when false the error is raised to the caller.
        The value is also passed on as `ignore_object_errors`.
    :return str: The reply.
    """
    self._say("Get reply to [" + user + "] " + msg)

    # Store the current user in case an object macro needs it.
    self._current_user = user

    try:
        # Format their message.
        msg = self._format_message(msg)

        answer = ''

        # If the BEGIN block exists, consult it first.
        if "__begin__" in self._topics:
            begin = self._getreply(user, 'request', context='begin', ignore_object_errors=errors_as_replies)

            # Okay to continue?
            if '{ok}' in begin:
                try:
                    answer = self._getreply(user, msg, ignore_object_errors=errors_as_replies)
                except RiveScriptError as e:
                    if not errors_as_replies:
                        raise
                    answer = e.error_message
                begin = begin.replace('{ok}', answer)

            answer = begin

            # Run more tag substitutions.
            answer = self._process_tags(user, msg, answer, ignore_object_errors=errors_as_replies)
        else:
            # Just continue then.
            try:
                answer = self._getreply(user, msg, ignore_object_errors=errors_as_replies)
            except RiveScriptError as e:
                if not errors_as_replies:
                    raise
                answer = e.error_message

        # Save their reply history: newest first, keep the 8 previous ones.
        history = self._users[user]['__history__']
        history['input'] = [msg] + history['input'][:8]
        history['reply'] = [answer] + history['reply'][:8]

        return answer
    finally:
        # Unset the current user, also when an error is on its way out, so
        # a failed call can't leave a stale user behind for current_user().
        self._current_user = None
1584
1585 - def _format_message(self, msg, botreply=False):
1586 """Format a user's message for safe processing.""" 1587 1588 # Make sure the string is Unicode for Python 2. 1589 if sys.version_info[0] < 3 and isinstance(msg, str): 1590 msg = msg.decode('utf8') 1591 1592 # Lowercase it. 1593 msg = msg.lower() 1594 1595 # Run substitutions on it. 1596 msg = self._substitute(msg, "subs") 1597 1598 # In UTF-8 mode, only strip metacharacters and HTML brackets 1599 # (to protect from obvious XSS attacks). 1600 if self._utf8: 1601 msg = re.sub(RE.utf8_meta, '', msg) 1602 msg = re.sub(self.unicode_punctuation, '', msg) 1603 1604 # For the bot's reply, also strip common punctuation. 1605 if botreply: 1606 msg = re.sub(RE.utf8_punct, '', msg) 1607 else: 1608 # For everything else, strip all non-alphanumerics. 1609 msg = self._strip_nasties(msg) 1610 1611 return msg
1612
def _getreply(self, user, msg, context='normal', step=0, ignore_object_errors=True):
    """Find the reply for a (formatted) message.

    The lookup first tries the %Previous triggers of the user's topic (only
    on the first step, never during a redirection), then the sorted
    triggers of the topic. For the matched trigger a hard redirect wins
    over the conditions, and the conditions win over the (weighted, random)
    replies.

    :param str user: The ID of the user.
    :param str msg: The formatted message of the user.
    :param str context: `begin` to look the message up in the `__begin__`
        topic; anything else uses the user's current topic.
    :param int step: The current depth of redirections.
    :param bool ignore_object_errors: Passed through to `_process_tags`.
    :return str: The reply.

    :raises RepliesNotSortedError: When `sort_replies()` wasn't called yet.
    :raises DeepRecursionError: When `step` exceeds `self._depth`.
    :raises NoDefaultRandomTopicError: When the topic to search in doesn't exist.
    :raises NoMatchError: When no trigger matched the message.
    :raises NoReplyError: When a trigger matched but produced no reply.
    """
    # Needed to sort replies?
    if 'topics' not in self._sorted:
        raise RepliesNotSortedError("You must call sort_replies() once you are done loading RiveScript documents")

    # Initialize the user's profile?
    if user not in self._users:
        self._users[user] = {'topic': 'random'}

    # Collect data on the user.
    topic = self._users[user]['topic']
    stars = []
    thatstars = []  # For %Previous's.
    reply = ''

    # Avoid letting them fall into a missing topic.
    if topic not in self._topics:
        self._warn("User " + user + " was in an empty topic named '" + topic + "'")
        topic = self._users[user]['topic'] = 'random'

    # Avoid deep recursion.
    if step > self._depth:
        raise DeepRecursionError

    # Are we in the BEGIN statement?
    if context == 'begin':
        topic = '__begin__'

    # Initialize this user's history.
    if '__history__' not in self._users[user]:
        self._users[user]['__history__'] = {
            'input': ['undefined'] * 9,
            'reply': ['undefined'] * 9,
        }

    # More topic sanity checking.
    if topic not in self._topics:
        # This was handled before, which would mean topic=random and
        # it doesn't exist. Serious issue!
        raise NoDefaultRandomTopicError("no default topic 'random' was found")

    # Create a pointer for the matched data when we find it.
    matched = None
    matchedTrigger = None
    foundMatch = False

    # See if there were any %Previous's in this topic, or any topic related
    # to it. This should only be done the first time -- not during a
    # recursive redirection. This is because in a redirection, "lastreply"
    # is still gonna be the same as it was the first time, causing an
    # infinite loop!
    if step == 0:
        allTopics = [topic]
        if topic in self._includes or topic in self._lineage:
            # Get all the topics!
            allTopics = self._get_topic_tree(topic)

        # Scan them all!
        for top in allTopics:
            self._say("Checking topic " + top + " for any %Previous's.")
            if top in self._sorted["thats"]:
                self._say("There is a %Previous in this topic!")

                # Do we have history yet?
                lastReply = self._users[user]["__history__"]["reply"][0]

                # Format the bot's last reply the same way as the human's.
                lastReply = self._format_message(lastReply, botreply=True)

                self._say("lastReply: " + lastReply)

                # See if it's a match.
                for trig in self._sorted["thats"][top]:
                    botside = self._reply_regexp(user, trig)
                    self._say("Try to match lastReply (" + lastReply + ") to " + trig)

                    # Match??
                    match = re.match(botside, lastReply)
                    if match:
                        # Huzzah! See if OUR message is right too.
                        self._say("Bot side matched!")
                        thatstars = match.groups()
                        for subtrig in self._sorted["that_trig"][top][trig]:
                            humanside = self._reply_regexp(user, subtrig)
                            self._say("Now try to match " + msg + " to " + subtrig)

                            match = re.match(humanside, msg)
                            if match:
                                self._say("Found a match!")
                                matched = self._thats[top][trig][subtrig]
                                matchedTrigger = subtrig
                                foundMatch = True

                                # Get the stars!
                                stars = match.groups()
                                break

                    # Break if we found a match.
                    if foundMatch:
                        break

            # Break if we found a match.
            if foundMatch:
                break

    # Search their topic for a match to their trigger.
    if not foundMatch:
        for trig in self._sorted["topics"][topic]:
            # Process the triggers.
            regexp = self._reply_regexp(user, trig)
            self._say("Try to match %r against %r (%r)" % (msg, trig, regexp))

            # Python's regular expression engine is slow. Try a verbatim
            # match if this is an atomic trigger.
            isMatch = False
            if self._is_atomic(trig):
                # Only look for exact matches, no sense running atomic
                # triggers through the regexp engine.
                isMatch = (msg == trig)
            else:
                # Non-atomic triggers always need the regexp.
                match = re.match(regexp, msg)
                if match:
                    # The regexp matched!
                    isMatch = True

                    # Collect the stars.
                    stars = match.groups()

            if isMatch:
                self._say("Found a match!")

                # We found a match, but what if the trigger we've matched
                # doesn't belong to our topic? Find it!
                if trig not in self._topics[topic]:
                    # We have to find it.
                    matched = self._find_trigger_by_inheritance(topic, trig)
                else:
                    # We do have it!
                    matched = self._topics[topic][trig]

                foundMatch = True
                matchedTrigger = trig
                break

    # Store what trigger they matched on. If their matched trigger is None,
    # this will be too, which is great.
    self._users[user]["__lastmatch__"] = matchedTrigger

    if matched:
        if matched["redirect"]:
            # A hard redirect wins over conditions and replies.
            self._say("Redirecting us to " + matched["redirect"])
            redirect = self._process_tags(user, msg, matched["redirect"], stars, thatstars, step,
                                          ignore_object_errors)
            self._say("Pretend user said: " + redirect)
            reply = self._getreply(user, redirect, step=(step + 1), ignore_object_errors=ignore_object_errors)
        else:
            # Check the conditionals.
            for con in sorted(matched["condition"]):
                halves = re.split(RE.cond_split, matched["condition"][con])
                if not halves or len(halves) != 2:
                    continue

                condition = re.match(RE.cond_parse, halves[0])
                if not condition:
                    continue

                left = condition.group(1)
                eq = condition.group(2)
                right = condition.group(3)
                potreply = halves[1]
                self._say("Left: " + left + "; eq: " + eq + "; right: " + right + " => " + potreply)

                # Process tags all around.
                left = self._process_tags(user, msg, left, stars, thatstars, step, ignore_object_errors)
                right = self._process_tags(user, msg, right, stars, thatstars, step, ignore_object_errors)

                # Defaults?
                if len(left) == 0:
                    left = 'undefined'
                if len(right) == 0:
                    right = 'undefined'

                self._say("Check if " + left + " " + eq + " " + right)

                # Validate it.
                passed = False
                if eq == 'eq' or eq == '==':
                    passed = (left == right)
                elif eq == 'ne' or eq == '!=' or eq == '<>':
                    passed = (left != right)
                else:
                    # Gasp, dealing with numbers here...
                    try:
                        left, right = int(left), int(right)
                    except (TypeError, ValueError):
                        # Not numbers after all; the condition just fails.
                        self._warn("Failed to evaluate numeric condition!")
                    else:
                        if eq == '<':
                            passed = left < right
                        elif eq == '<=':
                            passed = left <= right
                        elif eq == '>':
                            passed = left > right
                        elif eq == '>=':
                            passed = left >= right

                # How truthful?
                if passed:
                    reply = potreply
                    break

            # Have our reply yet? If not, pick one of the plain replies.
            if len(reply) == 0:
                # Process weights in the replies.
                bucket = []
                for rep in sorted(matched["reply"]):
                    text = matched["reply"][rep]
                    weight = 1
                    # The tag may sit anywhere in the reply (it is commonly
                    # written at the end), so search rather than match.
                    match = re.search(RE.weight, text)
                    if match:
                        weight = int(match.group(1))
                        if weight <= 0:
                            self._warn("Can't have a weight <= 0!")
                            weight = 1
                    bucket.extend([text] * weight)

                # Get a random reply. With nothing to choose from the reply
                # stays empty and is reported as NoReplyError below.
                if bucket:
                    reply = random.choice(bucket)

    # Still no reply?
    if not foundMatch:
        raise NoMatchError
    elif len(reply) == 0:
        raise NoReplyError

    self._say("Reply: " + reply)

    # Process tags for the BEGIN block.
    if context == "begin":
        # BEGIN blocks can only set topics and uservars. The rest happen
        # later!
        reTopic = re.findall(RE.topic_tag, reply)
        for match in reTopic:
            self._say("Setting user's topic to " + match)
            self._users[user]["topic"] = match
            reply = reply.replace('{{topic={match}}}'.format(match=match), '')

        reSet = re.findall(RE.set_tag, reply)
        for match in reSet:
            self._say("Set uservar " + str(match[0]) + "=" + str(match[1]))
            self._users[user][match[0]] = match[1]
            reply = reply.replace('<set {key}={value}>'.format(key=match[0], value=match[1]), '')
    else:
        # Process more tags if not in BEGIN.
        reply = self._process_tags(user, msg, reply, stars, thatstars, step, ignore_object_errors)

    return reply
1888
def _substitute(self, msg, kind):
    """Run a kind of substitution on a message.

    Every pattern of the sorted substitution list is first replaced by a
    numbered placeholder, and the placeholders are swapped for the real
    replacement texts at the end. That way the output of one substitution
    can't be picked up again by a later pattern.

    :param str msg: The message to run the substitutions on.
    :param str kind: `subs` for the normal substitutions; any other sorted
        kind uses the person substitutions.
    :return str: The substituted message, stripped of outer whitespace.

    :raises RepliesNotSortedError: When the substitution lists haven't
        been sorted yet.
    """
    # Safety checking.
    if 'lists' not in self._sorted or kind not in self._sorted["lists"]:
        raise RepliesNotSortedError("You must call sort_replies() once you are done loading RiveScript documents")

    # Get the substitution map.
    subs = self._subs if kind == 'subs' else self._person

    # Make placeholders each time we substitute something.
    replacements = []
    for index, pattern in enumerate(self._sorted["lists"][kind]):
        replacements.append(subs[pattern])
        placeholder = "\x00%d\x00" % index

        # The four precompiled forms cover the pattern as the whole message,
        # at its start, in its middle and at its end.
        cache = self._regexc[kind][pattern]
        msg = cache["sub1"].sub(placeholder, msg)
        msg = cache["sub2"].sub(placeholder + r'\1', msg)
        msg = cache["sub3"].sub(r'\1' + placeholder + r'\2', msg)
        msg = cache["sub4"].sub(r'\1' + placeholder, msg)

    # Swap the placeholders for the texts they stand for.
    for number in RE.placeholder.findall(msg):
        msg = msg.replace('\x00' + number + '\x00', replacements[int(number)])

    # Strip & return.
    return msg.strip()
1931
1932 - def _precompile_substitution(self, kind, pattern):
1933 """Pre-compile the regexp for a substitution pattern. 1934 1935 This will speed up the substitutions that happen at the beginning of 1936 the reply fetching process. With the default brain, this took the 1937 time for _substitute down from 0.08s to 0.02s""" 1938 if pattern not in self._regexc[kind]: 1939 qm = re.escape(pattern) 1940 self._regexc[kind][pattern] = { 1941 "qm": qm, 1942 "sub1": re.compile(r'^' + qm + r'$'), 1943 "sub2": re.compile(r'^' + qm + r'(\W+)'), 1944 "sub3": re.compile(r'(\W+)' + qm + r'(\W+)'), 1945 "sub4": re.compile(r'(\W+)' + qm + r'$'), 1946 }
1947
1948 - def _do_expand_array(self, array_name, depth=0):
1949 """ Do recurrent array expansion, returning a set of keywords. 1950 1951 Exception is thrown when there are cyclical dependencies between 1952 arrays or if the @array name references an undefined array.""" 1953 if depth > self._depth: 1954 raise Exception("deep recursion detected") 1955 if not array_name in self._arrays: 1956 raise Exception("array '%s' not defined" % (array_name)) 1957 ret = list(self._arrays[array_name]) 1958 for array in self._arrays[array_name]: 1959 if array.startswith('@'): 1960 ret.remove(array) 1961 expanded = self._do_expand_array(array[1:], depth+1) 1962 ret.extend(expanded) 1963 1964 return set(ret)
1965
1966 - def _expand_array(self, array_name):
1967 """ Expand variables and return a set of keywords. 1968 1969 Warning is issued when exceptions occur.""" 1970 ret = self._arrays[array_name] if array_name in self._arrays else [] 1971 try: 1972 ret = self._do_expand_array(array_name) 1973 except Exception as e: 1974 self._warn("Error expanding array '%s': %s" % (array_name, str(e))) 1975 return ret
1976 1977
def _reply_regexp(self, user, regexp):
    """Prepare a trigger for the regular expression engine.

    Translates RiveScript trigger syntax (wildcards, ``[optionals]``,
    ``@arrays``, ``<bot>``/``<get>`` variables and ``<input>``/``<reply>``
    history tags) into a Python regular expression anchored with ``^``
    and ``$``.

    :param user: key into ``self._users``; only read when the trigger
        contains ``<get>``, ``<input>`` or ``<reply>`` tags.
    :param regexp: the trigger text.
    :return: a compiled pattern.  If the trigger is already present in
        ``self._regexc["trigger"]`` the cached pattern is returned; this
        method only reads that cache, it never stores its result.
    """
    if regexp in self._regexc["trigger"]:
        # Already compiled this one!
        return self._regexc["trigger"][regexp]

    # If the trigger is simply '*' then the * there needs to become (.*?)
    # to match the blank string too.
    regexp = re.sub(RE.zero_star, r'<zerowidthstar>', regexp)

    # Simple replacements.
    regexp = regexp.replace('*', r'(.+?)')   # Convert * into (.+?)
    regexp = regexp.replace('#', r'(\d+?)')  # Convert # into (\d+?)
    regexp = regexp.replace('_', r'(\w+?)')  # Convert _ into (\w+?)
    regexp = re.sub(r'\{weight=\d+\}', '', regexp)  # Remove {weight} tags
    regexp = regexp.replace('<zerowidthstar>', r'(.*?)')

    # Optionals.
    edge = r'(?:\s|\b)'
    for match in re.findall(RE.optionals, regexp):
        pipes = '|'.join(
            edge + '+' + part + edge + '+' for part in match.split("|")
        )

        # If this optional had a star or anything in it, make it
        # non-matching.  Plain str.replace is used on purpose: the text
        # contains sequences such as \d, and re.sub() rejects unknown
        # escapes in a replacement template on Python 3.7+.
        pipes = pipes.replace(r'(.+?)', r'(?:.+?)')
        pipes = pipes.replace(r'(\d+?)', r'(?:\d+?)')
        # NOTE(review): at this point `_` has become (\w+?), not
        # ([A-Za-z]+?), so this replacement looks like it can never
        # match -- confirm the intended behaviour of `[_]` optionals.
        pipes = pipes.replace(r'([A-Za-z]+?)', r'(?:[A-Za-z]+?)')

        # A callable replacement inserts the text verbatim, with no
        # template escape processing.
        replacement = '(?:' + pipes + '|' + edge + ')'
        regexp = re.sub(r'\s*\[' + re.escape(match) + r'\]\s*',
                        lambda _m, text=replacement: text, regexp)

    # _ wildcards can't match numbers!
    regexp = re.sub(RE.literal_w, r'[A-Za-z]', regexp)

    # Filter in arrays.  Unknown arrays are replaced by nothing.
    for array in re.findall(RE.array, regexp):
        rep = ''
        if array in self._arrays:
            rep = r'(?:' + '|'.join(self._expand_array(array)) + ')'
        regexp = re.sub(r'\@' + re.escape(array) + r'\b', rep, regexp)

    # Filter in bot variables.
    for var in re.findall(RE.bot_tag, regexp):
        rep = ''
        if var in self._bvars:
            rep = self._strip_nasties(self._bvars[var])
        regexp = regexp.replace('<bot {var}>'.format(var=var), rep)

    # Filter in user variables.
    for var in re.findall(RE.get_tag, regexp):
        rep = ''
        if var in self._users[user]:
            # The math tags store integers in user variables; coerce to
            # text so the regex-based stripping does not raise TypeError.
            rep = self._strip_nasties(text_type(self._users[user][var]))
        regexp = regexp.replace('<get {var}>'.format(var=var), rep)

    # Filter in <input> and <reply> tags. This is a slow process, so only
    # do it if we have to!
    if '<input' in regexp or '<reply' in regexp:
        history = self._users[user]['__history__']
        for kind in ('input', 'reply'):
            for index in re.findall(r'<' + kind + r'([0-9])>', regexp):
                rep = self._format_message(history[kind][int(index) - 1])
                regexp = regexp.replace('<' + kind + index + '>', rep)
            regexp = regexp.replace('<' + kind + '>',
                                    self._format_message(history[kind][0]))
            # TODO: the Perl version doesn't do just <input>/<reply> in trigs!

    return re.compile(r'^' + regexp + r'$')
2055
2056 - def _precompile_regexp(self, trigger):
2057 """Precompile the regex for most triggers. 2058 2059 If the trigger is non-atomic, and doesn't include dynamic tags like 2060 `<bot>`, `<get>`, `<input>/<reply>` or arrays, it can be precompiled 2061 and save time when matching.""" 2062 if self._is_atomic(trigger): 2063 return # Don't need a regexp for atomic triggers. 2064 2065 # Check for dynamic tags. 2066 for tag in ["@", "<bot", "<get", "<input", "<reply"]: 2067 if tag in trigger: 2068 return # Can't precompile this trigger. 2069 2070 self._regexc["trigger"][trigger] = self._reply_regexp(None, trigger)
2071
def _process_tags(self, user, msg, reply, st=[], bst=[], depth=0, ignore_object_errors=True):
    """Post process tags in a message.

    Expands the RiveScript tags found in ``reply``: star/botstar values,
    input/reply history, escape codes, ``{random}``, person/formatting
    tags, variable tags (``<bot>``, ``<env>``, ``<set>``, ``<get>`` and
    the math tags), ``{topic=...}``, inline ``{@redirect}`` and
    ``<call>`` object macros.

    :param user: key into ``self._users``.
    :param msg: the user's message (not read by this method).
    :param reply: the reply text whose tags should be processed.
    :param st: values for ``<star>``/``<starN>``.
    :param bst: values for ``<botstar>``/``<botstarN>``.
    :param depth: current redirect depth; ``depth + 1`` is passed as
        ``step`` to ``_getreply`` for inline redirects.
    :param ignore_object_errors: when False, a failing ``<call>`` raises
        ``ObjectError`` instead of inserting an error string.
    :return: the processed reply.
    :raises ObjectError: see ``ignore_object_errors``.
    """
    # The list defaults are only copied, never mutated, so sharing them
    # between calls is harmless.
    stars = [''] + list(st)
    botstars = [''] + list(bst)
    if len(stars) == 1:
        stars.append("undefined")
    if len(botstars) == 1:
        botstars.append("undefined")

    # Tag shortcuts.
    for shortcut, expansion in (
        ('<person>', '{person}<star>{/person}'),
        ('<@>', '{@<star>}'),
        ('<formal>', '{formal}<star>{/formal}'),
        ('<sentence>', '{sentence}<star>{/sentence}'),
        ('<uppercase>', '{uppercase}<star>{/uppercase}'),
        ('<lowercase>', '{lowercase}<star>{/lowercase}'),
    ):
        reply = reply.replace(shortcut, expansion)

    # Weight and <star> tags.
    reply = re.sub(RE.weight, '', reply)  # Leftover {weight}s
    reply = reply.replace('<star>', stars[1])
    for match in re.findall(RE.star_tags, reply):
        if int(match) < len(stars):
            reply = reply.replace('<star{match}>'.format(match=match), stars[int(match)])
    reply = reply.replace('<botstar>', botstars[1])
    for match in re.findall(RE.botstars, reply):
        if int(match) < len(botstars):
            reply = reply.replace('<botstar{match}>'.format(match=match), botstars[int(match)])

    # <input> and <reply>
    history = self._users[user]['__history__']
    reply = reply.replace('<input>', history['input'][0])
    reply = reply.replace('<reply>', history['reply'][0])
    for match in re.findall(RE.input_tags, reply):
        reply = reply.replace('<input{match}>'.format(match=match),
                              history['input'][int(match) - 1])
    for match in re.findall(RE.reply_tags, reply):
        reply = reply.replace('<reply{match}>'.format(match=match),
                              history['reply'][int(match) - 1])

    # <id> and escape codes.
    reply = reply.replace('<id>', user)
    reply = reply.replace('\\s', ' ')
    reply = reply.replace('\\n', "\n")
    reply = reply.replace('\\#', '#')

    # Random bits: choices are separated by '|' if present, else by spaces.
    for match in re.findall(RE.random_tags, reply):
        separator = '|' if '|' in match else ' '
        output = random.choice(match.split(separator))
        reply = reply.replace('{{random}}{match}{{/random}}'.format(match=match), output)

    # Person Substitutions and String Formatting.
    for item in ['person', 'formal', 'sentence', 'uppercase', 'lowercase']:
        matcher = re.findall(r'\{' + item + r'\}(.+?)\{/' + item + r'\}', reply)
        for match in matcher:
            if item == 'person':
                # Person substitutions.
                output = self._substitute(match, "person")
            else:
                output = self._string_format(match, item)
            reply = reply.replace('{{{item}}}{match}{{/{item}}}'.format(item=item, match=match), output)

    # Handle all variable-related tags with an iterative regex approach,
    # to allow for nesting of tags in arbitrary ways (think <set a=<get b>>)
    # Dummy out the <call> tags first, because we don't handle them right
    # here.
    reply = reply.replace("<call>", "{__call__}")
    reply = reply.replace("</call>", "{/__call__}")
    while True:
        # This regex will match a <tag> which contains no other tag inside
        # it, i.e. in the case of <set a=<get b>> it will match <get b> but
        # not the <set> tag, on the first pass. The second pass will get the
        # <set> tag, and so on.
        found = re.search(RE.tag_search, reply)
        if not found:
            break  # No remaining tags!

        match = found.group(1)
        parts = match.split(" ", 1)
        tag = parts[0].lower()
        data = parts[1] if len(parts) > 1 else ""
        insert = ""  # Result of the tag evaluation

        # Handle the tags.
        if tag == "bot" or tag == "env":
            # <bot> and <env> tags are similar.
            target = self._bvars if tag == "bot" else self._gvars
            if "=" in data:
                # Setting a bot/env variable.  Split on the first '='
                # only, so the value itself may contain '='.
                name, _sep, value = data.partition("=")
                self._say("Set " + tag + " variable " + text_type(name) + "=" + text_type(value))
                target[name] = value
            else:
                # Getting a bot/env variable.
                insert = target.get(data, "undefined")
        elif tag == "set":
            # <set> user vars.  A tag without '=' yields an empty value
            # rather than an IndexError.
            name, _sep, value = data.partition("=")
            self._say("Set uservar " + text_type(name) + "=" + text_type(value))
            self._users[user][name] = value
        elif tag in ["add", "sub", "mult", "div"]:
            # Math operator tags.
            var, _sep, value = data.partition("=")

            # Sanity check the value.
            try:
                value = int(value)
            except ValueError:
                insert = "[ERR: Math can't '{}' non-numeric value '{}']".format(tag, value)
            else:
                if var not in self._users[user]:
                    # Initialize it.
                    self._users[user][var] = 0

                # Attempt the operation.
                try:
                    orig = int(self._users[user][var])
                    if tag == "add":
                        new = orig + value
                    elif tag == "sub":
                        new = orig - value
                    elif tag == "mult":
                        new = orig * value
                    else:
                        # Floor division keeps the variable an integer
                        # on both Python 2 and Python 3.
                        new = orig // value
                    self._users[user][var] = new
                except (TypeError, ValueError, ZeroDivisionError):
                    insert = "[ERR: Math couldn't '{}' to value '{}']".format(tag, self._users[user][var])
        elif tag == "get":
            insert = self._users[user].get(data, "undefined")
        else:
            # Unrecognized tag: mask its brackets so the search above
            # does not find it again; they are restored after the loop.
            insert = "\x00{}\x01".format(match)

        # Variables may hold non-string values (the math tags store
        # integers), so coerce before substituting.
        reply = reply.replace("<{}>".format(match), text_type(insert))

    # Restore unrecognized tags.
    reply = reply.replace("\x00", "<").replace("\x01", ">")

    # Streaming code. DEPRECATED!
    if '{!' in reply:
        self._warn("Use of the {!...} tag is deprecated and not supported here.")

    # Topic setter.
    for match in re.findall(RE.topic_tag, reply):
        self._say("Setting user's topic to " + match)
        self._users[user]["topic"] = match
        reply = reply.replace('{{topic={match}}}'.format(match=match), '')

    # Inline redirecter.
    for match in re.findall(RE.redir_tag, reply):
        self._say("Redirect to " + match)
        at = match.strip()
        subreply = self._getreply(user, at, step=(depth + 1))
        reply = reply.replace('{{@{match}}}'.format(match=match), subreply)

    # Object caller.
    reply = reply.replace("{__call__}", "<call>")
    reply = reply.replace("{/__call__}", "</call>")
    for match in re.findall(r'<call>(.+?)</call>', reply):
        parts = re.split(RE.ws, match)
        output = ''
        obj = parts[0]
        args = parts[1:]

        # Do we know this object?
        if obj in self._objlangs:
            # We do, but do we have a handler for that language?
            lang = self._objlangs[obj]
            if lang in self._handlers:
                # We do.
                try:
                    output = self._handlers[lang].call(self, obj, user, args)
                except python.PythonObjectError as e:
                    self._warn(str(e))
                    if not ignore_object_errors:
                        raise ObjectError(str(e))
                    output = RS_ERR_OBJECT
            else:
                if not ignore_object_errors:
                    raise ObjectError(RS_ERR_OBJECT_HANDLER)
                output = RS_ERR_OBJECT_HANDLER
        else:
            if not ignore_object_errors:
                raise ObjectError(RS_ERR_OBJECT_MISSING)
            output = RS_ERR_OBJECT_MISSING

        reply = reply.replace('<call>{match}</call>'.format(match=match), output)

    return reply
2280
2281 - def _string_format(self, msg, method):
2282 """Format a string (upper, lower, formal, sentence).""" 2283 if method == "uppercase": 2284 return msg.upper() 2285 elif method == "lowercase": 2286 return msg.lower() 2287 elif method == "sentence": 2288 return msg.capitalize() 2289 elif method == "formal": 2290 return string.capwords(msg)
2291 2292 ############################################################################ 2293 # Topic inheritance Utility Methods # 2294 ############################################################################ 2295
2296 - def _topic_triggers(self, topic, triglvl, depth=0, inheritance=0, inherited=False):
2297 """Recursively scan a topic and return a list of all triggers.""" 2298 2299 # Break if we're in too deep. 2300 if depth > self._depth: 2301 self._warn("Deep recursion while scanning topic inheritance") 2302 2303 # Important info about the depth vs inheritance params to this function: 2304 # depth increments by 1 each time this function recursively calls itself. 2305 # inheritance increments by 1 only when this topic inherits another 2306 # topic. 2307 # 2308 # This way, '> topic alpha includes beta inherits gamma' will have this 2309 # effect: 2310 # alpha and beta's triggers are combined together into one matching 2311 # pool, and then those triggers have higher matching priority than 2312 # gamma's. 2313 # 2314 # The inherited option is True if this is a recursive call, from a topic 2315 # that inherits other topics. This forces the {inherits} tag to be added 2316 # to the triggers. This only applies when the top topic 'includes' 2317 # another topic. 2318 self._say("\tCollecting trigger list for topic " + topic + "(depth=" 2319 + str(depth) + "; inheritance=" + str(inheritance) + "; " 2320 + "inherited=" + str(inherited) + ")") 2321 2322 # topic: the name of the topic 2323 # triglvl: reference to self._topics or self._thats 2324 # depth: starts at 0 and ++'s with each recursion 2325 2326 # Collect an array of triggers to return. 2327 triggers = [] 2328 2329 # Get those that exist in this topic directly. 2330 inThisTopic = [] 2331 if topic in triglvl: 2332 for trigger in triglvl[topic]: 2333 inThisTopic.append(trigger) 2334 2335 # Does this topic include others? 2336 if topic in self._includes: 2337 # Check every included topic. 2338 for includes in self._includes[topic]: 2339 self._say("\t\tTopic " + topic + " includes " + includes) 2340 triggers.extend(self._topic_triggers(includes, triglvl, (depth + 1), inheritance, True)) 2341 2342 # Does this topic inherit others? 2343 if topic in self._lineage: 2344 # Check every inherited topic. 
2345 for inherits in self._lineage[topic]: 2346 self._say("\t\tTopic " + topic + " inherits " + inherits) 2347 triggers.extend(self._topic_triggers(inherits, triglvl, (depth + 1), (inheritance + 1), False)) 2348 2349 # Collect the triggers for *this* topic. If this topic inherits any 2350 # other topics, it means that this topic's triggers have higher 2351 # priority than those in any inherited topics. Enforce this with an 2352 # {inherits} tag. 2353 if topic in self._lineage or inherited: 2354 for trigger in inThisTopic: 2355 self._say("\t\tPrefixing trigger with {inherits=" + str(inheritance) + "}" + trigger) 2356 triggers.append("{inherits=" + str(inheritance) + "}" + trigger) 2357 else: 2358 triggers.extend(inThisTopic) 2359 2360 return triggers
2361
2362 - def _find_trigger_by_inheritance(self, topic, trig, depth=0):
2363 """Locate the replies for a trigger in an inherited/included topic.""" 2364 2365 # This sub was called because the user matched a trigger from the sorted 2366 # array, but the trigger doesn't belong to their topic, and is instead 2367 # in an inherited or included topic. This is to search for it. 2368 2369 # Prevent recursion. 2370 if depth > self._depth: 2371 self._warn("Deep recursion detected while following an inheritance trail!") 2372 return None 2373 2374 # inheritance is more important than inclusion: triggers in one topic can 2375 # override those in an inherited topic. 2376 if topic in self._lineage: 2377 for inherits in sorted(self._lineage[topic]): 2378 # See if this inherited topic has our trigger. 2379 if trig in self._topics[inherits]: 2380 # Great! 2381 return self._topics[inherits][trig] 2382 else: 2383 # Check what THAT topic inherits from. 2384 match = self._find_trigger_by_inheritance( 2385 inherits, trig, (depth + 1) 2386 ) 2387 if match: 2388 # Found it! 2389 return match 2390 2391 # See if this topic has an "includes" 2392 if topic in self._includes: 2393 for includes in sorted(self._includes[topic]): 2394 # See if this included topic has our trigger. 2395 if trig in self._topics[includes]: 2396 # Great! 2397 return self._topics[includes][trig] 2398 else: 2399 # Check what THAT topic inherits from. 2400 match = self._find_trigger_by_inheritance( 2401 includes, trig, (depth + 1) 2402 ) 2403 if match: 2404 # Found it! 2405 return match 2406 2407 # Don't know what else to do! 2408 return None
2409
2410 - def _get_topic_tree(self, topic, depth=0):
2411 """Given one topic, get the list of all included/inherited topics.""" 2412 2413 # Break if we're in too deep. 2414 if depth > self._depth: 2415 self._warn("Deep recursion while scanning topic trees!") 2416 return [] 2417 2418 # Collect an array of all topics. 2419 topics = [topic] 2420 2421 # Does this topic include others? 2422 if topic in self._includes: 2423 # Try each of these. 2424 for includes in sorted(self._includes[topic]): 2425 topics.extend(self._get_topic_tree(includes, depth + 1)) 2426 2427 # Does this topic inherit others? 2428 if topic in self._lineage: 2429 # Try each of these. 2430 for inherits in sorted(self._lineage[topic]): 2431 topics.extend(self._get_topic_tree(inherits, depth + 1)) 2432 2433 return topics
2434 2435 ############################################################################ 2436 # Miscellaneous Private Methods # 2437 ############################################################################ 2438
2439 - def _is_atomic(self, trigger):
2440 """Determine if a trigger is atomic or not.""" 2441 2442 # Atomic triggers don't contain any wildcards or parenthesis or anything 2443 # of the sort. We don't need to test the full character set, just left 2444 # brackets will do. 2445 special = ['*', '#', '_', '(', '[', '<', '@'] 2446 for char in special: 2447 if char in trigger: 2448 return False 2449 2450 return True
2451
def _word_count(self, trigger, all=False):
    """Count the words in a trigger.

    By default the trigger is split on ``RE.wilds`` so that wildcard
    characters do not count as words; with ``all=True`` it is split on
    whitespace only (``RE.ws``).  Empty pieces are not counted.
    """
    splitter = RE.ws if all else RE.wilds
    return sum(1 for piece in re.split(splitter, trigger) if len(piece) > 0)
2466
def _strip_nasties(self, s):
    """Format a string for ASCII regex matching.

    Removes every match of ``RE.nasties`` from ``s`` and returns the
    result.
    """
    return RE.nasties.sub('', s)
2471
2472 - def _dump(self):
2473 """For debugging, dump the entire data structure.""" 2474 pp = pprint.PrettyPrinter(indent=4) 2475 2476 print("=== Variables ===") 2477 print("-- Globals --") 2478 pp.pprint(self._gvars) 2479 print("-- Bot vars --") 2480 pp.pprint(self._bvars) 2481 print("-- Substitutions --") 2482 pp.pprint(self._subs) 2483 print("-- Person Substitutions --") 2484 pp.pprint(self._person) 2485 print("-- Arrays --") 2486 pp.pprint(self._arrays) 2487 2488 print("=== Topic Structure ===") 2489 pp.pprint(self._topics) 2490 print("=== %Previous Structure ===") 2491 pp.pprint(self._thats) 2492 2493 print("=== Includes ===") 2494 pp.pprint(self._includes) 2495 2496 print("=== Inherits ===") 2497 pp.pprint(self._lineage) 2498 2499 print("=== Sort Buffer ===") 2500 pp.pprint(self._sorted) 2501 2502 print("=== Syntax Tree ===") 2503 pp.pprint(self._syntax)
2504
################################################################################
# Exception Classes                                                            #
################################################################################

class RiveScriptError(Exception):
    """Base class for the RiveScript exceptions that carry an error message.

    The message is passed on to ``Exception`` and is also available as
    the ``error_message`` attribute.
    """

    def __init__(self, error_message=None):
        self.error_message = error_message
        super(RiveScriptError, self).__init__(error_message)
2515
class NoMatchError(RiveScriptError):
    """Raised when no reply could be matched.

    The error message is always ``RS_ERR_MATCH``.
    """

    def __init__(self):
        super(NoMatchError, self).__init__(error_message=RS_ERR_MATCH)
2521
class NoReplyError(RiveScriptError):
    """Raised when no reply could be found.

    The error message is always ``RS_ERR_REPLY``.
    """

    def __init__(self):
        super(NoReplyError, self).__init__(error_message=RS_ERR_REPLY)
2527
class ObjectError(RiveScriptError):
    """Raised when an error occurred while executing a Python object.

    ``error_message`` defaults to ``RS_ERR_OBJECT``.
    """

    def __init__(self, error_message=RS_ERR_OBJECT):
        super(ObjectError, self).__init__(error_message=error_message)
2533
class DeepRecursionError(RiveScriptError):
    """Prevented an infinite loop / deep recursion, unable to retrieve a reply for this message"""

    def __init__(self):
        # NOTE(review): RS_ERR_DEEP_RECURSION is assumed to be defined
        # next to the other RS_ERR_* module constants -- confirm.
        super(DeepRecursionError, self).__init__(RS_ERR_DEEP_RECURSION)
2539
class NoDefaultRandomTopicError(Exception):
    """Critical error: no default topic 'random' could be found."""
2544
class RepliesNotSortedError(Exception):
    """Critical error: sort_replies() was not called after the RiveScript documents were loaded."""
################################################################################
# Interactive Mode                                                             #
################################################################################

# When this file is executed directly (rather than imported), hand control
# to interactive_mode() from the `interactive` module.
if __name__ == "__main__":
    from interactive import interactive_mode
    interactive_mode()

# vim:expandtab