# Source code for logviewer

#
#       Copyright 2011 Liftoff Software Corporation
#

# Meta
__version__ = '0.9'
__license__ = "AGPLv3 or Proprietary (see LICENSE.txt)"
__version_info__ = (0, 9)
__author__ = 'Dan McDougall <daniel.mcdougall@liftoffsoftware.com>'

# Import stdlib stuff
import sys, re, gzip
from time import sleep
from datetime import datetime
from optparse import OptionParser

# Import our own stuff
from utils import raw

__doc__ = """\
.. _log_viewer:

Log Viewer
==========
Allows the user to play back a given log file like a video (default) or display
it in a syslog-like format.  To view usage information, run it with the --help
switch:

.. ansi-block::
    \x1b[1;31mroot\x1b[0m@host\x1b[1;34m:/opt/gateone $\x1b[0m ./logviewer.py --help
    Usage:  logviewer.py [options] <log file>

    Options:
      --version       show program's version number and exit
      -h, --help      show this help message and exit
      -f, --flat      Display the log line-by-line in a syslog-like format.
      -p, --playback  Play back the log in a video-like fashion. This is the
                    default view.
      --pretty        Preserve font and character renditions when displaying the
                    log in flat view (default).
      --raw           Display control characters and escape sequences when
                    viewing.

Here's an example of how to display a Gate One log (.golog) in a flat, greppable
format:

.. ansi-block::

    \x1b[1;31mroot\x1b[0m@host\x1b[1;34m:/opt/gateone $\x1b[0m ./logviewer.py --flat
    Sep 09 21:07:14 Host/IP or SSH URL [localhost]: modern-host
    Sep 09 21:07:16 Port [22]:
    Sep 09 21:07:16 User: bsmith
    Sep 09 21:07:17 Connecting to: ssh://bsmith@modern-host:22
    Sep 09 21:07:17
    Sep 09 21:07:17 bsmith@modern-host's password:
    Sep 09 21:07:20 Welcome to Ubuntu 11.04 (GNU/Linux 2.6.38-11-generic x86_64)
    Sep 09 21:07:20
    Sep 09 21:07:20  * Documentation:  https://help.ubuntu.com/
    Sep 09 21:07:20
    Sep 09 21:07:20 Last login: Thu Sep 29 08:51:27 2011 from portarisk
    Sep 09 21:07:20 \x1b[1;34mbsmith\x1b[0m@modern-host\x1b[1;34m:~ $\x1b[0m ls
    Sep 09 21:07:21 why_I_love_gate_one.txt  to_dont_list.txt
    Sep 09 21:07:21 \x1b[1;34mbsmith\x1b[0m@modern-host\x1b[1;34m:~ $\x1b[0m

About Gate One's Log Format
===========================
Gate One's log format (.golog) is a gzip-compressed unicode (UTF-8) text file
consisting of time-based frames separated by the unicode character, U+F0F0F.
Each frame consists of a JavaScript-style timestamp (because it is compact)
followed by a colon and then the text characters of the frame.  A frame ends
when a U+F0F0F character is encountered.

Here are two example .golog frames demonstrating the format::

    1317344834868:\\x1b[H\\x1b[2JHost/IP or SSH URL [localhost]: <U+F0F0F>1317344836086:\\r\\nPort [22]: <U+F0F0F>

Gate One logs can be opened, decoded, and parsed in Python fairly easily::

    import gzip
    golog = gzip.open(path_to_golog).read().decode('utf-8')
    for frame in golog.split(u"\U000f0f0f"):
        frame_time = float(frame[:13]) # First 13 chars is the timestamp
        # Timestamps can be converted into datetime objects very simply:
        datetime_obj = datetime.fromtimestamp(frame_time/1000)
        frame_text = frame[14:] # This gets you the actual text minus the colon
        # Do something with the datetime_obj and the frame_text

.. note:: U+F0F0F is from Private Use Area (PUA) 15 in the Unicode Character Set (UCS). It was chosen at random (mostly =) from PUA-15 because it is highly unlikely to be used in an actual terminal program where it could corrupt a session log.

Class Docstrings
================
"""

# Globals
SEPARATOR = u"\U000f0f0f" # The character used to separate frames in the log

# TODO: Support Fast forward/rewind/pause like Gate One itself.

def playback_log(log_path, file_like, show_esc=False):
    """
[docs] Plays back the log file at *log_path* by way of timely output to *file_like* which is expected to be any file-like object with write() and flush() methods. If *show_esc* is True, escape sequences and control characters will be escaped so they can be seen in the output. """ log = gzip.open(log_path).read().decode('utf-8') prev_frame_time = None for i, frame in enumerate(log.split(SEPARATOR)): try: frame_time = float(frame[:13]) # First 13 chars is the timestamp frame = frame[14:] # Skips the colon if i == 0: # Write it out immediately file_like.write(frame) prev_frame_time = frame_time else: # Wait until the time between the previous frame and now has passed wait_time = (frame_time - prev_frame_time)/1000.0 sleep(wait_time) # frame times are in milliseconds prev_frame_time = frame_time if show_esc: frame = raw(frame) file_like.write(frame) file_like.flush() except ValueError: # End of file. No biggie. return def escape_escape_seq(text, preserve_renditions=True, rstrip=True): """
[docs] Escapes escape sequences so they don't muck with the terminal viewing *text* Also replaces special characters with unicode symbol equivalents (e.g. so you can see what they are without having them do anything to your running shell) If *preserve_renditions* is True, CSI escape sequences for renditions will be preserved as-is (e.g. font color, background, etc). If *rstrip* is true, trailing escape sequences and whitespace will be removed. """ esc_sequence = re.compile( r'\x1b(.*\x1b\\|[ABCDEFGHIJKLMNOQRSTUVWXYZa-z0-9=]|[()# %*+].)') csi_sequence = re.compile(r'\x1B\[([?A-Za-z0-9;@:\!]*)([A-Za-z@_])') esc_rstrip = re.compile('[ \t]+\x1b.+$') #replacement_map = { #0: u'␀', #7: u'␇', #9: u'␉', #24: u'␘', #} out = u"" esc_buffer = u"" # If this seems confusing it is because text parsing is a black art! ARRR! for char in text: if not esc_buffer: if char == u'\x1b': esc_buffer = char #elif ord(char) in replacement_map: #out += replacement_map[ord(char)] else: out += raw(char) else: esc_buffer += char if char == u'\x07' or esc_buffer.endswith(u'\x1b\\'): # Likely title esc_buffer = u'' # Nobody wants to see your naked ESC sequence continue elif esc_buffer.endswith('\x1b\\'): esc_buffer = u'' # Ignore continue # Nobody wants to see plain ESC sequences in the buf... match_obj = esc_sequence.match(esc_buffer) if match_obj: seq_type = match_obj.group(1) esc_buffer = u'' # Just when you thought you've ESC'd... continue # CSI ESC sequences... These are worth a second look match_obj = csi_sequence.match(esc_buffer) if match_obj: csi_type = match_obj.group(2) if csi_type == 'm' and preserve_renditions: # mmmmmm! out += esc_buffer # Ooh, naked viewing of pretty things! esc_buffer = u'' # Make room for more! continue if rstrip: # Remove trailing whitespace + trailing ESC sequences return esc_rstrip.sub('', out) else: # All these trailers better make for a good movie return out def flatten_log(log_path, preserve_renditions=True, show_esc=False): """
[docs] Given a log file at *log_path*, return a list of log lines contained within. If *preserve_renditions* is True, CSI escape sequences for renditions will be preserved as-is (e.g. font color, background, etc). This is to make the output appear as close to how it was originally displayed as possible. Besides that, it looks really nice =) If *show_esc* is True, escape sequences and control characters will be visible in the output. Trailing whitespace and escape sequences will not be removed. NOTE: Converts our standard recording-based log format into something that can be used with grep and similar search/filter tools. """ import gzip lines = gzip.open(log_path).read().decode('utf-8') out = "" for frame in lines.split(SEPARATOR): try: frame_time = float(frame[:13]) # First 13 chars is the timestamp # Convert to datetime object frame_time = datetime.fromtimestamp(frame_time/1000) if '\n' in frame[14:]: # Skips the colon frame_lines = frame[14:].splitlines() for i, fl in enumerate(frame_lines): if len(fl): # NOTE: Have to put a rendition reset (\x1b[m) at the # start of each line in case the previous line didn't # reset it on its own. if show_esc: out += "%s %s\n" % ( # Standard Unix log format frame_time.strftime(u'\x1b[m%b %m %H:%M:%S'), raw(fl)) else: out += "%s %s\n" % ( # Standard Unix log format frame_time.strftime(u'\x1b[m%b %m %H:%M:%S'), escape_escape_seq(fl, rstrip=True) ) elif i:# Don't need this for the first empty line in a frame out += frame_time.strftime(u'\x1b[m%b %m %H:%M:%S \n') elif show_esc: if len(out) and out[-1] == '\n': out = u"%s%s\n" % (out[:-1], raw(frame[14:])) else: escaped_frame = escape_escape_seq(frame[14:], rstrip=False) if len(out) and out[-1] == '\n': out = u"%s%s\n" % (out[:-1], escaped_frame) elif escaped_frame: # This is pretty much always going to be the first line out += "%s %s\n" % ( # Standard Unix log format frame_time.strftime(u'\x1b[m%b %m %H:%M:%S'), escaped_frame ) except ValueError as e: pass # End of file. No biggie. 
return out if __name__ == "__main__": """Parse command line arguments and view the log in the specified format."""
usage = ('\t%prog [options] <log file>') parser = OptionParser(usage=usage, version=__version__) parser.disable_interspersed_args() parser.add_option("-f", "--flat", dest="flat", default=False, action="store_true", help="Display the log line-by-line in a syslog-like format." ) parser.add_option("-p", "--playback", dest="playback", default=True, action="store_false", help=("Play back the log in a video-like fashion. This is the default " "view.") ) parser.add_option("--pretty", dest="pretty", default=True, action="store_true", help=("Preserve font and character renditions when displaying the log " "in flat view (default).") ) parser.add_option("--raw", dest="raw", default=False, action="store_true", help="Display control characters and escape sequences when viewing." ) (options, args) = parser.parse_args() if len(args) < 1: print("ERROR: You must specify a log file to view.") parser.print_help() sys.exit(1) log_path = args[0] if options.flat: result = flatten_log( log_path, preserve_renditions=options.pretty, show_esc=options.raw) print(result) else: playback_log(log_path, sys.stdout)