Source code for paradrop.base.pdutils

###################################################################
# Copyright 2013-2015 All Rights Reserved
# Authors: The Paradrop Team
###################################################################

"""
lib.utils.output.
Helper for formatting output from Paradrop.
"""

import time
import json
import urllib

def timeflt():
    """Return the current time as float seconds since the epoch."""
    return time.time()


def timeint():
    """Return the current time as whole seconds since the epoch."""
    return int(time.time())


def timestr(x=None):
    """
    Return an asctime-style string for the timestamp @x in local time.

    Arguments:
        @x : optional, seconds since the epoch; None means "now"
    """
    # time.localtime(None) already means "now", so no truthiness test is
    # needed; testing `if x` would wrongly turn timestamp 0 into "now".
    return time.asctime(time.localtime(x))


# Short time string
def stimestr(x=None):
    """
    Return a short '<weekday> <HH:MM>' string for the timestamp @x in local time.

    Arguments:
        @x : optional, seconds since the epoch; None means "now"
    """
    return time.strftime('%a %H:%M', time.localtime(x))


def timedur(x):
    """
    Format a number of seconds as a consistent, human readable duration.

    Example: 300 = '5 mins'
    Example: 86400 = '1 day'
    Example: 86705 = '1 day, 5 mins, 5 seconds'

    Arguments:
        @x : duration in seconds (anything float() accepts)
    Returns:
        Comma separated string of the non-zero units; an empty string when
        the duration truncates to zero seconds.
    """
    remaining = float(x)
    parts = []

    for plural, span in (('days', 86400), ('hours', 3600), ('mins', 60)):
        if remaining >= span:
            count, remaining = divmod(remaining, float(span))
            # Exactly one of a unit drops the trailing 's' from its label
            label = plural[:-1] if count == 1.0 else plural
            parts.append("%d %s" % (int(count), label))

    # Whatever is left over is seconds; fractions are truncated
    seconds = int(remaining)
    if seconds == 1:
        parts.append("%d %s" % (seconds, "second"))
    elif seconds != 0:
        parts.append("%d %s" % (seconds, "seconds"))

    return ", ".join(parts)
def convertUnicode(elem):
    """
    Recursively converts all unicode strings back into UTF-8 (str) so
    everything works.

    Call this function like:
        json.loads(s, object_hook=convertUnicode)

    dicts (keys and values) and lists are walked recursively. On Python 2
    unicode objects are encoded to UTF-8 byte strings; on Python 3 there is
    no separate unicode type, so text is left as it is. Any other value
    that compares equal to the string 'null' becomes None.

    Returns:
        The converted element.
    """
    if isinstance(elem, dict):
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
        return {convertUnicode(key): convertUnicode(value)
                for key, value in elem.items()}

    if isinstance(elem, list):
        return [convertUnicode(element) for element in elem]

    # `str is bytes` is only true on Python 2, the only place where the
    # `unicode` builtin exists; the short-circuit keeps Python 3 from ever
    # evaluating the name.
    if str is bytes and isinstance(elem, unicode):  # noqa: F821
        return elem.encode('utf-8')

    # DFW: Not sure if this has to be here, but deal with possible "null" MySQL strings
    if elem == 'null':
        return None

    return elem
def _urlEncodeWalk(elem, safe, quote):
    """Recursively apply @quote to every str found in @elem (dict keys included)."""
    if isinstance(elem, dict):
        return {_urlEncodeWalk(key, safe, quote): _urlEncodeWalk(value, safe, quote)
                for key, value in elem.items()}
    if isinstance(elem, list):
        return [_urlEncodeWalk(element, safe, quote) for element in elem]
    if isinstance(elem, str):
        return quote(elem, safe)
    return elem


def urlEncodeMe(elem, safe=' '):
    """
    Converts any values that would cause JSON parsing to fail into URL percent
    encoding equivalents. This function can be used for any valid JSON type
    including str, dict, list.

    Only `str` instances are encoded; every other value (numbers, None, ...)
    is returned unchanged.

    Arguments:
        @elem : element to encode
        @safe : optional, string of chars that are left unencoded. Defaults
                to a space: spaces are safe to travel for JSON parsing.
    Returns:
        Same element properly encoded.
    """
    # Resolve the quoting function once per call instead of once per element:
    # Python 2 exposes it as urllib.quote, Python 3 as urllib.parse.quote.
    try:
        quote = urllib.quote
    except AttributeError:
        from urllib.parse import quote
    return _urlEncodeWalk(elem, safe, quote)
def _urlDecodeWalk(elem, unquote):
    """Recursively apply @unquote to every str found in @elem (dict keys included)."""
    if isinstance(elem, dict):
        return {_urlDecodeWalk(key, unquote): _urlDecodeWalk(value, unquote)
                for key, value in elem.items()}
    if isinstance(elem, list):
        return [_urlDecodeWalk(element, unquote) for element in elem]
    if isinstance(elem, str):
        return unquote(elem)
    return elem


def urlDecodeMe(elem):
    """
    Converts URL percent encoded values back into their original characters.
    This function can be used for any valid JSON type including str, dict,
    list.

    Only `str` instances are decoded; every other value (numbers, None, ...)
    is returned unchanged.

    Arguments:
        @elem : element to decode
    Returns:
        Same element properly decoded.
    """
    # Resolve the unquoting function once per call instead of once per
    # element: Python 2 exposes it as urllib.unquote, Python 3 as
    # urllib.parse.unquote.
    try:
        unquote = urllib.unquote
    except AttributeError:
        from urllib.parse import unquote
    return _urlDecodeWalk(elem, unquote)
def jsonPretty(j):
    """
    Serialize @j to a 'pretty print' JSON string: keys sorted and nested
    levels indented by four spaces.

    Arguments:
        @j : JSON-serializable object
    Returns:
        The formatted JSON string.
    """
    layout = {
        'indent': 4,
        'sort_keys': True,
        # Explicit separators: ',' at line ends, ': ' between key and value
        'separators': (',', ': '),
    }
    return json.dumps(j, **layout)
'''
The two methods below (json2str and str2json) are outrageously slow.
Example with 50k records in var 'logs':

    from paradrop.base.lib import pdutils
    import json
    with pdutils.Timer() as t:
        [pdutils.str2json(x) for x in logs]
    with pdutils.Timer() as t:
        [json.loads(x) for x in logs]

    >> time: 12814.471006 ms
    >> time: 0.007868 ms

Vanilla, sanitized benches do not show the same performance issues. Not sure
why; it might have to do with the content of the dictionaries or the
complexity of the underlying data. They're still slower, but only by one or
two orders of magnitude.
'''
def json2str(j, safe=' '):
    """
    Encode a JSON object into a compact string that can be transmitted
    through a network and stored properly in a database.

    The object is first passed through urlEncodeMe and the result is dumped
    with the most compact separators (no whitespace after ',' or ':').

    Arguments:
        @j    : json to be converted
        @safe : optional, string of chars to pass to urlEncodeMe that are
                declared safe (don't encode)
    Returns:
        The encoded JSON string.
    """
    encoded = urlEncodeMe(j, safe)
    return json.dumps(encoded, separators=(',', ':'))
def str2json(s):
    """
    Parse a JSON string and return the decoded object, the counterpart of
    json2str.

    The string is loaded with convertUnicode as the object hook, and the
    result is then passed through urlDecodeMe.

    Arguments:
        @s : JSON string to parse
    Returns:
        The parsed and decoded object.
    """
    parsed = json.loads(s, object_hook=convertUnicode)

    # json.loads only calls object_hook for JSON objects (by design), so the
    # members of a top-level list have to be converted here.
    if isinstance(parsed, list):
        parsed = [convertUnicode(item) for item in parsed]

    # Make sure to still decode any strings
    return urlDecodeMe(parsed)
class dict2obj(object):
    """
    Simple object whose attributes are taken from a dict and/or keyword
    arguments.

    Example:
        o = dict2obj({'a': 1}, b=2)
        o.a == 1 and o.b == 2
    """

    def __init__(self, aDict=None, **kwargs):
        """
        Arguments:
            @aDict  : optional, dict of attribute name -> value
            @kwargs : additional attributes; they win over @aDict on clashes
        """
        # Copy into the instance instead of updating aDict in place: the
        # caller's dict must not be modified, and the attributes have to end
        # up on this object.
        if aDict is not None:
            self.__dict__.update(aDict)
        self.__dict__.update(kwargs)
def check(pkt, pktType, keyMatches=None, **valMatches):
    """
    This function takes an object that was expected to come from a packet
    (after it has been JSONized) and compares it against the arg requirements
    so you don't have to have 10 if() statements to look for keys in a dict,
    etc..

    Key and value requirements are only applied when @pkt is a dict; for any
    other packet only the type is checked.

    Args:
        @pkt        : object to look at
        @pktType    : object type expected (dict, list, etc..), compared
                      against the exact type of @pkt
        @keyMatches : a list of minimum keys found in parent level of dict,
                      expected to be an array
        @valMatches : a dict of key:value pairs expected to be found in the
                      parent level of dict. The value can be data (like 5)
                      OR a type (like this value must be a @list@). The pairs
                      can be given as keyword arguments or as one dict passed
                      under the keyword 'valMatches'.
    Returns:
        None if everything matches, otherwise it returns a string as to why
        it failed.
    """
    # First check that the pkt type is equal to the input type
    if type(pkt) is not pktType:
        return 'expected %s' % str(pktType)

    # Keys and values can only be looked up in a dict
    if not isinstance(pkt, dict):
        return None

    # keyMatches holds the minimum set of keys we want to see in the pkt
    if keyMatches:
        missing = set(keyMatches) - set(pkt)
        if missing:
            # Sorted so the message does not depend on set ordering
            return 'missing, "%s"' % ', '.join(sorted(missing))

    # Finally for anything in the valMatches find those values
    if valMatches:
        # Pull out the dict object from the "valMatches" key when present
        matchObj = valMatches.get('valMatches', valMatches)

        for k, v in matchObj.items():
            # Check for the key
            if k not in pkt:
                return 'key missing "%s"' % k

            # A type as expected value means "compare the type", anything
            # else means "compare the data"
            if type(v) is type:
                if type(pkt[k]) is not v:
                    return 'key "%s", bad value type, "%s", expected "%s"' % (k, type(pkt[k]), v)
            elif v != pkt[k]:
                return 'key "%s", bad value data, "%s", expected "%s"' % (k, pkt[k], v)

    return None
def explode(pkt, *args):
    """
    Pull the requested keys out of a dict and return their values as a tuple,
    in the order the keys were requested. Any key that is not found yields
    None, and a @pkt that is not a dict yields a tuple of None of the same
    length.

    The only error it throws is if no keys are requested.

    Example:
        pkt = {'a': 0, 'b': 1}
        explode(pkt, 'a', 'b', 'c') returns (0, 1, None)
    """
    if not args:
        # NOTE(review): PDError is not defined or imported in the part of
        # this module visible here - confirm it is in scope, otherwise this
        # line raises NameError instead.
        raise PDError('EXPLODE', 'args must be provided')

    if isinstance(pkt, dict):
        # dict.get returns None for a missing key rather than raising
        return tuple(pkt.get(name, None) for name in args)

    # Not a dict: still return a tuple of the proper length
    return (None,) * len(args)
class Timer(object):
    '''
    A timer object for simple benchmarking.

    Usage:
        with Timer(key='Name of this test') as t:
            do.someCode(thatTakes=aWhile)

    Once the code finishes executing the time is output. The measurement is
    also kept on the object: `start` and `end` (time.time() values), `secs`
    and `msecs` (elapsed time).
    '''

    def __init__(self, key="", verbose=True):
        """
        Arguments:
            @key     : optional, label printed in front of the elapsed time
            @verbose : optional, when False nothing is printed on exit
        """
        self.verbose = verbose
        self.key = key

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        self.end = time.time()
        self.secs = self.end - self.start
        self.msecs = self.secs * 1000  # millisecs
        if self.verbose:
            # Parenthesised single argument: prints the same text with the
            # Python 2 print statement and the Python 3 print function.
            print(self.key + ' elapsed time: %f ms' % self.msecs)