diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b48a149 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +*egg-info* diff --git a/README.md b/README.md new file mode 100644 index 0000000..a422fdc --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# activity-streams-python + +Activity Streams Parser for Python + + +## Installation + + git clone git@github.com:apparentlymart/activity-streams-python.git + cd activity-streams-python + sudo python setup.py install + +## Example + + from activitystreams.atom import make_activities_from_feed + import urllib2 + import xml.etree.ElementTree + + url = 'https://github.com/apparentlymart/activity-streams-python/commits/master.atom' + response = urllib2.urlopen(url) + contents = response.read() + xml_tree = xml.etree.ElementTree.fromstring(contents) + xml_tree.getroot = lambda: xml_tree + activities = make_activities_from_feed(xml_tree) + for activity in activities: + print activity.actor.name, activity.verb, activity.object.object_type, activity.object.url \ No newline at end of file diff --git a/activitystreams/__init__.py b/activitystreams/__init__.py index 1a75f2e..3d69371 100644 --- a/activitystreams/__init__.py +++ b/activitystreams/__init__.py @@ -1,4 +1,4 @@ - +from rfc3339 import rfc3339 class Activity(object): actor = None @@ -11,7 +11,7 @@ class Activity(object): service_provider = None links = None - def __init__(self, actor=None, object=None, target=None, verb=None, time=None, generator=None, icon_url=None, service_provider=None, links=None): + def __init__(self, actor = None, object = None, target = None, verb = None, time = None, generator = None, icon_url = None, service_provider = None, links = None): self.actor = actor self.object = object self.target = target @@ -26,12 +26,32 @@ def __init__(self, actor=None, object=None, target=None, verb=None, time=None, g else: self.links = [] + def to_json(self): + activity_dict = { + 'actor': self.actor, + 'object': self.object, + 'target': 
self.target, + 'verb': self.verb, + 'published': self.time, + 'service_provider': self.service_provider, + 'generator': self.generator, + 'icon_url': self.icon_url + } + return jsonify(activity_dict) + + +class PostActivity(Activity): + + pass + + class Object(object): id = None name = None url = None object_type = None summary = None + content = None image = None in_reply_to_object = None attached_objects = None @@ -42,12 +62,13 @@ class Object(object): downstream_duplicate_ids = None links = None - def __init__(self, id=None, name=None, url=None, object_type=None, summary=None, image=None, in_reply_to_object=None, attached_objects=None, reply_objects=None, reaction_activities=None, action_links=None, upstream_duplicate_ids=None, downstream_duplicate_ids=None, links=None): + def __init__(self, id = None, name = None, url = None, object_type = None, summary = None, content = None, image = None, in_reply_to_object = None, attached_objects = None, reply_objects = None, reaction_activities = None, action_links = None, upstream_duplicate_ids = None, downstream_duplicate_ids = None, links = None): self.id = id self.name = name self.url = url self.object_type = object_type self.summary = summary + self.content = content self.image = image self.in_reply_to_object = in_reply_to_object @@ -86,6 +107,119 @@ def __init__(self, id=None, name=None, url=None, object_type=None, summary=None, else: self.links = [] + def to_json(self): + object_dict = { + 'id': self.id, + 'displayName': self.name, + 'url': self.url, + 'object_type': self.object_type, + 'summary': self.summary, + 'content': self.content, + 'image': self.image, + 'attachments': None, + # 'in_reply_to_object': self.in_reply_to_object, + # 'reply_objects': self.reply_objects, + # 'reaction_activities': self.reaction_activites, + # 'action_links': self.action_links, + 'upstream_duplicate_ids': self.upstream_duplicate_ids, + 'downstream_duplicate_ids': self.downstream_duplicate_ids + # 'links': self.links, + } + if 
class TicketObject(Object):
    '''
    Proof of concept implementation. Subject to changes.

    An ``Object`` that additionally carries ticket-tracking fields (key,
    status, description, problem / maintenance windows, affected
    organisations, ...).
    '''
    ticket_key = None
    ticket_summary = None
    ticket_type = None
    ticket_status = None
    ticket_created = None
    ticket_closed = None
    ticket_description = None
    ticket_scope = None
    ticket_impact = None
    ticket_problem_start = None
    ticket_problem_end = None
    ticket_maintenance_window_start = None
    ticket_maintenance_window_end = None
    ticket_update = None
    ticket_affected_organisations = None
    # Misspelt (singular) class-level name used before; kept so code that
    # reads the old attribute keeps working.
    ticket_affected_organisation = None

    # Fields copied into the JSON dict unchanged.
    _PLAIN_FIELDS = (
        'ticket_key',
        'ticket_summary',
        'ticket_type',
        'ticket_status',
        'ticket_description',
        'ticket_scope',
        'ticket_impact',
        'ticket_update',
    )
    # Fields formatted with rfc3339(); only emitted when they hold a value.
    _DATETIME_FIELDS = (
        'ticket_created',
        'ticket_closed',
        'ticket_problem_start',
        'ticket_problem_end',
        'ticket_maintenance_window_start',
        'ticket_maintenance_window_end',
    )

    def __init__(self, id = None, name = None, url = None, object_type = None,
                 summary = None, image = None, in_reply_to_object = None,
                 attached_objects = None, reply_objects = None,
                 reaction_activities = None, action_links = None,
                 upstream_duplicate_ids = None, downstream_duplicate_ids = None,
                 links = None, ticket_key = None, ticket_summary = None,
                 ticket_type = None, ticket_status = None, ticket_created = None,
                 ticket_closed = None, ticket_description = None, ticket_scope = None,
                 ticket_impact = None, ticket_problem_start = None,
                 ticket_problem_end = None, ticket_maintenance_window_start = None,
                 ticket_maintenance_window_end = None, ticket_update = None,
                 ticket_affected_organisations = None, content = None):
        '''
        Build a ticket object.

        The first fourteen parameters are forwarded to ``Object.__init__``;
        the ``ticket_*`` parameters are stored on the instance as given,
        except ``ticket_affected_organisations`` which defaults to a fresh
        empty list. ``content`` is appended at the end of the signature so
        existing positional callers are unaffected.
        '''
        # Forward by keyword: Object.__init__ takes `content` between
        # `summary` and `image`, so a positional call shifts every later
        # argument by one slot.
        super(TicketObject, self).__init__(
            id = id,
            name = name,
            url = url,
            object_type = object_type,
            summary = summary,
            content = content,
            image = image,
            in_reply_to_object = in_reply_to_object,
            attached_objects = attached_objects,
            reply_objects = reply_objects,
            reaction_activities = reaction_activities,
            action_links = action_links,
            upstream_duplicate_ids = upstream_duplicate_ids,
            downstream_duplicate_ids = downstream_duplicate_ids,
            links = links)
        self.ticket_key = ticket_key
        self.ticket_summary = ticket_summary
        self.ticket_type = ticket_type
        self.ticket_status = ticket_status
        self.ticket_created = ticket_created
        self.ticket_closed = ticket_closed
        self.ticket_description = ticket_description
        self.ticket_scope = ticket_scope
        self.ticket_impact = ticket_impact
        self.ticket_problem_start = ticket_problem_start
        self.ticket_problem_end = ticket_problem_end
        self.ticket_maintenance_window_start = ticket_maintenance_window_start
        self.ticket_maintenance_window_end = ticket_maintenance_window_end
        self.ticket_update = ticket_update

        # Never share one list between instances.
        if ticket_affected_organisations is not None:
            self.ticket_affected_organisations = ticket_affected_organisations
        else:
            self.ticket_affected_organisations = []

    def to_json(self):
        '''
        Return the base object's dict extended with the ticket fields.

        Date fields are added only when set and are formatted with
        ``rfc3339``. The result is passed through ``jsonify`` once more,
        which replaces empty lists (e.g. no affected organisations) by None.
        '''
        object_dict = super(TicketObject, self).to_json()
        for field in self._PLAIN_FIELDS:
            object_dict[field] = getattr(self, field)
        for field in self._DATETIME_FIELDS:
            value = getattr(self, field)
            if value:
                object_dict[field] = rfc3339(value)
        # Copy so the serialised form does not alias the instance's list.
        object_dict['ticket_affected_organisations'] = list(
            self.ticket_affected_organisations or [])
        return jsonify(object_dict)
def jsonify(dictionary):
    '''
    Convert the values of ``dictionary`` to JSON-friendly forms, in place.

    * ``published`` / ``updated`` values that are set are formatted with
      ``rfc3339``.
    * Values stored under the object-valued keys (actor, generator, object,
      provider, target, author, image) are replaced by the result of their
      ``to_json()`` method.
    * Any value equal to ``[]`` is replaced by ``None``.

    Returns the same dictionary object that was passed in.
    '''
    # 'author' and 'image' must be separate entries: without the comma the
    # two adjacent literals are concatenated into the single key
    # 'authorimage' and neither field is ever serialised.
    classes = ['actor', 'generator', 'object', 'provider', 'target',
               'author', 'image']
    datetimes = ['published', 'updated']
    # NOTE(review): Activity.to_json emits the key 'service_provider', which
    # is not in `classes` ('provider' is) - confirm which name is intended.

    for key in datetimes:
        if dictionary.get(key):
            dictionary[key] = rfc3339(dictionary[key])

    for key in classes:
        value = dictionary.get(key)
        # Only objects that know how to serialise themselves are converted;
        # this keeps the function safe to run twice on the same dict (the
        # second pass sees plain dicts, which have no to_json()).
        if value and hasattr(value, 'to_json'):
            dictionary[key] = value.to_json()

    # Snapshot the items so assignment during iteration is always safe.
    for key, value in list(dictionary.items()):
        if value == []:
            dictionary[key] = None
    return dictionary
**kwargs): + if kwargs['verb'] == POST_VERB: + return PostActivity(*args, **kwargs) + return Activity.__new__(self, *args, **kwargs) # This is a weird enum-like thing. @@ -53,6 +58,8 @@ def __repr__(self): MEDIA_HEIGHT = MEDIA_PREFIX + "height" MEDIA_DURATION = MEDIA_PREFIX + "duration" MEDIA_DESCRIPTION = MEDIA_PREFIX + "description" +PERSON_OBJECT = "http://activitystrea.ms/schema/1.0/person" +NOTE_OBJECT = "http://activitystrea.ms/schema/1.0/note" def make_activities_from_feed(et): @@ -68,6 +75,7 @@ def make_activities_from_feed(et): def make_activities_from_entry(entry_elem, feed_elem): + object_elems = entry_elem.findall(ACTIVITY_OBJECT) activity_is_implied = False @@ -112,7 +120,7 @@ def make_activities_from_entry(entry_elem, feed_elem): target = make_object_from_elem(target_elem, feed_elem, ObjectParseMode.ACTIVITY_OBJECT) actor = None - if author_elem: + if author_elem is not None: actor = make_object_from_elem(author_elem, feed_elem, ObjectParseMode.ATOM_AUTHOR) activities = [] @@ -122,7 +130,7 @@ def make_activities_from_entry(entry_elem, feed_elem): else: object = make_object_from_elem(object_elem, feed_elem, ObjectParseMode.ACTIVITY_OBJECT) - activity = Activity(object=object, actor=actor, target=target, verb=verb, time=published_datetime, icon_url=icon_url) + activity = AtomActivity(object = object, actor = actor, target = target, verb = verb, time = published_datetime, icon_url = icon_url) activities.append(activity) return activities @@ -140,6 +148,19 @@ def make_object_from_elem(object_elem, feed_elem, mode): if summary_elem is not None: summary = summary_elem.text + content = None + content_elem = object_elem.find(ATOM_CONTENT) + if content_elem is not None: + # may be interpre this later + if content_elem.attrib['type'] == 'html': + + # ugly fix + content = HTMLParser().unescape(content_elem.text) + + #content = content_elem.text + else: + logging.warn('unexpected activity object type %s' % content_elem.attrib['type']) + name_tag_name = ATOM_TITLE 
# The ATOM_AUTHOR parsing mode looks in atom:name instead of atom:title if mode == ObjectParseMode.ATOM_AUTHOR: @@ -160,7 +181,7 @@ def make_object_from_elem(object_elem, feed_elem, mode): if rel == "preview": if type is None or type == "image/jpeg" or type == "image/gif" or type == "image/png": # FIXME: Should pull out the width/height/duration attributes from AtomMedia too. - image = MediaLink(url=link_elem.get("href")) + image = MediaLink(url = link_elem.get("href")) # In the atom:author parse mode we fall back on atom:uri if there's no link rel="alternate" if url is None and mode == ObjectParseMode.ATOM_AUTHOR: @@ -173,7 +194,21 @@ def make_object_from_elem(object_elem, feed_elem, mode): if object_type_elem is not None: object_type = object_type_elem.text - return Object(id=id, name=name, url=url, object_type=object_type, image=image, summary=summary) + object_params = { + 'id': id, + 'name': name, + 'url': url, + 'object_type': object_type, + 'image': image, + 'summary': summary, + 'content': content + } + + if object_type == PERSON_OBJECT: + pass + elif object_type == NOTE_OBJECT: + return NoteObject(**object_params)#content = content, + return Object(**object_params) # This is pilfered from Universal Feed Parser. @@ -249,7 +284,7 @@ def __extract_tzd(m): minutes = int(minutes) else: minutes = 0 - offset = (hours*60 + minutes) * 60 + offset = (hours * 60 + minutes) * 60 if tzd[0] == '+': return -offset return offset diff --git a/activitystreams/rfc3339.py b/activitystreams/rfc3339.py new file mode 100644 index 0000000..27cf8ff --- /dev/null +++ b/activitystreams/rfc3339.py @@ -0,0 +1,285 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python +# +# Copyright (c) 2009, 2010, Henry Precheur +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. 
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
'''Formats dates according to the :RFC:`3339`.

Report bugs & problems on BitBucket_

.. _BitBucket: https://bitbucket.org/henry/clan.cx/issues
'''

__author__ = 'Henry Precheur '
__license__ = 'ISCL'
__version__ = '5.1'
__all__ = ('rfc3339', )

import datetime
import time
import unittest


def _timezone(utc_offset):
    '''
    Return a string representing the timezone offset.

    `utc_offset` is a number of seconds east of UTC.

    >>> _timezone(0)
    '+00:00'
    >>> _timezone(3600)
    '+01:00'
    >>> _timezone(-28800)
    '-08:00'
    >>> _timezone(-1800)
    '-00:30'
    '''
    # Python's division uses floor(), not round() like in other languages:
    #   -1 / 2 == -1 and not -1 / 2 == 0
    # That's why we use abs(utc_offset).
    hours = abs(utc_offset) // 3600
    minutes = abs(utc_offset) % 3600 // 60
    return '%c%02d:%02d' % ('-' if utc_offset < 0 else '+', hours, minutes)


def _timedelta_to_seconds(timedelta):
    '''
    Return `timedelta` as a whole number of seconds (rounded down).

    >>> _timedelta_to_seconds(datetime.timedelta(hours=3))
    10800
    >>> _timedelta_to_seconds(datetime.timedelta(hours=3, minutes=15))
    11700
    >>> _timedelta_to_seconds(datetime.timedelta(hours=3, microseconds=500000))
    10800
    '''
    # `microseconds` is always in [0, 999999], i.e. less than one second, so
    # it never contributes a whole second. (Dividing it by 1000 would add
    # milliseconds to a value expressed in seconds.)
    return timedelta.days * 86400 + timedelta.seconds


def _utc_offset(date, use_system_timezone):
    '''
    Return the UTC offset of `date`, in seconds east of UTC. If `date` does
    not have any `tzinfo`, use the timezone informations stored locally on
    the system (or zero when `use_system_timezone` is false).

    >>> if time.localtime().tm_isdst:
    ...     system_timezone = -time.altzone
    ... else:
    ...     system_timezone = -time.timezone
    >>> _utc_offset(datetime.datetime.now(), True) == system_timezone
    True
    >>> _utc_offset(datetime.datetime.now(), False)
    0
    '''
    if isinstance(date, datetime.datetime) and date.tzinfo is not None:
        # utcoffset() is the total offset and already includes any DST
        # adjustment; dst() is only the adjustment itself and must not be
        # used as the offset.
        offset = date.utcoffset()
        if offset is not None:
            return _timedelta_to_seconds(offset)
        # A tzinfo whose utcoffset() is None makes the datetime naive:
        # fall through to the naive handling below.
    if use_system_timezone:
        if date.year < 1970:
            # We use 1972 because 1970 doesn't have a leap day (feb 29)
            t = time.mktime(date.replace(year=1972).timetuple())
        else:
            t = time.mktime(date.timetuple())
        if time.localtime(t).tm_isdst:  # pragma: no cover
            return -time.altzone
        return -time.timezone
    return 0


def _string(d, timezone):
    '''Format the date/time fields of `d` followed by the `timezone` suffix.'''
    return ('%04d-%02d-%02dT%02d:%02d:%02d%s' %
            (d.year, d.month, d.day, d.hour, d.minute, d.second, timezone))


def rfc3339(date, utc=False, use_system_timezone=True):
    '''
    Return a string formatted according to the :RFC:`3339`. If called with
    `utc=True`, it normalizes `date` to the UTC date. If `date` does not have
    any timezone information, uses the local timezone::

        >>> d = datetime.datetime(2008, 4, 2, 20)
        >>> rfc3339(d, utc=True, use_system_timezone=False)
        '2008-04-02T20:00:00Z'
        >>> rfc3339(d) # doctest: +ELLIPSIS
        '2008-04-02T20:00:00...'

    If called with `use_system_timezone=False` don't use the local timezone if
    `date` does not have timezone informations and consider the offset to UTC
    to be zero::

        >>> rfc3339(d, use_system_timezone=False)
        '2008-04-02T20:00:00+00:00'

    `date` must be a `datetime.datetime`, `datetime.date` or a timestamp as
    returned by `time.time()`::

        >>> rfc3339(0, utc=True, use_system_timezone=False)
        '1970-01-01T00:00:00Z'
        >>> rfc3339(datetime.date(2008, 9, 6), utc=True,
        ...         use_system_timezone=False)
        '2008-09-06T00:00:00Z'
        >>> rfc3339(datetime.date(2008, 9, 6),
        ...         use_system_timezone=False)
        '2008-09-06T00:00:00+00:00'
        >>> rfc3339('foo bar') # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        TypeError: Expected timestamp or date object. Got <... 'str'>.

    For dates before January 1st 1970, the timezones will be the ones used in
    1970. It might not be accurate, but on most system there is no timezone
    information before 1970.
    '''
    # Try to convert timestamp to datetime; date/datetime objects (and any
    # other non-number) raise TypeError here and are handled below.
    try:
        if use_system_timezone:
            date = datetime.datetime.fromtimestamp(date)
        else:
            date = datetime.datetime.utcfromtimestamp(date)
    except TypeError:
        pass

    if not isinstance(date, datetime.date):
        raise TypeError('Expected timestamp or date object. Got %r.' %
                        type(date))

    if not isinstance(date, datetime.datetime):
        # Promote a plain date to midnight of that day.
        date = datetime.datetime(*date.timetuple()[:3])
    utc_offset = _utc_offset(date, use_system_timezone)
    if utc:
        # local time = UTC + offset, hence UTC = local time - offset.
        return _string(date - datetime.timedelta(seconds=utc_offset), 'Z')
    return _string(date, _timezone(utc_offset))


class LocalTimeTestCase(unittest.TestCase):
    '''
    Test the use of the timezone saved locally. Since it is hard to test using
    doctest.
    '''

    def setUp(self):
        local_utcoffset = _utc_offset(datetime.datetime.now(), True)
        self.local_utcoffset = datetime.timedelta(seconds=local_utcoffset)
        self.local_timezone = _timezone(local_utcoffset)

    def test_datetime(self):
        d = datetime.datetime.now()
        self.assertEqual(rfc3339(d),
                         d.strftime('%Y-%m-%dT%H:%M:%S') + self.local_timezone)

    def test_datetime_timezone(self):

        class FixedNoDst(datetime.tzinfo):
            'A timezone info with fixed offset, not DST'

            def utcoffset(self, dt):
                return datetime.timedelta(hours=2, minutes=30)

            def dst(self, dt):
                return None

        class Fixed(FixedNoDst):
            'Same total offset, one hour of which is DST'

            def dst(self, dt):
                return datetime.timedelta(hours=1)

        # The formatted offset is always the total utcoffset(), whether or
        # not part of it is a DST adjustment.
        for tz in (FixedNoDst(), Fixed()):
            d = datetime.datetime.now().replace(tzinfo=tz)
            timezone = _timezone(_timedelta_to_seconds(tz.utcoffset(None)))
            self.assertEqual(rfc3339(d),
                             d.strftime('%Y-%m-%dT%H:%M:%S') + timezone)

    def test_datetime_utc(self):
        d = datetime.datetime.now()
        d_utc = d - self.local_utcoffset
        self.assertEqual(rfc3339(d, utc=True),
                         d_utc.strftime('%Y-%m-%dT%H:%M:%SZ'))

    def test_date(self):
        d = datetime.date.today()
        self.assertEqual(rfc3339(d),
                         d.strftime('%Y-%m-%dT%H:%M:%S') + self.local_timezone)

    def test_date_utc(self):
        d = datetime.date.today()
        # Convert `date` to `datetime`, since `date` ignores seconds and hours
        # in timedeltas:
        # >>> datetime.date(2008, 9, 7) + datetime.timedelta(hours=23)
        # datetime.date(2008, 9, 7)
        d_utc = datetime.datetime(*d.timetuple()[:3]) - self.local_utcoffset
        self.assertEqual(rfc3339(d, utc=True),
                         d_utc.strftime('%Y-%m-%dT%H:%M:%SZ'))

    def test_timestamp(self):
        d = time.time()
        self.assertEqual(
            rfc3339(d),
            datetime.datetime.fromtimestamp(d).strftime('%Y-%m-%dT%H:%M:%S')
            + self.local_timezone)

    def test_timestamp_utc(self):
        d = time.time()
        d_utc = datetime.datetime.utcfromtimestamp(d)
        self.assertEqual(rfc3339(d, utc=True),
                         d_utc.strftime('%Y-%m-%dT%H:%M:%SZ'))

    def test_before_1970(self):
        # Plain decimal literals: a leading zero (01, 04) is an octal literal
        # on Python 2 and a syntax error on Python 3.
        d = datetime.date(1885, 1, 4)
        self.assertEqual(rfc3339(d),
                         '1885-01-04T00:00:00' + self.local_timezone)
        self.assertEqual(rfc3339(d, utc=True, use_system_timezone=False),
                         '1885-01-04T00:00:00Z')

    def test_1920(self):
        d = datetime.date(1920, 2, 29)
        self.assertEqual(rfc3339(d, utc=False, use_system_timezone=True),
                         '1920-02-29T00:00:00' + self.local_timezone)

    # If these tests start failing it probably means there was a policy change
    # for the Pacific time zone.
    # See http://en.wikipedia.org/wiki/Pacific_Time_Zone.
    if 'PST' in time.tzname:
        def testPDTChange(self):
            '''Test Daylight saving change'''
            # PDT switch happens at 2AM on March 14, 2010

            # 1:59AM PST
            self.assertEqual(rfc3339(datetime.datetime(2010, 3, 14, 1, 59)),
                             '2010-03-14T01:59:00-08:00')
            # 3AM PDT
            self.assertEqual(rfc3339(datetime.datetime(2010, 3, 14, 3, 0)),
                             '2010-03-14T03:00:00-07:00')

        def testPSTChange(self):
            '''Test Standard time change'''
            # PST switch happens at 2AM on November 7, 2010

            # 0:59AM PDT
            self.assertEqual(rfc3339(datetime.datetime(2010, 11, 7, 0, 59)),
                             '2010-11-07T00:59:00-07:00')

            # 1:00AM PST
            # There's no way to have 1:00AM PST without a proper tzinfo
            self.assertEqual(rfc3339(datetime.datetime(2010, 11, 7, 1, 0)),
                             '2010-11-07T01:00:00-07:00')


if __name__ == '__main__':  # pragma: no cover
    import doctest
    doctest.testmod()
    unittest.main()