"""
rawdog tag categorization plugin, by Decklin Foster.
Version 0.3. May be redistributed under the same terms as rawdog itself.

Rather than writing a single output file, read a "tags" option from each
feed and create one category page for every value that appears there. On
each such page, write the articles from any feed that included that value
in its list of tags.

Tags are given as a comma-separated list; whitespace around each tag is
ignored and empty entries are skipped.

The list of tags for a feed defaults to "misc" if not otherwise specified.
This value may be changed with the "defaulttags" config option.

The output filename must contain __cat__, which will be replaced with the
name of the category (i.e., tag).

TODO:

- Select different articles within a feed for different tags, based on
  either regular expressions or tag/author metadata from the feed itself.

- At the moment, there is a one-to-one mapping from tags (the things you
  set on entries) to categories (the files that get written out). There
  should be smarter/more flexible ways to do this.
"""

import os

import rawdoglib.plugins
from rawdoglib.rawdog import ConfigError, DayWriter
from rawdoglib.rawdog import write_ascii, fill_template

try:
    from StringIO import StringIO
except ImportError:
    # Fallback for interpreters without the Python 2 StringIO module.
    from io import StringIO

# Per-category article limit requested by the user. It is overwritten by
# startup(); the default of 0 means "no limit" so that the module is still
# usable if the startup hook has not run.
maxarticles = 0


def _split_tags(value):
    """Split a comma-separated tag string into a list of tag names.

    Whitespace around each tag is stripped and empty entries are dropped,
    so "a, b,," yields ['a', 'b'].
    """
    stripped = [tag.strip() for tag in value.split(',')]
    return [tag for tag in stripped if tag]


def startup(rawdog, config):
    """Save the user's value for maxarticles, but zero out the one that
    rawdog sees so that we get all the articles before sorting."""
    global maxarticles
    maxarticles = config['maxarticles']
    config['maxarticles'] = 0


def config_option(config, name, value):
    """If an option is something we handle, just let it go through into
    config, where we will pull it out later.

    Returns False when the option was stored here, True otherwise.
    """
    # Compare for equality: the previous "name in ('defaulttags')" was a
    # substring test against a plain string (the parentheses do not make a
    # tuple), so names such as "tags" or "default" were wrongly accepted.
    if name == 'defaulttags':
        config.config[name] = value
        return False
    return True


def output_write_files(rawdog, config, articles, article_dates):
    """Write one file for each tag that we've seen.

    Takes all articles as input, and cuts each page down to maxarticles
    after sorting into categories. A maxarticles value of 0 means no limit,
    matching the meaning startup() relies on when it zeroes rawdog's copy.

    Raises ConfigError if the "outputfile" option does not contain
    "__cat__". Always returns False.
    """
    try:
        deftags = config['defaulttags']
    except KeyError:
        deftags = 'misc'
    # Fall back to "misc" if the configured default contains no usable tag.
    default_tags = _split_tags(deftags) or ['misc']

    outfile = config['outputfile']
    if '__cat__' not in outfile:
        raise ConfigError('outputfile must contain "__cat__"')

    class CatPage:
        """Accumulates the rendered articles for a single category."""

        def __init__(self, name):
            self.name = name
            self.buf = StringIO()
            self.dw = DayWriter(self.buf, config)
            self.feeds = {}  # url -> feed, for feeds carrying this tag
            self.n = 0       # number of articles written so far

        def add(self, art):
            """Render art into this page unless the page is already full."""
            # maxarticles == 0 means unlimited.
            if maxarticles and self.n >= maxarticles:
                return
            self.dw.time(article_dates[art])
            rawdog.write_article(self.buf, art, config)
            self.n += 1
            if self.n == 1:
                config.log('Started cat: %s' % self.name)
            if self.n == maxarticles:
                config.log('Finished cat: %s' % self.name)

        def get_bits(self):
            """Return the template bits specific to this category."""
            self.dw.close()
            return {
                # Use the page's own name rather than whatever loop
                # variable called "name" is live in the enclosing function.
                'cat': self.name,
                'items': self.buf.getvalue(),
                'num_items': str(self.n),
            }

    # XXX: these should deal with output_items_* hooks somehow

    # Build one page per tag and record which feeds belong to it.
    cats = {}
    for url, feed in rawdog.feeds.items():
        try:
            tags = _split_tags(feed.args['tags'])
        except KeyError:
            tags = []
        if not tags:
            tags = default_tags
        for t in tags:
            if t not in cats:
                cats[t] = CatPage(t)
            cats[t].feeds[url] = feed

    # Distribute articles to every page whose feeds include the article's.
    for art in articles:
        for page in cats.values():
            if art.feed in page.feeds:
                page.add(art)

    # rawdog.feeds is temporarily narrowed to each page's feeds while that
    # page's template bits are produced; restore it even if writing fails.
    savefeeds = rawdog.feeds
    try:
        for name, page in cats.items():
            rawdog.feeds = page.feeds
            bits = rawdog.get_main_template_bits(config)
            rawdoglib.plugins.call_hook("output_bits", rawdog, config, bits)
            bits.update(page.get_bits())

            config.log('Writing page for cat: %s' % name)
            fn = outfile.replace('__cat__', name)
            s = fill_template(rawdog.get_template(config), bits)
            # Write to a temporary name and rename it into place afterwards.
            f = open(fn + '.new', 'w')
            try:
                write_ascii(f, s, config)
            finally:
                f.close()
            os.rename(fn + '.new', fn)
    finally:
        rawdog.feeds = savefeeds

    return False


rawdoglib.plugins.attach_hook('startup', startup)
rawdoglib.plugins.attach_hook('config_option', config_option)
rawdoglib.plugins.attach_hook('output_write_files', output_write_files)