## Babble/DataSource/FlatFile.pm
## Copyright (C) 2004 Gergely Nagy <algernon@bonehunter.rulez.org>
##
## This file is part of Babble.
##
## Babble is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## Babble is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
## for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package Babble::DataSource::FlatFile;

use strict;
use Carp;

use Babble::Document;
use Babble::Document::Collection;
use Babble::DataSource;

use File::Find;
use File::stat;
use File::Basename;
use Date::Manip;

use Exporter ();
use vars qw(@ISA);
@ISA = qw(Babble::DataSource);


=pod

Babble::DataSource::FlatFile - Flat file source fetcher for Babble

=head1 SYNOPSIS

 use Babble;
 use Babble::DataSource::FlatFile;

 my $babble = Babble->new ();
 $babble->add_sources (
	Babble::DataSource::FLatFile->new (
		-data_dir => "/home/me/blog/data",
		-extension => "\.blog",
		-permalink_base => 'http://example.org/~me/blog/'
	)
 );
 ...

=head1 DESCRIPTION

Babble::DataSource::FlatFile implements a Babble data source class
that fetches documents directly from the filesystem.

=head1 METHODS

=over 4

=cut

my $permalink_blosxom = sub {
	my ($base, $file, $date, $ext) = @_;
	my $anchor = basename ($file);

	$anchor =~ s/$ext$//g;

	return $base . UnixDate (ParseDate ($date),
				 "%Y/%m/%d/#") . $anchor;
};

=pod

=item I<new>(B<%params>)

This method creates a new object. The recognised arguments are
I<-data_dir>, which specifies the directory where documents should be
collected from; I<-extension>, a regexp that will be used to find out
which files are documents, and which are not (defaults to C<\.txt>);
I<-permalink_base>, the base URL for the collection (used by the
permanent link generator, see later); and I<-permalink_gen>, a code
reference that is used to generate links to documents.

The method specified in I<-permalink_gen> takes four arguments:
I<base>, I<file>, I<date> and I<ext>. Base is what we specified using
I<-permalink_base>, file is the full path to the filename we're
currently operating on, date is its submission date, and ext is its
extension.

=cut

sub new {
	my $type = shift;
	my $class = ref ($type) || $type;
	my $self = $class->SUPER::new (@_);

	croak "$type->new() called without -data_dir argument"
		unless $self->{-data_dir};
	$self->{-extension} = "\.txt" unless $self->{-extension};
	$self->{-permalink_gen} = \&$permalink_blosxom
		unless $self->{-permalink_gen};

	bless $self, $type;
}

=pod

=item I<collect>()

This function finds all the files in the data directory (recursively)
which have the specified extension, and makes a Babble::Document out
of them. The title is the first line of the file, the date is its
modification time, subject is the subdirectory under which it was
found (or main, if it was in the top-level directory), author is the
user owning the file, content is the file content, save the first
line.

It's id property contains a pointer to the entry (eg, to one's
weblog). This is generated by the I<$source-E<gt>{permalink_gen}>
function, explained above.

For the Babble::Document::Collection object to return, some
information will be gathered from the Babble object which calls this
method, or from the parameters passed to us. Namely, the
I<meta_title>, I<meta_desc>, I<meta_link>, I<meta_owner>,
I<meta_owner_email>, I<meta_subject> and I<meta_feed_link> keys will
be used, if present.

=cut

sub collect($) {
	my ($self, $babble) = @_;
	my @files = ();
	my $collection;
	my %args;

	foreach ("meta_title", "meta_desc", "meta_link", "meta_owner_email",
		 "meta_subject", "meta_feed_link", "meta_owner") {
		$args{$_} = $babble->{Params}->{$_};

		$args{$_} = $self->{$_} if (defined $self->{$_});
		$args{$_} = "" if (!$args{$_});
	}

	find ({wanted => sub {
		       /$self->{-extension}$/ &&
		       push (@files, $File::Find::name);
	       }}, $self->{-data_dir});

	$collection = Babble::Document::Collection->new (
		title => $args{meta_title},
		link => $args{meta_feed_link},
		id => $args{meta_link},
		author => $args{meta_owner} || $args{meta_owner_email},
		content => $args{meta_desc},
		date => ParseDate ("today"),
		subject => $args{meta_subject}
	);

	foreach my $file (@files) {
		my ($title, $subject, $date, $link);
		my @content;
		my $doc;

		open (INF, "<" . $file);
		$title = <INF>;
		chomp ($title);
		@content = <INF>;
		close (INF);

		$subject = dirname ($file);
		$subject =~ s/^$self->{-data_dir}/./;
		$subject =~ s{^\./?}{};
		$subject = "main" unless $subject;

		$date = gmtime (stat ($file)->mtime);
		$link = $self->{-permalink_gen} ($self->{-permalink_base},
						 $file, $date,
						 $self->{-extension});

		$doc = Babble::Document->new (
			title => $title,
			id => $link,
			content => join ("", @content),
			subject => $subject,
			date => ParseDate ($date),
			author => getpwuid (stat ($file)->uid),
		);

		push (@{$collection->{documents}}, $doc);
	}

	return $collection;
}

=pod

=back

=head1 AUTHOR

Gergely Nagy, algernon@bonehunter.rulez.org

Bugs should be reported at L<http://bugs.bonehunter.rulez.org/babble>.

=head1 SEE ALSO

Babble::Document, Babble::Document::Collection,
Babble::DataSource

=cut

1;

# arch-tag: 13b638e3-a4e5-4b81-a924-dc931cd25ded
