r1521@llin: dpavlin | 2007-11-04 14:34:03 +0100
author Dobrica Pavlinusic <dpavlin@rot13.org>
Sun, 4 Nov 2007 13:34:05 +0000 (13:34 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Sun, 4 Nov 2007 13:34:05 +0000 (13:34 +0000)
 added configuration for PerlMonks XML dump indexing

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@991 07558da8-63fa-0310-ba24-9fe276d99e06

conf/llin.yml
conf/mungle/perlmonks-xml.pl [new file with mode: 0755]
conf/normalize/perlmonks.pl [new file with mode: 0644]

index afab20b..1a78fc4 100644 (file)
@@ -368,8 +368,9 @@ databases:
       - name: xml
         module: 'WebPAC::Input::XML'
         path: '/home/dpavlin/monk-search/xml-dump'
+        mungle: 'conf/mungle/perlmonks-xml.pl'
         normalize:
-          path: 'conf/normalize/perlmonks.pl' 
+          path: 'conf/normalize/perlmonks.pl'
     output:
       - module: 'KinoSearch'
         path: 'var/kinosearch'
diff --git a/conf/mungle/perlmonks-xml.pl b/conf/mungle/perlmonks-xml.pl
new file mode 100755 (executable)
index 0000000..242b476
--- /dev/null
@@ -0,0 +1,14 @@
+# Special mungle file which prepares data_structure from XML.
+# Takes the {node} hash returned by get_ds and hands five named fields to set_ds.
+my $h = get_ds->{node};
+warn "## $0 hash to work on = ",dump( $h );
+
+set_ds(
+       Title =>        $h->{title},
+       Author =>       $h->{author}->{content},
+       Date =>         $h->{created},
+       Content =>      $h->{doctext}->{content},
+       Type =>         $h->{type}->{content},
+);
+
+
diff --git a/conf/normalize/perlmonks.pl b/conf/normalize/perlmonks.pl
new file mode 100644 (file)
index 0000000..168bf56
--- /dev/null
@@ -0,0 +1,9 @@
+# For each field name listed below, call search_display() with the name and whatever rec() returns for it.
+search_display( $_, rec( $_ ) ) foreach ( qw/
+Title
+Author
+Date
+Content
+Type
+/ );
+