koha-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Koha-cvs] CVS: koha/C4 SimpleMarc.pm,NONE,1.1.2.1


From: Alan Millar
Subject: [Koha-cvs] CVS: koha/C4 SimpleMarc.pm,NONE,1.1.2.1
Date: Wed, 26 Jun 2002 00:27:37 -0700

Update of /cvsroot/koha/koha/C4
In directory usw-pr-cvs1:/tmp/cvs-serv21384/C4

Added Files:
      Tag: rel-1-2
        SimpleMarc.pm 
Log Message:
Moved acqui.simple MARC handling to new module SimpleMarc.pm

--- NEW FILE ---
#!/usr/bin/perl

# $Id: SimpleMarc.pm,v 1.1.2.1 2002/06/26 07:27:35 amillar Exp $

package C4::SimpleMarc;

# Routines for handling import of MARC data into Koha db

# Koha library project  www.koha.org

# Licensed under the GPL

use strict;

# standard or CPAN modules used
use DBI;

# Koha modules used
use C4::Database;

require Exporter;

use warnings;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# set the version for version checking
$VERSION = 0.01;

@ISA = qw(Exporter);
@EXPORT = qw(
        &extractmarcfields 
        &parsemarcfileformat 
        %tagtext
        %tagmap
);
%EXPORT_TAGS = ( );     # eg: TAG => [ qw!name1 name2! ],

# your exported package globals go here,
# as well as any optionally exported functions

@EXPORT_OK   = qw(
        %tagtext
        %tagmap
);

# non-exported package globals go here
use vars qw(@more $stuff);

# initalize package globals, first exported ones

my $Var1   = '';
my %Hashit = ();

# then the others (which are still accessible as $Some::Module::stuff)
my $stuff  = '';
my @more   = ();

# all file-scoped lexicals must be created before
# the functions below that use them.

# file-private lexicals go here
my $priv_var    = '';
my %secret_hash = ();

# here's a file-private function as a closure,
# callable as &$priv_func;  it cannot be prototyped.
my $priv_func = sub {
  # stuff goes here.
  };
  
# make all your functions, whether exported or not;
#------------------------------------------------

#------------------
# Constants

my %tagtext = (
    '001' => 'Control number',
    '003' => 'Control number identifier',
    '005' => 'Date and time of latest transaction',
    '006' => 'Fixed-length data elements -- additional material 
characteristics',
    '007' => 'Physical description fixed field',
    '008' => 'Fixed length data elements',
    '010' => 'LCCN',
    '015' => 'LCCN Cdn',
    '020' => 'ISBN',
    '022' => 'ISSN',
    '037' => 'Source of acquisition',
    '040' => 'Cataloging source',
    '041' => 'Language code',
    '043' => 'Geographic area code',
    '050' => 'Library of Congress call number',
    '060' => 'National Library of Medicine call number',
    '082' => 'Dewey decimal call number',
    '100' => 'Main entry -- Personal name',
    '110' => 'Main entry -- Corporate name',
    '130' => 'Main entry -- Uniform title',
    '240' => 'Uniform title',
    '245' => 'Title statement',
    '246' => 'Varying form of title',
    '250' => 'Edition statement',
    '256' => 'Computer file characteristics',
    '260' => 'Publication, distribution, etc.',
    '263' => 'Projected publication date',
    '300' => 'Physical description',
    '306' => 'Playing time',
    '440' => 'Series statement / Added entry -- Title',
    '490' => 'Series statement',
    '500' => 'General note',
    '504' => 'Bibliography, etc. note',
    '505' => 'Formatted contents note',
    '508' => 'Creation/production credits note',
    '510' => 'Citation/references note',
    '511' => 'Participant or performer note',
    '520' => 'Summary, etc. note',
    '521' => 'Target audience note (ie age)',
    '530' => 'Additional physical form available note',
    '538' => 'System details note',
    '586' => 'Awards note',
    '600' => 'Subject added entry -- Personal name',
    '610' => 'Subject added entry -- Corporate name',
    '650' => 'Subject added entry -- Topical term',
    '651' => 'Subject added entry -- Geographic name',
    '656' => 'Index term -- Occupation',
    '700' => 'Added entry -- Personal name',
    '710' => 'Added entry -- Corporate name',
    '730' => 'Added entry -- Uniform title',
    '740' => 'Added entry -- Uncontrolled related/analytical title',
    '800' => 'Series added entry -- Personal name',
    '830' => 'Series added entry -- Uniform title',
    '852' => 'Location',
    '856' => 'Electronic location and access',
);

# tag, subfield, field name, repeats, striptrailingchars
my %tagmap=(
    '010'=>{'a'=>{name=> 'lccn',        rpt=>0, striptrail=>' '         }},
    '015'=>{'a'=>{name=> 'lccn',        rpt=>0  }},
    '020'=>{'a'=>{name=> 'isbn',        rpt=>0  }},
    '022'=>{'a'=>{name=> 'issn',        rpt=>0  }},
    '082'=>{'a'=>{name=> 'dewey',       rpt=>0  }},
    '100'=>{'a'=>{name=> 'author',      rpt=>0, striptrail=>',:;/-'     }},
    '245'=>{'a'=>{name=> 'title',       rpt=>0, striptrail=>',:;/'      },
            'b'=>{name=> 'subtitle',    rpt=>0, striptrail=>',:;/'      }},
    '260'=>{'a'=>{name=> 'place',       rpt=>0, striptrail=>',:;/-'     },
            'b'=>{name=> 'publisher',   rpt=>0, striptrail=>',:;/-'     },
            'c'=>{name=> 'year' ,       rpt=>0, striptrail=>'.,:;/-'    }},
    '300'=>{'a'=>{name=> 'pages',       rpt=>0, striptrail=>',:;/-'     },
            'c'=>{name=> 'size',        rpt=>0, striptrail=>',:;/-'     }},
    '362'=>{'a'=>{name=> 'volume-number',       rpt=>0  }},
    '440'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/'      },
            'v'=>{name=> 'volume-number',rpt=>0 }},
    '490'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/'      },
            'v'=>{name=> 'volume-number',rpt=>0 }},
    '700'=>{'a'=>{name=> 'addtional-author-illus',rpt=>1, striptrail=>',:;/'    
}},
    '5xx'=>{'a'=>{name=> 'notes',       rpt=>1  }},
    '65x'=>{'a'=>{name=> 'subject',     rpt=>1, striptrail=>'.,:;/-'    }},
);


#------------------
sub extractmarcfields {
    use strict;
    # input
    my (
        $record,        # pointer to list of MARC field hashes.
                        # Example: $record->[0]->{'tag'} = '100' # Author
                        #       $record->[0]->{'subfields'}->{'a'} = 
subfieldvalue
    )address@hidden;

    # return 
    my $bib;            # pointer to hash of named output fields
                        # Example: $bib->{'author'} = "Twain, Mark";

    my $debug=0;

    my (
        $field,         # hash ref
        $value, 
        $subfield,      # Marc subfield [a-z]
        $fieldname,     # name of field "author", "title", etc.
        $strip,         # chars to remove from end of field
        $stripregex,    # reg exp pattern
    );
    my ($lccn, $isbn, $issn,    
        $publicationyear, @subjects, $subject,
        $controlnumber, 
        $notes, $additionalauthors, $illustrator, $copyrightdate, 
        $s, $subdivision, $subjectsubfield,
    );

    print "<PRE>\n" if $debug;

    if ( ref($record) eq "ARRAY" ) {
        foreach $field (@$record) {

            # Check each subfield in field
            foreach $subfield ( keys %{$field->{subfields}} ) {
                # see if it is defined in our Marc to koha mapping table
                if ( $fieldname=$tagmap{ $field->{'tag'} }->{$subfield}->{name} 
) {
                    # Yes, so keep the value
                    if ( ref($field->{'subfields'}->{$subfield} ) eq 'ARRAY' ) {
                        # if it was an array, just keep first element.
                        
$bib->{$fieldname}=$field->{'subfields'}->{$subfield}[0];
                    } else {
                        $bib->{$fieldname}=$field->{'subfields'}->{$subfield};
                    } # if array
                    print "$field->{'tag'} $subfield 
$fieldname=$bib->{$fieldname}\n" if $debug;
                    # see if this field should have trailing chars dropped
                    if ($strip=$tagmap{ $field->{'tag'} 
}->{$subfield}->{striptrail} ) {
                        $strip=~s//\\/; # backquote each char
                        $stripregex='[ ' . $strip . ']+$';  # remove trailing 
spaces also
                        $bib->{$fieldname}=~s/$stripregex//;
                        # also strip leading spaces
                        $bib->{$fieldname}=~s/^ +//;
                    } # if strip
                    print "Found subfield $field->{'tag'} $subfield " .
                        "$fieldname = $bib->{$fieldname}\n" if $debug;
                } # if tagmap exists

            } # foreach subfield


            if ($field->{'tag'} eq '001') {
                $bib->{controlnumber}=$field->{'indicator'};
            }
            if ($field->{'tag'} eq '015') {
                $bib->{lccn}=$field->{'subfields'}->{'a'};
                $bib->{lccn}=~s/^\s*//;
                $bib->{lccn}=~s/^C//;
                ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0];
            }


                if ($field->{'tag'} eq '260') {

                    $publicationyear=$field->{'subfields'}->{'c'};
                    if ($publicationyear=~/c(\d\d\d\d)/) {
                        $copyrightdate=$1;
                    }
                    if ($publicationyear=~/[^c](\d\d\d\d)/) {
                        $publicationyear=$1;
                    } elsif ($copyrightdate) {
                        $publicationyear=$copyrightdate;
                    } else {
                        $publicationyear=~/(\d\d\d\d)/;
                        $publicationyear=$1;
                    }
                }
                if ($field->{'tag'} eq '700') {
                    my $name=$field->{'subfields'}->{'a'};
                    if ( defined($field->{'subfields'}->{'e'}) 
                        and  $field->{'subfields'}->{'e'}!~/ill/) {
                        $additionalauthors.="$name\n";
                    } else {
                        $illustrator=$name;
                    }
                }
                if ($field->{'tag'} =~/^5/) {
                    $notes.="$field->{'subfields'}->{'a'}\n";
                }
                if ($field->{'tag'} =~/65\d/) {
                    my $sub;
                    my $subject=$field->{'subfields'}->{'a'};
                    $subject=~s/\.$//;
                    print "Subject=$subject\n" if $debug;
                    foreach $subjectsubfield ( 'x','y','z' ) {
                      if 
($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
                        if ( ref($subdivision) eq 'ARRAY' ) {
                            foreach $s (@$subdivision) {
                                $s=~s/\.$//;
                                $subject.=" -- $s";
                            } # foreach subdivision
                        } else {
                            $subdivision=~s/\.$//;
                            $subject.=" -- $subdivision";
                        } # if array
                      } # if subfield exists
                    } # foreach subfield
                    print "Subject=$subject\n" if $debug;
                    push @subjects, $subject;
                } # if tag 65x


        } # foreach field
        ($publicationyear       ) && ($bib->{publicationyear}=$publicationyear  
);
        ($copyrightdate         ) && ($bib->{copyrightdate}=$copyrightdate  );
        ($additionalauthors     ) && 
($bib->{additionalauthors}=$additionalauthors  );
        ($illustrator           ) && ($bib->{illustrator}=$illustrator  );
        ($notes                 ) && ($bib->{notes}=$notes  );
        ($#subjects             ) && ($bib->address@hidden  );

        # Misc cleanup
        if ($bib->{dewey}) {
            $bib->{dewey}=~s/\///g;     # drop any slashes
        }

        if ($bib->{lccn}) {
           ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0]; # only keep first 
word
        }

        if ( $bib->{isbn} ) {
            $bib->{isbn}=~s/[^\d]*//g;  # drop non-digits
        };

        if ( $bib->{issn} ) {
            $bib->{issn}=~s/^\s*//;
            ($bib->{issn}) = (split(/\s+/, $bib->{issn}))[0];
        };

        if ( $bib->{'volume-number'} ) {
            if ($bib->{'volume-number'}=~/(\d+).*(\d+)/ ) {
                $bib->{'volume'}=$1;
                $bib->{'number'}=$2;
            } else {
                $bib->{volume}=$bib->{'volume-number'};
            }
            delete $bib->{'volume-number'};
        } # if volume-number

    } else {
        print "Error: extractmarcfields: input ref $record is " .
                ref($record) . " not ARRAY. Contact sysadmin.\n";
    }
    print "</PRE>\n" if $debug;

    return $bib;

} # sub extractmarcfields
#---------------------------------

#--------------------------
# Parse MARC data in file format with control-character separators
#   May be multiple records.
sub parsemarcfileformat {
    use strict;
    # Input is one big text string
    my $data=shift;
    # Output is list of records.  Each record is list of field hashes
    my @records;

    my $splitchar=chr(29);
    my $splitchar2=chr(30);
    my $splitchar3=chr(31);
    my $debug=0;
    my $record;
    foreach $record (split(/$splitchar/, $data)) {
        my @record;
        my $directory=0;
        my $tagcounter=0;
        my %tag;
        my $field;

        my $leader=substr($record,0,24);
        print "<tr><td>Leader:</td><td>$leader</td></tr>\n" if $debug;
        push (@record, {
                'tag' => 'Leader',
                'indicator' => $leader ,
        } );

        $record=substr($record,24);
        foreach $field (split(/$splitchar2/, $record)) {
            my %field;
            my $tag;
            my $indicator;
            unless ($directory) {
                $directory=$field;
                my $itemcounter=1;
                my $counter2=0;
                my $item;
                my $length;
                my $start;
                while ($item=substr($directory,0,12)) {
                    $tag=substr($directory,0,3);
                    $length=substr($directory,3,4);
                    $start=substr($directory,7,6);
                    $directory=substr($directory,12);
                    $tag{$counter2}=$tag;
                    $counter2++;
                }
                $directory=1;
                next;
            }
            $tag=$tag{$tagcounter};
            $tagcounter++;
            $field{'tag'}=$tag;
            my @subfields=split(/$splitchar3/, $field);
            $indicator=$subfields[0];
            $field{'indicator'}=$indicator;
            my $firstline=1;
            unless ($#subfields==0) {
                my %subfields;
                my @subfieldlist;
                my $i;
                for ($i=1; $i<=$#subfields; $i++) {
                    my $text=$subfields[$i];
                    my $subfieldcode=substr($text,0,1);
                    my $subfield=substr($text,1);
                    # if this subfield already exists, do array
                    if ($subfields{$subfieldcode}) {
                        my $subfieldlist=$subfields{$subfieldcode};
                        if ( ref($subfieldlist) eq 'ARRAY' ) {
                            # Already an array, add on to it
                            print "$tag Adding to array $subfieldcode -- 
$subfield<br>\n" if $debug;
                            @address@hidden;
                            push (@subfieldlist, $subfield);
                        } else {
                            # Change simple value to array
                            print "$tag Arraying $subfieldcode -- 
$subfield<br>\n" if $debug;
                            @subfieldlist=($subfields{$subfieldcode}, 
$subfield);
                        }
                        # keep new array
                        address@hidden;
                    } else {
                        # subfield doesn't exist yet, keep simple value
                        $subfields{$subfieldcode}=$subfield;
                    }
                }
                $field{'subfields'}=\%subfields;
            }
            push (@record, \%field);
        } # foreach field in record
        push (@records, address@hidden);
        # $counter++;
    }
    print "</pre>" if $debug;
    return @records;
} # sub parsemarcfileformat

#---------------------------------------------
# $Log: SimpleMarc.pm,v $
# Revision 1.1.2.1  2002/06/26 07:27:35  amillar
# Moved acqui.simple MARC handling to new module SimpleMarc.pm
#




reply via email to

[Prev in Thread] Current Thread [Next in Thread]