koha-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Koha-cvs] koha/misc/migration_tools bulkmarcimport.pl


From: Antoine Farnault
Subject: [Koha-cvs] koha/misc/migration_tools bulkmarcimport.pl
Date: Tue, 04 Jul 2006 15:06:35 +0000

CVSROOT:        /sources/koha
Module name:    koha
Changes by:     Antoine Farnault <toins>        06/07/04 15:06:35

Modified files:
        misc/migration_tools: bulkmarcimport.pl 

Log message:
        Head & rel_2_2 merged

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/koha/misc/migration_tools/bulkmarcimport.pl?cvsroot=koha&r1=1.6&r2=1.7

Patches:
Index: bulkmarcimport.pl
===================================================================
RCS file: /sources/koha/koha/misc/migration_tools/bulkmarcimport.pl,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- bulkmarcimport.pl   1 Apr 2006 22:10:50 -0000       1.6
+++ bulkmarcimport.pl   4 Jul 2006 15:06:35 -0000       1.7
@@ -2,14 +2,21 @@
 # small script that import an iso2709 file into koha 2.0
 
 use strict;
+# use warnings;
 
 # Koha modules used
 use MARC::File::USMARC;
+# Uncomment the line below and use MARC::File::XML again when it works better.
+# -- thd
+# use MARC::File::XML;
 use MARC::Record;
 use MARC::Batch;
+use MARC::Charset;
 use C4::Context;
 use C4::Biblio;
 use Time::HiRes qw(gettimeofday);
+use Getopt::Long;
+binmode(STDOUT, ":utf8");
 
 use Getopt::Long;
 
@@ -27,6 +34,110 @@
     'v:s' => \$verbose,
 );
 
+# FIXME:  Management of error conditions needed for record parsing problems
+# and MARC8 character sets with mappings to Unicode not yet included in 
+# MARC::Charset.  The real world rarity of these problems is not fully tested.
+# Unmapped character sets will throw a warning currently and processing will 
+# continue with the error condition.  A fairly trivial correction should 
+# address some record parsing and unmapped character set problems but I need 
+# time to implement a test and correction for undef subfields and revert to 
+# MARC8 if mappings are missing. -- thd
+sub fMARC8ToUTF8($$) {
+       my ($record) = shift;
+       my ($verbose) = shift;
+       if ($verbose) {
+               if ($verbose >= 2) {
+                       my $leader = $record->leader();
+                       $leader =~ s/ /#/g;
+                       print "\n000 " . $leader;
+               }
+       }
+       foreach my $field ($record->fields()) {
+               if ($field->is_control_field()) {
+                       if ($verbose) {
+                               if ($verbose >= 2) {
+                                       my $fieldName = $field->tag();
+                                       my $fieldValue = $field->data();
+                                       $fieldValue =~ s/ /#/g;
+                                       print "\n" . $fieldName;
+                                       print ' ' . $fieldValue;
+                               }
+                       }
+               } else {
+                       my @subfieldsArray;
+                       my $fieldName = $field->tag();
+                       my $indicator1Value = $field->indicator(1);
+                       my $indicator2Value = $field->indicator(2);
+                       if ($verbose) {
+                               if ($verbose >= 2) {
+                                       $indicator1Value =~ s/ /#/;
+                                       $indicator2Value =~ s/ /#/;
+                                       print "\n" . $fieldName . ' ' . 
+                                                       $indicator1Value . 
+                                       $indicator2Value;
+                               }
+                       }
+                       foreach my $subfield ($field->subfields()) {
+                               my $subfieldName = $subfield->[0];
+                               my $subfieldValue = $subfield->[1];
+                               $subfieldValue = 
MARC::Charset::marc8_to_utf8($subfieldValue);
+                               
+                               # Alas, MARC::Field::update() does not work 
correctly.
+                               ## push (@subfieldsArray, $subfieldName, 
$subfieldValue);
+                               
+                               push @subfieldsArray, [$subfieldName, 
$subfieldValue];
+                               if ($verbose) {
+                                       if ($verbose >= 2) {
+                                               print " \$" . $subfieldName . ' 
' . $subfieldValue;
+                                       }
+                               }
+                       }
+                       
+                       # Alas, MARC::Field::update() does not work correctly.
+                       # 
+                       # The first instance in the field of a of a repeated 
subfield 
+                       # overwrites the content from later instances with the 
content 
+                       # from the first instance.
+                       ## $field->update(@subfieldsArray);
+                       
+                       foreach my $subfieldRow(@subfieldsArray) {
+                               my $subfieldName = $subfieldRow->[0];
+                               $field->delete_subfields($subfieldName);
+                       }
+                       foreach my $subfieldRow(@subfieldsArray) {
+                               $field->add_subfields(@$subfieldRow);
+                       }
+                       
+                       if ($verbose) {
+                               if ($verbose >= 2) {
+                                       # Reading the indicator values again is 
not necessary.  
+                                       # They were not converted.
+                                       # $indicator1Value = 
$field->indicator(1);
+                                       # $indicator2Value = 
$field->indicator(2);
+                                       # $indicator1Value =~ s/ /#/;
+                                       # $indicator2Value =~ s/ /#/;
+                                       print "\nCONVERTED TO UTF-8:\n" . 
$fieldName . ' ' . 
+                                                       $indicator1Value . 
+                                       $indicator2Value;
+                                       foreach my $subfield 
($field->subfields()) {
+                                               my $subfieldName = 
$subfield->[0];
+                                               my $subfieldValue = 
$subfield->[1];
+                                               print " \$" . $subfieldName . ' 
' . $subfieldValue;
+                                       }
+                               }
+                       }
+                       if ($verbose) {
+                               if ($verbose >= 2) {
+                                       print "\n" if $verbose;
+                               }
+                       }
+               }
+       }
+       $record->encoding('UTF-8');
+       return $record;
+}
+
+
 if ($version || ($input_marc_file eq '')) {
        print <<EOF
 small script to import an iso2709 file into Koha.
@@ -37,7 +148,8 @@
 \tn : the number of records to import. If missing, all the file is imported
 \tcommit : the number of records to wait before performing a 'commit' operation
 \tt : test mode : parses the file, saying what he would do, but doing nothing.
-\tc : the char encoding. At the moment, only MARC21 and UNIMARC supported. 
MARC21 by default.
+\tc : the characteristic MARC flavour. At the moment, only MARC21 and UNIMARC 
+\tsupported. MARC21 by default.
 \td : delete EVERYTHING related to biblio in koha-DB before import  :tables :
 \t\tbiblio, \t\tbiblioitems, \t\tsubjects,\titems
 \t\tadditionalauthors, \tbibliosubtitles, \tmarc_biblio,
@@ -72,8 +184,8 @@
        print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
 }
 
-$char_encoding = 'MARC21' unless ($char_encoding);
-print "CHAR : $char_encoding\n" if $verbose;
+$marcFlavour = 'MARC21' unless ($marcFlavour);
+print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
 my $starttime = gettimeofday;
 my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
 $batch->warnings_off();
@@ -118,7 +230,12 @@
     foreach my $oldField ( $record->fields() ) {
 
        # just reproduce tags < 010 in our new record
-       if ( $oldField->tag() < 10 ) {
+       # 
+       # Fields are not necessarily only numeric in the actual world of 
records 
+       # nor in what I would recommend for additonal safe non-interfering local
+       # use fields.  The following regular expression match is much safer 
than 
+       # a numeric evaluation. -- thd
+       if ( $oldField->tag() =~ m/^00/ ) {
            $newRecord->append_fields( $oldField );
            next();
        }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]