diff --git a/.github/workflows/test_and_check.yml b/.github/workflows/test_and_check.yml index 73ed123..d7fda58 100644 --- a/.github/workflows/test_and_check.yml +++ b/.github/workflows/test_and_check.yml @@ -16,7 +16,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: test - args: --workspace --verbose + args: --features default,xml --verbose format_check: runs-on: ubuntu-latest steps: diff --git a/Cargo.toml b/Cargo.toml index 151d4c9..93bca87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,3 +18,12 @@ path = "src/lib.rs" [features] default = [] nightly = [] +xml = ["dep:xml-rs"] + +[dependencies] +xml-rs = { version = "0.8", optional = true } + +[package.metadata.docs.rs] +rustdoc-args = ["--cfg", "docsrs"] +no-default-features = true +features = ["default", "xml"] diff --git a/src/errors.rs b/src/errors.rs index de9496d..6aa4ea2 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -29,6 +29,9 @@ pub enum Error { UnexpectedSubfieldEnd, NonUnicodeSequence(Pointer), UnknownCharacterCodingScheme(u8), + Utf8Error(std::str::Utf8Error), + #[cfg(feature = "xml")] + XmlError(xml::writer::Error), Io(io::ErrorKind), } @@ -65,14 +68,43 @@ impl fmt::Display for Error { } }, Error::Io(err) => write!(f, "IO error: {:?}", err), + Error::Utf8Error(err) => write!(f, "UTF8 error: {}", err), + #[cfg(feature = "xml")] + Error::XmlError(err) => write!(f, "XML error: {}", err), } } } -impl ::std::error::Error for Error {} +impl ::std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::UnexpectedByteInDecNum(_) + | Error::FieldTooLarge(_) + | Error::RecordTooLarge(_) + | Error::RecordTooShort(_) + | Error::UnexpectedEofInDecNum + | Error::UnexpectedEof + | Error::UnexpectedEofInDirectory + | Error::NoRecordTerminator + | Error::UnexpectedSubfieldEnd + | Error::NonUnicodeSequence(_) + | Error::UnknownCharacterCodingScheme(_) + | Error::Io(_) => None, + Error::Utf8Error(ref e) => Some(e as &_), + #[cfg(feature = "xml")] + Error::XmlError(ref e) => Some(e as &_), + } + } +} impl From for Error { fn from(err: io::Error) -> Error { Error::Io(err.kind()) } } + +impl From for Error { + fn from(err: std::str::Utf8Error) -> Error { + Error::Utf8Error(err) + } +} diff --git a/src/lib.rs b/src/lib.rs index e93772f..6b880a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,9 +112,14 @@ mod identifier; mod indicator; mod misc; mod tag; +#[cfg(feature = "xml")] +mod xml; pub use errors::*; +#[cfg(feature = "xml")] +#[doc(inline)] +pub use crate::xml::MarcXml; #[doc(inline)] pub use field::fields::Fields; #[doc(inline)] diff --git a/src/xml.rs b/src/xml.rs new file mode 100644 index 0000000..59f08e4 --- /dev/null +++ b/src/xml.rs @@ -0,0 +1,434 @@ +//! # Module to convert MARC 21 records to MARC XML + +use crate::{Error, Field, Record, Result, Subfield}; +use std::io::Write; +use xml::writer::{EmitterConfig, EventWriter, XmlEvent}; + +const MARCXML_NS: &[(&str, &str)] = &[ + ("xmlns:marc", "http://www.loc.gov/MARC21/slim"), + ("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"), + ( + "xsi:schemaLocation", + "http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd", + ), +]; + +pub trait XmlElement { + fn xml_element(&self, w: &mut EventWriter) -> Result<()>; +} + +pub trait XmlRootElement { + fn xml_root_element(&self, w: &mut EventWriter) -> Result<()>; +} + +/// Output a single record or a collection of records as MARC XML. +/// +/// ## Examples +/// +/// ### Outputting a single record +/// +/// ```rust +/// # use marc::*; +/// # fn main() -> Result<()> { +/// let mut builder = RecordBuilder::new(); +/// let record = builder +/// .add_fields(fields!( +/// control fields: [ +/// b"001" => "000000002", +/// b"003" => "RuMoRGB", +/// ]; +/// data fields: [ +/// b"979", b" ", [ +/// b'a' => "autoref", +/// b'a' => "dlopen", +/// ], +/// ]; +/// ))? +/// .get_record()?; +/// assert_eq!(String::from_utf8(record.xml_minified()?).unwrap(), "00100nam 2200061 i 4500000000002RuMoRGBautorefdlopen".to_string()); +/// # Ok(()) +/// # } +/// ``` +/// +/// ### Outputting a collection of records +/// +/// ```rust +/// # use marc::*; +/// # fn main() -> Result<()> { +/// let mut builder = RecordBuilder::new(); +/// let records = vec![builder +/// .add_fields(fields!( +/// control fields: [ +/// b"001" => "000000002", +/// b"003" => "RuMoRGB", +/// ]; +/// data fields: [ +/// b"979", b" ", [ +/// b'a' => "autoref", +/// b'a' => "dlopen", +/// ], +/// ]; +/// ))? +/// .get_record()?]; +/// assert_eq!(String::from_utf8(records.xml_minified()?).unwrap(), "00100nam \ +/// 2200061 i 4500000000002RuMoRGBautorefdlopen".to_string()); +/// # Ok(()) +/// # } +/// ``` +#[cfg_attr(docsrs, doc(cfg(feature = "xml")))] +pub trait MarcXml<'a> +where + Self: XmlRootElement>, +{ + /// Output MARC XML + fn xml(&self, pretty_print: bool) -> Result> { + let mut buffer = Vec::new(); + let mut writer = EmitterConfig::new() + .perform_indent(pretty_print) + .create_writer(&mut buffer); + + self.xml_root_element(&mut writer)?; + Ok(buffer) + } + + /// Output minified (outdented) MARC XML + fn xml_minified(&self) -> Result> { + Self::xml(self, false) + } + + /// Output pretty-print (indented) MARC XML + fn xml_pretty(&self) -> Result> { + Self::xml(self, true) + } +} + +impl<'a> MarcXml<'a> for Vec> {} +impl<'a> MarcXml<'a> for Record<'a> {} + +impl XmlRootElement> for Vec> { + fn xml_root_element(&self, w: &mut EventWriter) -> Result<()> { + write_element("marc:collection", MARCXML_NS.to_vec(), w, |w| { + for record in self { + record.xml_element(w)?; + } + Ok(()) + })?; + Ok(()) + } +} + +impl XmlRootElement> for Record<'_> { + fn xml_root_element(&self, w: &mut EventWriter) -> Result<()> { + write_element("marc:record", MARCXML_NS.to_vec(), w, |w| { + write_element("marc:leader", vec![], w, |w| { + w.write(XmlEvent::Characters(&String::from_utf8_lossy( + &self.as_ref()[0..24], + ))) + .map_err(Into::into) + })?; + for field in self.fields() { + field.xml_element(w)?; + } + Ok(()) + }) + } +} + +impl XmlElement for Record<'_> { + fn xml_element(&self, w: &mut EventWriter) -> Result<()> { + write_element("marc:record", vec![], w, |w| { + write_element("marc:leader", vec![], w, |w| { + w.write(XmlEvent::Characters(&String::from_utf8_lossy( + &self.as_ref()[0..24], + ))) + .map_err(Into::into) + })?; + for field in self.fields() { + field.xml_element(w)?; + } + Ok(()) + }) + } +} + +impl XmlElement for Field<'_> { + fn xml_element(&self, w: &mut EventWriter) -> Result<()> { + let tag = self.get_tag(); + match tag.0 { + [b'0', b'0', ..] => { + let attributes = vec![("tag", tag.as_str())]; + write_element("marc:controlfield", attributes, w, |w| { + w.write(XmlEvent::Characters(self.get_data::())) + .map_err(Into::into) + })?; + } + _ => { + let indicator = self.get_indicator(); + let attributes = vec![ + ("tag", tag.as_str()), + ("ind1", indicator.first()), + ("ind2", indicator.second()), + ]; + write_element("marc:datafield", attributes, w, |w| { + for subfield in self.subfields() { + subfield.xml_element(w)?; + } + Ok(()) + })?; + } + } + Ok(()) + } +} + +impl XmlElement for Subfield<'_> { + fn xml_element(&self, w: &mut EventWriter) -> Result<()> { + let code: &str = &self.get_identifier().as_char().to_string(); + let attributes = vec![("code", code)]; + write_element("marc:subfield", attributes, w, |w| { + w.write(XmlEvent::Characters(self.get_data::())) + .map_err(Into::into) + })?; + Ok(()) + } +} + +fn write_element) -> Result<()>>( + element: &str, + attr: Vec<(&str, &str)>, + w: &mut EventWriter, + f: F, +) -> Result<()> { + let mut event_builder = XmlEvent::start_element(element); + + for &(k, v) in attr.iter() { + event_builder = event_builder.attr(k, v); + } + + let mut event: XmlEvent<'_> = event_builder.into(); + w.write(event)?; + f(w)?; + event = XmlEvent::end_element().into(); + w.write(event).map_err(Into::into) +} + +impl From for Error { + fn from(error: xml::writer::Error) -> Error { + Error::XmlError(error) + } +} + +#[cfg(test)] +mod tests { + use crate::{fields, MarcXml, Record, RecordBuilder}; + use xml::writer::XmlEvent; + use xml::EmitterConfig; + + fn test_record() -> Record<'static> { + let mut builder = RecordBuilder::new(); + builder + .add_fields(fields!( + data fields: [ + b"264", b" 1", [ + b'a' => "León, Spain", + ], + b"245", b"00", [ + b'a' => "Book title", + b'b' => "Book Subtitle", + ], + b"100", b"1 ", [ + b'a' => "Author Name", + ], + b"041", b"0 ", [ + b'a' => "eng", + ], + ]; + control fields: [ + b"008" => "210128t20212021enka sb 000 0 eng d", + b"001" => "000000001", + ]; + )) + .unwrap(); + builder.get_record().unwrap() + } + + fn test_records() -> Vec> { + vec![test_record()] + } + + #[test] + fn should_output_minified_xml_record() { + let minified_xml = test_record() + .xml_minified() + .map(String::from_utf8) + .unwrap() + .unwrap(); + + let expected = "\ + \ + 00220nam 2200097 i 4500\ + 000000001\ + 210128t20212021enka sb 000 0 eng d\ + \ + eng\ + \ + \ + Author Name\ + \ + \ + Book title\ + Book Subtitle\ + \ + \ + León, Spain\ + \ + ".to_string(); + assert_eq!(minified_xml, expected); + } + + #[test] + fn should_output_pretty_xml_record() { + let minified_xml = test_record() + .xml_pretty() + .map(String::from_utf8) + .unwrap() + .unwrap(); + + let expected = "\ + \n\ + \n \ + 00220nam 2200097 i 4500\n \ + 000000001\n \ + 210128t20212021enka sb 000 0 eng d\n \ + \n \ + eng\n \ + \n \n \ + Author Name\n \ + \n \ + \n \ + Book title\n \ + Book Subtitle\n \ + \n \ + \n \ + León, Spain\n \ + \n\ + ".to_string(); + + assert_eq!(minified_xml, expected); + } + + #[test] + fn should_output_minified_xml_collection() { + let minified_xml = test_records() + .xml_minified() + .map(String::from_utf8) + .unwrap() + .unwrap(); + + let expected = "\ + \ + \ + 00220nam 2200097 i 4500\ + 000000001\ + 210128t20212021enka sb 000 0 eng d\ + \ + eng\ + \ + \ + Author Name\ + \ + \ + Book title\ + Book Subtitle\ + \ + \ + León, Spain\ + \ + \ + ".to_string(); + assert_eq!(minified_xml, expected); + } + + #[test] + fn should_output_pretty_xml_collection() { + let minified_xml = test_records() + .xml_pretty() + .map(String::from_utf8) + .unwrap() + .unwrap(); + + let expected = "\ + \n\ + \n \ + \n \ + 00220nam 2200097 i 4500\n \ + 000000001\n \ + 210128t20212021enka sb 000 0 eng d\n \ + \n \ + eng\n \ + \n \ + \n \ + Author Name\n \ + \n \ + \n \ + Book title\n \ + Book Subtitle\n \n \ + \n \ + León, Spain\n \ + \n \ + \n\ + ".to_string(); + + assert_eq!(minified_xml, expected); + } + + #[test] + fn should_write_element() { + let mut buffer = Vec::new(); + let mut writer = EmitterConfig::new() + .write_document_declaration(false) + .create_writer(&mut buffer); + + super::write_element( + "test_element", + vec![("attr1", "value1"), ("attr2", "value2")], + &mut writer, + |w| { + // Write some content to the element + let event = XmlEvent::characters("test content"); + w.write(event).map_err(Into::into) + }, + ) + .ok(); + let xml_str = String::from_utf8(buffer).unwrap(); + + let expected_xml = + r#"test content"#; + assert_eq!(xml_str, expected_xml); + } +}