diff --git a/Cargo.lock b/Cargo.lock index c28f80b..b4e5c8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -229,6 +229,26 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" +[[package]] +name = "const_format" +version = "0.2.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -263,6 +283,7 @@ dependencies = [ "assert_cmd", "chrono", "codspeed-divan-compat", + "const_format", "diff", "itoa", "predicates", diff --git a/Cargo.toml b/Cargo.toml index 1673839..1889a12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ path = "src/main.rs" [dependencies] chrono = "0.4.38" +const_format = "0.2.35" diff = "0.1.13" itoa = "1.0.11" regex = "1.10.4" @@ -46,7 +47,14 @@ harness = false [features] # default = ["feat_bench_not_diff"] -# Turn bench for diffutils cmp off +# instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +feat_allow_case_insensitive_number_units = [] +# Disable bench for diffutils cmp feat_bench_not_cmp = [] -# Turn bench for diffutils diff off +# Disable bench for diffutils diff feat_bench_not_diff = [] +# Enables a check on options defined in NOT_YET_IMPLEMENTED. +# If on the parser will return an error message in these cases. +# This is preferable when running the util as unsupported options +# are pointed out to the user, but can make tests fail. 
+feat_check_not_yet_implemented = [] diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs index 8a69c07..93768b2 100644 --- a/fuzz/fuzz_targets/fuzz_side.rs +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -4,9 +4,9 @@ extern crate libfuzzer_sys; use diffutilslib::side_diff; +use diffutilslib::params::Params; use std::fs::File; use std::io::Write; -use diffutilslib::params::Params; fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { let (original, new, /* width, tabsize, */ expand) = x; @@ -22,7 +22,7 @@ fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { ..Default::default() }; let mut output_buf = vec![]; - side_diff::diff(&original, &new, &mut output_buf, ¶ms); + side_diff::diff(&original, &new, &mut output_buf, &(¶ms).into()); File::create("target/fuzz.file.original") .unwrap() .write_all(&original) @@ -39,4 +39,4 @@ fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { .unwrap() .write_all(&output_buf) .unwrap(); -}); \ No newline at end of file +}); diff --git a/src/arg_parser.rs b/src/arg_parser.rs new file mode 100644 index 0000000..0cf1606 --- /dev/null +++ b/src/arg_parser.rs @@ -0,0 +1,909 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! This is a generic parser for program arguments (operands and options). +//! +//! The [Parser] is able to parse the options of all diffutils, e.g. `cmp --options` or `diff --options`. +//! +//! Features: +//! +//! - Allows options to be abbreviated, e.g. \--wi instead of \--width +//! - Allows input like in GNU utils, e.g. the following are all identical: +//! - `diff --ignore-case --minimal --width=50 file_a file_b` +//! - `diff --ignore-case --minimal --width 50 file_a file_b` +//! - `diff -i -d -w 50 file_a file_b` +//! - `diff -id -w50 file_a file_b` +//! - `diff -idw50 file_a file_b` +//! 
- A [NumberParser] is available, which parses option arguments +//! with optional byte units, e.g. =1024 or =1024KiB +//! - Default handling for \--version and \--help +//! - Returns the [ParsedOption]s or a [ParseError] Enum, which makes it library friendly. +//! - Contains error handling for the typical parsing errors: +//! - missing and extra operands +//! - invalid, ambiguous or conflicting options +//! - missing or not allowed option arguments +//! - Provides error text functions, e.g. add executable and 'Try \--help' to message. +//! +use std::{ + error::Error, + ffi::{OsStr, OsString}, + fmt::Display, + iter::Peekable, +}; + +// TODO finalize copyright +pub const TEXT_COPYRIGHT: &str = r#"Copyright (c) uutils developers +Licenses: MIT License, Apache License 2.0 "#; + +// TODO finalize help text footer +pub const TEXT_HELP_FOOTER: &str = r#" +This utility is part of the Rust uutils project: https://github.com/uutils/. +Report bugs here: https://github.com/uutils/diffutils/issues. +"#; + +// Version text: "(uutils diffutils) " followed by the crate version (CARGO_PKG_VERSION). +#[allow(unused)] +pub const TEXT_VERSION_BASE: &str = concat!("(uutils diffutils) ", env!("CARGO_PKG_VERSION"),); + +// AppOption for help, also reacting on -h +pub const OPT_HELP: AppOption = AppOption { + long_name: "help", + short: Some('h'), + has_arg: false, +}; +// AppOption for version, also reacting on -v +pub const OPT_VERSION: AppOption = AppOption { + long_name: "version", + short: Some('v'), + has_arg: false, +}; + +/// Add a centralized copyright message to another text. +/// +/// Returns `text`, a line break and [TEXT_COPYRIGHT]. +pub fn add_copyright(text: &str) -> String { + format!("{text}\n{TEXT_COPYRIGHT}") +} + +/// Formats the error message and adds the help hint "Try 'diff \--help' for more information." +/// +/// * executable: [Executable] +/// * error: The error to output. Its message will be preceded by 'executable: '. +/// Sometimes the executable is not available during error message creation, +/// so #EXE will be replaced by the name of the executable, e.g. 'diff'. +/// +/// This is the central output function. It affects all utils.
\ +/// It allows to just use 'eprintln!("{e}");' in case of an error. +pub fn format_error_text(executable: &Executable, error: &T) -> String { + // for messages that have the executable already + let exe = format!("{executable}: "); + // the placeholder #EXE is replaced by the name of the executable + let msg = error + .to_string() + .replace("#EXE", executable.to_string().as_str()); + if msg.starts_with(&exe) { + format!("{msg}\n{exe}Try '{executable} --help' for more information.",) + } else { + format!("{exe}{msg}\n{exe}Try '{executable} --help' for more information.",) + } +} + +/// Returns the standardized version text for this utility. +/// +/// The text is the executable name followed by [TEXT_VERSION_BASE]. +pub fn get_version_text(executable: &Executable) -> String { + format!("{executable} {TEXT_VERSION_BASE}") +} + +/// Convert a text into input for the parsers. +/// +/// This is for testing and allows to write a simple string `diff file_1 file_2 --width=50` +/// to be converted in the input format the parser expects, like ArgsOs. +/// +/// The text is split on single spaces only, so consecutive spaces produce empty arguments +/// and quoting is not supported. +#[allow(unused)] +pub fn args_into_peekable_os_strings(args: &str) -> Peekable> { + let mut o = Vec::new(); + for arg in args.split(' ') { + o.push(OsString::from(arg)); + } + o.into_iter().peekable() +} + +/// Check if the user selected an option which is not yet implemented. +/// +/// Returns [ParseError::NotYetImplemented] for the first parsed option which is contained in +/// `implemented_options`, otherwise `Ok(())`. +/// +/// NOTE(review): despite its name, `implemented_options` is used as the list of options +/// which are rejected, so callers presumably pass the not-yet-implemented options here. +/// TODO confirm against the callers and consider renaming the parameter. +/// +/// If the option has no short char, the message contains a blank instead: `(- )`. +#[allow(unused)] +pub fn is_implemented( + options_parsed: &[ParsedOption], + implemented_options: &[AppOption], +) -> Result<(), ParseError> { + if let Some(not_yet) = options_parsed + .iter() + .find(|o| implemented_options.contains(o.app_option)) + { + return Err(ParseError::NotYetImplemented(format!( + "'--{}' (-{})", + not_yet.app_option.long_name, + not_yet.app_option.short.unwrap_or(' ') + ))); + } + + Ok(()) +} + +/// This contains the args/options the app allows. They must be all of const value.
+#[derive(Debug, Clone, Copy, PartialEq)] +pub struct AppOption { + /// long name of option + pub long_name: &'static str, + pub short: Option, + pub has_arg: bool, + // pub arg_default: Option<&'static str>, +} + +impl AppOption { + /// formatted long option + /// + /// Returns the long name formatted: `--option`. \ + /// There is inconsistency in GNU diffutils, if these are printed with or without quotes. + pub fn format_long(&self) -> String { + format!("--{}", self.long_name) + } + + /// formatted long and short option + /// + /// There is inconsistency in GNU diffutils, if these are printed with or without quotes. + /// + /// # Returns + /// * Some(short): `'--option' (-c)`. + /// * None: [Self::format_long] + pub fn format_for_error_msg(&self) -> String { + self.format_long() + // match self.short { + // Some(c) => format!("--{} (-{c})", self.long_name), + // None => self.format_long(), + // } + } + + /// formatted option char + /// + /// Returns the short char formatted: "-c" or an empty String if None. + #[allow(unused)] + pub fn short_or_empty(&self) -> String { + match self.short { + Some(c) => format!("-{c}"), + None => String::new(), + } + } +} + +/// One parsed option. +#[derive(Debug, Clone, PartialEq)] +pub struct ParsedOption { + pub app_option: &'static AppOption, + /// Argument of the option as string_lossy, e.g. the "1000kB" of "\--bytes=1000kB". + pub arg_for_option: Option, + /// Argument of the option as original OsString + pub arg_for_option_os: Option, + /// If the user typed the long name or used the short char to set the option. 
+ pub name_type_used: OptionNameTypeUsed, +} + +impl ParsedOption { + /// Create an option with an argument. + /// + /// `arg_for_option` is derived from `arg_for_option_os` as string_lossy. + pub fn new( + app_option: &'static AppOption, + arg_for_option_os: OsString, + name_type_used: OptionNameTypeUsed, + ) -> Self { + Self { + app_option, + arg_for_option: Some(arg_for_option_os.to_string_lossy().to_string()), + arg_for_option_os: Some(arg_for_option_os), + name_type_used, + } + } + + /// Create an option which does not have an argument. + pub fn new_no_arg(app_option: &'static AppOption, used: OptionNameTypeUsed) -> Self { + Self { + app_option, + arg_for_option: None, + arg_for_option_os: None, + name_type_used: used, + } + } + + /// This checks if an option requires an argument and if it is already known. + /// + /// * Case A: `--long-option=argument`: Argument is already parsed + /// * Case B: `--long-option argument`: Argument must be the next in the given args + /// * Case C: `-bArgument`: Argument is already parsed + /// * Case D: `-b Argument`: Argument must be the next in the given args + /// + /// In cases B and D the following arg is consumed from `opts` (unless it starts with '-') + /// and stored both as String (lossy) and as original OsString. + /// + /// # Errors + /// * [ParseError::ArgForOptionMissing] if the option requires an argument and none is available. + /// * [ParseError::ArgForOptionNotAllowed] if the option has an argument but does not allow one. + fn check_add_arg<I: Iterator<Item = OsString>>( + &mut self, + opts: &mut Peekable<I>, + ) -> Result<(), ParseError> { + // argument missing + if self.app_option.has_arg { + if self.arg_for_option.is_none() { + // take following argument if it is not an option + let next_is_argument = + matches!(opts.peek(), Some(arg) if !arg.to_string_lossy().starts_with('-')); + if next_is_argument { + if let Some(arg_os) = opts.next() { + // keeps arg_for_option and arg_for_option_os in sync + self.set_arg_for_option(arg_os); + } + } + if self.arg_for_option.is_none() { + return Err(ParseError::ArgForOptionMissing(self.clone())); + } + } + } else { + // argument allowed? + if self.arg_for_option.is_some() { + return Err(ParseError::ArgForOptionNotAllowed(self.clone())); + } + } + + Ok(()) + } + + /// Sets arg_for_option_os and arg_for_option as string_lossy.
+ pub fn set_arg_for_option(&mut self, arg_for_option_os: OsString) { + self.arg_for_option = Some(arg_for_option_os.to_string_lossy().to_string()); + self.arg_for_option_os = Some(arg_for_option_os); + } + + /// Easy String conversion: returns the Argument or an empty String if None. + pub fn arg_for_option_or_empty_string(&self) -> String { + match &self.arg_for_option { + Some(s) => s.clone(), + None => String::new(), + } + } +} + +impl Default for ParsedOption { + fn default() -> Self { + Self { + app_option: &AppOption { + long_name: "dummy", + short: None, + has_arg: false, + }, + arg_for_option: None, + arg_for_option_os: None, + name_type_used: OptionNameTypeUsed::LongName, + } + } +} + +/// To differentiate the user input, did he use -s or \--silent. +/// While this is technically no difference, the error message may vary. +#[derive(Debug, Default, Clone, Copy, PartialEq)] +pub enum OptionNameTypeUsed { + #[default] + None, + LongName, + ShortName, +} + +/// This is a generic parser for program arguments (operands and options), +/// but without the executable. +/// +/// This generic parser is able to parse the options of all diffutils, e.g. `cmp --options` or `diff --options`. \ +/// The allowed options are passed as a list of static [AppOption]s, as they are known at compile time. +/// +/// # Example: read params for sdiff +/// ```rust +/// # use diffutilslib::sdiff::{sdiff, SDiffOk, TEXT_HELP}; +/// # use diffutilslib::sdiff::params_sdiff; +/// let args = "sdiff --help"; +/// // let args = "sdiff file_1.txt file_2.txt --width=40"; +/// // Test helper conversion, usually this is ArgsOs. 
+/// let args = diffutilslib::arg_parser::args_into_peekable_os_strings(&args); +/// let params = match sdiff(args) { +/// Ok(res) => match res { +/// SDiffOk::Different => todo!(), +/// SDiffOk::Equal => todo!(), +/// SDiffOk::Help => { +/// println!("{TEXT_HELP}"); +/// return; // ExitCode::from(0); +/// } +/// SDiffOk::Version => todo!(), +/// }, +/// Err(e) => { +/// eprintln!("{e}"); +/// return; // ExitCode::from(2); +/// } +/// }; +/// ``` +#[derive(Debug, Default)] +pub struct Parser { + /// The options in the order they were given. + pub options_parsed: Vec<ParsedOption>, + /// The operands (file names) in the order they were given. + pub operands: Vec<OsString>, + // temporary stored for each param + name_type_used: OptionNameTypeUsed, +} + +impl Parser { + /// Parse the args into operands and options for the utility. + /// + /// The arguments must not contain the executable. + /// + /// The allowed options are passed as a list of static [AppOption]s, as they are known at compile time. + /// + /// An empty argument (e.g. `diff "" file`) is treated as an operand. + /// + /// # Returns Result + /// * Ok: [Parser] with [ParsedOption]s and operands (file names) + /// * Error: [ParseError] + pub fn parse_params<I: Iterator<Item = OsString>>( + app_options: &'static [AppOption], + mut args: Peekable<I>, + ) -> Result<Self, ParseError> { + // sdiff options begin with ‘-’, so normally from-file and to-file may not begin with ‘-’. + // However, -- as an argument by itself treats the remaining arguments as file names even if they begin with ‘-’. + // You may not use - as an input file. + // read next param as file name, here we generally use read as operand + let mut parser = Self::default(); + let mut is_double_dash = false; + while let Some(param_os) = args.next() { + let mut param = param_os.to_string_lossy().to_string(); + // dbg!(&param); + let mut ci = param.char_indices().peekable(); + // An empty argument cannot be an option, keep it as operand instead of panicking. + let Some((_, c0)) = ci.next() else { + parser.operands.push(param_os); + continue; + }; + // is param?
+ if c0 != '-' || param == "-" || is_double_dash { + // Operand, not an option with - or -- + // or single dash '-', this is for file as StandardInput + parser.operands.push(param_os); + continue; + } + // check 2nd char, which must exist, see above checks + let (_, c1) = ci.next().unwrap(); + let mut p_opt = ParsedOption::default(); + // has 3rd char? + if let Some((pos_c2, _c2)) = ci.peek() { + if c1 == '-' { + // long option, e.g. --bytes + parser.name_type_used = OptionNameTypeUsed::LongName; + + // Find argument for some options, either '=' or following arg. + // This also shortens param to its name. + if let Some(p) = param[*pos_c2..].find('=') { + // only --bytes and --ignore-initial must have bytes, else return error + // reduce param to option and + // return bytes without = sign. + let os = Self::split_os_prefix(¶m_os, p + *pos_c2 + 1)?; + p_opt.set_arg_for_option(os); + param = param[0..p + *pos_c2].to_string(); + } + + // allow partial option descriptors, like --he for --help, if unique + p_opt.app_option = + Self::identify_option_from_partial_text(¶m_os, app_options)?; + + p_opt.name_type_used = OptionNameTypeUsed::LongName; + p_opt.check_add_arg(&mut args)?; + parser.options_parsed.push(p_opt); + } else { + // -MultiSingleChar, e.g. -bl or option with bytes -n200 + parser.name_type_used = OptionNameTypeUsed::ShortName; + let mut c = c1; + let mut pos = 1; + loop { + let Some(opt) = app_options.iter().find(|o| o.short == Some(c)) else { + return Err(ParseError::InvalidOption(param_os)); + }; + if opt.has_arg { + // take rest of the string as arg + let arg_for_option_os = if param.len() > pos + 1 { + Some(Self::split_os_prefix(¶m_os, pos + 1)?) 
+ } else { + args.next() + }; + let Some(os) = arg_for_option_os else { + return Err(ParseError::ArgForOptionMissing( + ParsedOption::new_no_arg(opt, OptionNameTypeUsed::ShortName), + )); + }; + parser.options_parsed.push(ParsedOption::new( + opt, + os, + OptionNameTypeUsed::ShortName, + )); + break; + } else { + parser + .options_parsed + .push(ParsedOption::new_no_arg(opt, OptionNameTypeUsed::ShortName)); + } + match ci.next() { + Some((p, cx)) => { + c = cx; + pos = p + } + None => break, + } + } + } + } else { + // single short options, e.g. -b. + parser.name_type_used = OptionNameTypeUsed::ShortName; + match app_options.iter().find(|opt| { + if let Some(c) = opt.short { + c == c1 + } else { + false + } + }) { + Some(opt) => { + p_opt.app_option = opt; + p_opt.name_type_used = OptionNameTypeUsed::ShortName; + p_opt.check_add_arg(&mut args)?; + parser.options_parsed.push(p_opt); + } + None => { + if c1 == '-' { + is_double_dash = true + } else { + return Err(ParseError::InvalidOption(param_os)); + } + } + } + } + } + + // identified unique option + if parser.is_help() { + parser.set_only_option(&OPT_HELP); + return Ok(parser); + } + if parser.is_version() { + parser.set_only_option(&OPT_VERSION); + return Ok(parser); + } + + Ok(parser) + } + + /// * param_os: expected to start with "\--" + pub fn identify_option_from_partial_text( + param_os: &OsStr, + app_options: &'static [AppOption], + ) -> Result<&'static AppOption, ParseError> { + assert!(param_os.len() > 2); + let mut param = ¶m_os.to_string_lossy()[2..]; + if let Some(p) = param.find('=') { + param = ¶m[0..p]; + } + let l = param.len(); + let possible_opts: Vec<&'static AppOption> = app_options + .iter() + .filter(|&it| it.long_name.len() >= l && &it.long_name[0..l] == param) + .collect(); + + match possible_opts.len() { + 0 => Err(ParseError::UnrecognizedOption(param_os.to_os_string())), + + 1 => Ok(*possible_opts.first().unwrap()), + + _ => Err(ParseError::AmbiguousOption( + param_os.to_os_string(), + 
possible_opts, + )), + } + } + + /// Check if user requested the \--help output. + pub fn is_help(&self) -> bool { + self.options_parsed + .iter() + .any(|opt| *opt.app_option == OPT_HELP) + } + + /// Check if user requested the \--version output. + pub fn is_version(&self) -> bool { + self.options_parsed + .iter() + .any(|opt| *opt.app_option == OPT_VERSION) + } + + fn set_only_option(&mut self, option: &'static AppOption) { + self.options_parsed = vec![ParsedOption::new_no_arg(option, self.name_type_used)]; + self.operands.clear(); + } + + /// Split an OsString on Linux. On Windows this is not possible. \ + /// This is required for options like `--file-name=argument-non-utf-8` + /// + /// # Returns + /// * A slice of the OsStr starting from `index`. + /// * None if the OS doesn't support byte-slicing or index is out of bounds. + pub fn split_os_prefix(os_str: &OsStr, index: usize) -> Result { + #[cfg(unix)] + { + // On Unix, OsStr is just a sequence of bytes (often UTF-8, but not guaranteed). + use std::os::unix::ffi::OsStrExt; + let bytes = os_str.as_bytes(); + if index <= bytes.len() { + return Ok(OsStr::from_bytes(&bytes[index..]).to_os_string()); + } + } + + #[cfg(not(unix))] + { + // On Windows/others, we can't safely slice raw bytes because + // they use Wtf-8/Utf-16 which might split a surrogate pair. + // We fall back to UTF-8 conversion if possible. + let r = os_str.to_str().and_then(|s| { + if index <= s.len() { + Some(OsString::from(&s[index..])) + } else { + None + } + }); + if let Some(os) = r { + return Ok(os); + } + } + + Err(ParseError::NoUnicode(os_str.to_os_string())) + } +} + +/// Contains all parser errors and their text messages. +/// +/// All errors can be output easily using the normal Display functionality. +/// To format the error message for the typical diffutils output, use [format_error_text]. +#[derive(Debug, Clone, PartialEq)] +pub enum ParseError { + /// When the long option is abbreviated, but does not have a unique match. 
+ /// (ambiguous option, possible options) + AmbiguousOption(OsString, Vec<&'static AppOption>), + + /// 'executable': option '--silent' doesn't allow an argument + /// (wrong option) + ArgForOptionNotAllowed(ParsedOption), + + /// (option, short or long name used) + ArgForOptionMissing(ParsedOption), + + /// Having more operands than allowed (usually 2) + /// (wrong operand) + ExtraOperand(OsString), + + /// Non-existent single dash option. + /// (unidentified option) + InvalidOption(OsString), + + /// number for an option argument incorrect + InvalidValueNumber(ParsedOption), + #[allow(unused)] // Allow external usage (cmp) + InvalidValueNumberUnit(ParsedOption), + #[allow(unused)] // Allow external usage (cmp) + InvalidValueNumberOverflow(ParsedOption), + + /// 'executable' as first parameter missing. + #[allow(unused)] // Allow usage for main function so all parsing errors are covered. + NoExecutable, + + /// no args for the actual utility given + NoOperands(Executable), + + /// Parsed option is not in unicode. + /// Since Rust cannot split OsString on Non-Linux Systems, + /// it can accept the argument for an option only as + /// separate arg (--regex someRegex). + NoUnicode(OsString), + + /// Two options cannot be used together, e.g. cmp --silent and --verbose (output). + #[allow(unused)] // Allow external usage (cmp) + OptionsIncompatible(&'static AppOption, &'static AppOption), + + /// Non-existent long option. This is "unrecognized" because the name can be abbreviated. 
+ /// (unrecognized option) + UnrecognizedOption(OsString), + + NotYetImplemented(String), +} + +impl Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ParseError::AmbiguousOption(param, possible_opts) => { + // create list of possible options + let mut list = Vec::new(); + for opt in possible_opts { + list.push(opt.format_long()); + } + write!( + f, + "option '{}' is ambiguous; possibilities: {}", + param.to_string_lossy(), + list.join(" ") + ) + } + ParseError::ArgForOptionNotAllowed(opt) => write!( + f, + "option {} doesn't allow an argument", + opt.app_option.format_long() + ), + ParseError::ArgForOptionMissing(opt) => { + write!( + f, + "option {} requires an argument", + opt.app_option.format_for_error_msg(), + ) + } + ParseError::ExtraOperand(opt) => write!(f, "extra operand '{}'", opt.to_string_lossy()), + ParseError::InvalidValueNumber(opt) | ParseError::InvalidValueNumberUnit(opt) => { + write!( + f, + "invalid {} value '{}'", + // "invalid argument '{}' for '--{}'{}", + opt.app_option.format_for_error_msg(), + opt.arg_for_option_or_empty_string(), + // opt.short_char_or_empty_string(), + ) + } + ParseError::InvalidValueNumberOverflow(opt) => { + write!( + f, + "invalid {} value '{}' (too large)", + // "invalid argument '{}' for '--{}'{}", + opt.app_option.format_for_error_msg(), + opt.arg_for_option_or_empty_string(), + // opt.short_char_or_empty_string(), + ) + } + ParseError::InvalidOption(param) => { + write!( + f, + "{}", + &format!("invalid option '{}'", param.to_string_lossy()) + ) + } + ParseError::NoExecutable => { + write!(f, "Expected utility name as second argument, got nothing.") + } + ParseError::NoOperands(exe) => { + write!(f, "missing operand after '{exe}'") + } + ParseError::NoUnicode(os) => { + let mut s = OsString::from("Cannot parse non-unicode '"); + s.push(os); + s.push(OsString::from( + "'. Separate the argument from the option, e.g. 
'--from-file argument' instead '--from-file=argument'", + )); + // Output the message built above, not the text of NoExecutable. + write!(f, "{}", s.to_string_lossy()) + } + ParseError::OptionsIncompatible(op_1, op_2) => { + write!( + f, + "options {} and {} are incompatible", + op_1.format_for_error_msg(), + op_2.format_for_error_msg() + ) + } + ParseError::UnrecognizedOption(param) => { + write!( + f, + "{}", + &format!("unrecognized option '{}'", param.to_string_lossy()) + ) + } + ParseError::NotYetImplemented(param) => { + write!(f, "{}", &format!("not yet implemented: option {param}")) + } + } + } +} + +#[allow(unused)] // required for cmp +pub struct NumberParser {} + +#[allow(unused)] // required for cmp +impl NumberParser { + /// Parses a number with an optional unit, e.g. 10MiB. + /// + /// Follows <https://www.gnu.org/software/diffutils/manual/html_node/cmp-Options.html>. + /// + /// # Errors + /// * [ParseError::InvalidValueNumber] if the argument is missing, empty or does not start with a digit. + /// * [ParseError::InvalidValueNumberUnit] if the unit is not known. + /// * [ParseError::InvalidValueNumberOverflow] if the result does not fit into u64. + pub fn parse_number(parsed_option: &ParsedOption) -> Result<u64, ParseError> { + let Some(num_unit) = &parsed_option.arg_for_option else { + return Err(ParseError::InvalidValueNumber(parsed_option.clone())); + }; + if num_unit.is_empty() { + return Err(ParseError::InvalidValueNumber(parsed_option.clone())); + } + + // split number and unit, parse unit + let multiplier: u64; + let n = match num_unit.find(|b: char| !b.is_ascii_digit()) { + Some(pos) => { + if pos == 0 { + return Err(ParseError::InvalidValueNumber(parsed_option.clone())); + } + multiplier = Self::parse_number_unit(&num_unit[pos..], parsed_option)?; + &num_unit[0..pos] + } + None => { + multiplier = 1; + num_unit + } + }; + + // return value + match n.parse::<u64>() { + Ok(num) => { + if multiplier == 1 { + Ok(num) + } else { + match num.checked_mul(multiplier) { + Some(r) => Ok(r), + None => Err(ParseError::InvalidValueNumberOverflow( + parsed_option.clone(), + )), + } + } + } + // This is an additional error message not present in GNU DiffUtils.
+ Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => Err( + ParseError::InvalidValueNumberOverflow(parsed_option.clone()), + ), + Err(_) => Err(ParseError::InvalidValueNumber(parsed_option.clone())), + } + } + + /// Parses a number unit, e.g. "KiB" into a multiplier + /// which then can be used to calculate the final number of bytes. + /// + /// The units are matched case-sensitive, with the exceptions listed in the match below + /// (e.g. "kB" and "KB", "k" and "K"). + /// + /// # Returns + /// A multiplier depending on the given unit, e.g. 'KiB' -> 1024 + /// or [ParseError::InvalidValueNumberUnit] if the unit could not be identified. + /// + /// Units up to Exabyte (EiB) following GNU documentation: \ + /// <https://www.gnu.org/software/diffutils/manual/html_node/cmp-Options.html>. + #[cfg(not(feature = "feat_allow_case_insensitive_number_units"))] + fn parse_number_unit(unit: &str, parsed_option: &ParsedOption) -> Result { + let multiplier = match unit { + "kB" | "KB" => 1_000, + "k" | "K" | "KiB" | "kiB" => 1_024, + "MB" => 1_000_000, + "M" | "MiB" => 1_048_576, + "GB" => 1_000_000_000, + "G" | "GiB" => 1_073_741_824, + + "TB" => 1_000_000_000_000, + "T" | "TiB" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" | "PiB" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" | "EiB" => 1_152_921_504_606_846_976, + + // Everything above EiB cannot fit into u64. + // GNU cmp just returns an invalid bytes value + // "ZB" => 1_000_000_000_000_000_000_000, + // "Z" | "ZiB" => 1_180_591_620_717_411_303_424, + // "YB" => 1_000_000_000_000_000_000_000_000, + // "Y" | "YiB" => 1_208_925_819_614_629_174_706_176, + _ => { + return Err(ParseError::InvalidValueNumberUnit(parsed_option.clone())); + } + }; + + Ok(multiplier) + } + + /// Returns a multiplier depending on the given unit, e.g. 'KiB' -> 1024, + /// which then can be used to calculate the final number of bytes.
+ /// Following GNU documentation: https://www.gnu.org/software/diffutils/manual/html_node/cmp-Options.html + #[cfg(feature = "feat_allow_case_insensitive_number_units")] + fn parse_number_unit(unit: &str, parsed_option: &ParsedOption) -> Result { + // Note that GNU cmp advertises supporting up to Y, but fails if you try + // to actually use anything beyond E. + let unit = unit.to_owned().to_ascii_lowercase(); + // .to_ascii_lowercase().as_str(); + let multiplier = match unit.as_str() { + "kb" => 1_000, + "k" | "kib" => 1_024, + "mb" => 1_000_000, + "m" | "mib" => 1_048_576, + "gb" => 1_000_000_000, + "g" | "gib" => 1_073_741_824, + + "tb" => 1_000_000_000_000, + "t" | "tib" => 1_099_511_627_776, + "pb" => 1_000_000_000_000_000, + "p" | "pib" => 1_125_899_906_842_624, + "eb" => 1_000_000_000_000_000_000, + "e" | "eib" => 1_152_921_504_606_846_976, + + // Everything above EiB cannot fit into u64. + // GNU cmp just returns an invalid bytes value + // "zb" => 1_000_000_000_000_000_000_000, + // "z" | "zib" => 1_180_591_620_717_411_303_424, + // "yb" => 1_000_000_000_000_000_000_000_000, + // "y" | "yib" => 1_208_925_819_614_629_174_706_176, + _ => { + return Err(ParseError::InvalidValueNumberUnit(parsed_option.clone())); + } + }; + + Ok(multiplier) + } +} + +/// Differentiates the utilities included in DiffUtil +/// and replaces executable as OsString. +/// +/// This allows easy output of the executable name with +/// ```format!("{}", params.executable)``` +/// without calling ```to_string_lossy()``` each time. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Executable { + Cmp, + Diff, + Diff3, + Patch, + SDiff, + + // Called from a library. Stores name. + NotRecognized(OsString), +} + +#[allow(unused)] +impl Executable { + /// Returns the executable name as OsString. \ + /// + /// In case of [Self::NotRecognized], this is the original OsString. + /// + /// The name is mostly used to write it which always requires a String. 
+ pub fn executable(&self) -> OsString { + match self { + Executable::NotRecognized(os_string) => os_string.clone(), + _ => OsString::from(self.to_string()), + } + } + + /// Return as OsString. Same as fn [Self::executable]. + pub fn to_os_string(&self) -> OsString { + self.executable() + } + + /// Read the first arg (the executable) without moving the iterator of args. + /// + /// Returns + /// - Some: [Executable]. + /// - Diffutils: diff, cmp, sdiff, diff3 and patch + /// - NotRecognized(OsString) for all other inputs + /// - None: only if no argument was found. + pub fn from_args_os>( + args: &mut Peekable, + move_iter: bool, + ) -> Option { + if move_iter { + args.next().map(|exe| Self::from(&exe)) + } else { + args.peek().map(Self::from) + } + } +} + +impl From<&OsString> for Executable { + fn from(executable: &OsString) -> Self { + match executable.to_str() { + Some("cmp") => Executable::Cmp, + Some("diff") => Executable::Diff, + Some("diff3") => Executable::Diff3, + Some("patch") => Executable::Patch, + Some("sdiff") => Executable::SDiff, + _ => Executable::NotRecognized(OsString::from(executable)), + } + } +} + +impl Display for Executable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = match self { + Executable::Cmp => "cmp", + Executable::Diff => "diff", + Executable::Diff3 => "diff3", + Executable::SDiff => "sdiff", + Executable::Patch => "patch", + Executable::NotRecognized(name) => &name.to_string_lossy(), + }; + write!(f, "{name}") + } +} diff --git a/src/diff.rs b/src/diff.rs index f4c0614..a705e15 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -81,7 +81,7 @@ pub fn main(opts: Peekable) -> ExitCode { }), Format::SideBySide => { let mut output = stdout().lock(); - side_diff::diff(&from_content, &to_content, &mut output, ¶ms) + side_diff::diff(&from_content, &to_content, &mut output, &(¶ms).into()) } }; if params.brief && !result.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 342b01c..8dfa3cf 100644 --- 
a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,11 @@ +pub mod arg_parser; pub mod cmp; pub mod context_diff; pub mod ed_diff; pub mod macros; pub mod normal_diff; pub mod params; +pub mod sdiff; pub mod side_diff; pub mod unified_diff; pub mod utils; diff --git a/src/main.rs b/src/main.rs index b7c2712..c66e12f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,7 @@ use std::{ process::ExitCode, }; +mod arg_parser; mod cmp; mod context_diff; mod diff; @@ -18,6 +19,7 @@ mod ed_diff; mod macros; mod normal_diff; mod params; +mod sdiff; mod side_diff; mod unified_diff; mod utils; @@ -72,6 +74,7 @@ fn main() -> ExitCode { match util_name.to_str() { Some("diff") => diff::main(args), Some("cmp") => cmp::main(args), + Some("sdiff") => sdiff::main(args), Some(name) => { eprintln!("{name}: utility not supported"); ExitCode::from(2) diff --git a/src/sdiff.rs b/src/sdiff.rs new file mode 100644 index 0000000..a8869f7 --- /dev/null +++ b/src/sdiff.rs @@ -0,0 +1,253 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! This module holds the core compare logic of sdiff. +pub mod params_sdiff; + +use std::{ + env::ArgsOs, + ffi::OsString, + fs, + io::{self, stdout, Read, Write}, + iter::Peekable, + process::ExitCode, +}; + +use crate::{ + arg_parser::{ + add_copyright, format_error_text, get_version_text, Executable, ParseError, + TEXT_HELP_FOOTER, + }, + sdiff::params_sdiff::{ParamsSDiff, SDiffParseOk}, + side_diff, utils, +}; + +// This contains the hard coded 'sdiff'. If required this needs to be replaced with the executable. +pub const TEXT_HELP: &str = const_format::concatcp!( + r#" + sdiff is a tool which allows to compare two text files for differences. + It outputs the differences in a side-by-side view. + Use 'diff' for a row-by-row view. + Use 'cmp' to compare binary files. + + Usage: sdiff [OPTIONS] [FILE]... 
+ If a FILE is '-', read operating system's standard input. + + Options: + -o, --output=FILE operate interactively while sending output to FILE + --diff-program=PROGRAM use PROGRAM to compare files + -a, --text treat all files as text + -H, --speed-large-files assume large files with many scattered small changes + -d, --minimal try to find a smaller set of changes + + -i, --ignore-case do not distinguish between upper- and lower-case letters + -E, --ignore-tab-expansion ignore changes due to tab expansion + -Z, --ignore-trailing-space ignore white space at line end + -b, --ignore-space-change ignore changes in the amount of white space + -W, --ignore-all-space ignore all white space + -B, --ignore-blank-lines ignore changes whose lines are all blank + -I, --ignore-matching-lines=REGEX ignore changes all whose lines match REGEX expression + --strip-trailing-cr strip trailing carriage return on input + + -s, --suppress-common-lines do not output common lines + -l, --left-column output only the left column of common lines + -t, --expand-tabs expand tabs to spaces in output + --tabsize=NUM tab stops at every NUM (default 8) print columns + -w, --width=NUM limit the print width to NUM print columns (default 130) + + -h --help display this help and exit + -v, --version output version information and exit + + Exit status is 0 if inputs are identical, 1 if different, 2 in error case. + "#, + TEXT_HELP_FOOTER +); + +/// Entry into sdiff. +/// +/// Param options, e.g. 'sdiff file1.txt file2.txt -bd n2000kB'. \ +/// sdiff options as documented in the GNU manual. 
+/// +/// Ends program with Exit Status: +/// * 0 if inputs are identical +/// * 1 if inputs are different +/// * 2 in error case +pub fn main(mut args: Peekable) -> ExitCode { + let Some(executable) = Executable::from_args_os(&mut args, false) else { + eprintln!("Expected utility name as first argument, got nothing."); + return ExitCode::FAILURE; + }; + match sdiff(args) { + Ok(res) => match res { + SDiffOk::Different => ExitCode::FAILURE, + SDiffOk::Equal => ExitCode::SUCCESS, + SDiffOk::Help => { + println!("{}", add_copyright(TEXT_HELP)); + ExitCode::SUCCESS + } + SDiffOk::Version => { + println!("{}", get_version_text(&executable)); + ExitCode::SUCCESS + } + }, + Err(e) => { + let msg = match e { + SDiffError::ReadFileErrors(_, _) => { + format!("{e}") + } + _ => format_error_text(&executable, &e), + }; + // let msg = format_error_text(&executable, &e); + eprintln!("{msg}"); + ExitCode::from(2) + } + } +} + +/// This is the full sdiff call. +/// +/// The first arg needs to be the executable, then the operands and options. +pub fn sdiff>(mut args: Peekable) -> Result { + let Some(executable) = Executable::from_args_os(&mut args, true) else { + return Err(ParseError::NoExecutable.into()); + }; + // read params + let params = match ParamsSDiff::parse_params(&executable, args)? { + SDiffParseOk::Params(p) => p, + SDiffParseOk::Help => return Ok(SDiffOk::Help), + SDiffParseOk::Version => return Ok(SDiffOk::Version), + }; + // dbg!("{params:?}"); + + // compare files + sdiff_compare(¶ms) +} + +/// This is the main function to compare the files. 
\ +/// +/// TODO sdiff is missing a number of options, currently implemented: +/// * expand_tabs +/// * tabsize +/// * width +/// * The output format does not match GNU sdiff +pub fn sdiff_compare(params: &ParamsSDiff) -> Result { + if utils::is_same_file(¶ms.from, ¶ms.to) { + return Ok(SDiffOk::Equal); + } + + let (from_content, to_content) = match read_both_files(¶ms.from, ¶ms.to) { + Ok(files) => files, + Err(errors) => { + let mut vs = Vec::new(); + for (file, e) in errors { + let s = utils::format_failure_to_read_input_file( + ¶ms.executable.to_os_string(), + &file, + &e, + ); + vs.push(s); + } + return Err(SDiffError::ReadFileErrors( + params.executable.clone(), + vs.to_vec(), + )); + } + }; + + // run diff + let mut output = stdout().lock(); + let result = side_diff::diff(&from_content, &to_content, &mut output, ¶ms.into()); + + match std::io::stdout().write_all(&result) { + Ok(_) => { + if result.is_empty() { + Ok(SDiffOk::Equal) + } else { + Ok(SDiffOk::Different) + } + } + Err(e) => Err(SDiffError::OutputError(e.to_string())), + } +} + +/// Helper function to read a file fully into memory. +// While this could be in utils, the functionality is limited to files which fit into memory. +// TODO will not work for large files, need buffered approach. +pub fn read_file_contents(filepath: &OsString) -> io::Result> { + if filepath == "-" { + let mut content = Vec::new(); + io::stdin().read_to_end(&mut content).and(Ok(content)) + } else { + fs::read(filepath) + } +} + +/// Reads both files and returns the files or a list of errors, as both files can produce a separate error. +pub type ResultReadBothFiles = Result<(Vec, Vec), Vec<(OsString, io::Error)>>; +/// Reads both files and returns the files or a list of errors, as both files can produce a separate error. 
+pub fn read_both_files(from: &OsString, to: &OsString) -> ResultReadBothFiles { + let mut read_errors = Vec::new(); + let from_content = match read_file_contents(from).map_err(|e| (from.clone(), e)) { + Ok(r) => r, + Err(e) => { + read_errors.push(e); + Vec::new() + } + }; + let to_content = match read_file_contents(to).map_err(|e| (to.clone(), e)) { + Ok(r) => r, + Err(e) => { + read_errors.push(e); + Vec::new() + } + }; + + if read_errors.is_empty() { + Ok((from_content, to_content)) + } else { + Err(read_errors) + } +} + +/// The Ok result of sdiff. +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum SDiffOk { + Different, + Equal, + Help, + Version, +} + +/// Errors for sdiff. +/// +/// To centralize error messages and make it easier to use in a lib. +#[derive(Debug, Clone, PartialEq)] +#[allow(clippy::enum_variant_names)] +pub enum SDiffError { + // parse errors + ParseError(ParseError), + + // compare errors + OutputError(String), + ReadFileErrors(Executable, Vec), +} + +impl std::error::Error for SDiffError {} + +impl From for SDiffError { + fn from(e: ParseError) -> Self { + Self::ParseError(e) + } +} + +impl std::fmt::Display for SDiffError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SDiffError::ParseError(e) => write!(f, "{e}"), + SDiffError::OutputError(msg) => write!(f, "{msg}"), + SDiffError::ReadFileErrors(_exe, vec_err) => write!(f, "{}", vec_err.join("\n")), + } + } +} diff --git a/src/sdiff/params_sdiff.rs b/src/sdiff/params_sdiff.rs new file mode 100644 index 0000000..211cf83 --- /dev/null +++ b/src/sdiff/params_sdiff.rs @@ -0,0 +1,488 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! This module contains the Parser for sdiff arguments ([ParamsSDiff::parse_params]). 
+use std::{ffi::OsString, iter::Peekable}; + +use crate::arg_parser::{ + AppOption, Executable, ParseError, ParsedOption, Parser, OPT_HELP, OPT_VERSION, +}; + +// use crate::{ +// arg_parser::{self, AppOption, Executable, ParseError, ParsedOption, OPT_HELP, OPT_VERSION}, +// sdiff::params_sdiff::ParamsSDiff, +// }; + +pub type ResultSDiffParse = Result; + +// AppOptions for sdiff +pub const OPT_DIFF_PROGRAM: AppOption = AppOption { + long_name: "diff-program", + short: None, + has_arg: true, +}; +pub const OPT_EXPAND_TABS: AppOption = AppOption { + long_name: "expand-tabs", + short: Some('t'), + has_arg: false, +}; +pub const OPT_IGNORE_ALL_SPACE: AppOption = AppOption { + long_name: "ignore-all-space", + short: Some('W'), + has_arg: false, +}; +pub const OPT_IGNORE_BLANK_LINES: AppOption = AppOption { + long_name: "ignore-blank-lines", + short: Some('B'), + has_arg: false, +}; +pub const OPT_IGNORE_CASE: AppOption = AppOption { + long_name: "ignore-case", + short: Some('i'), + has_arg: false, +}; +pub const OPT_IGNORE_MATCHING_LINES: AppOption = AppOption { + long_name: "ignore-matching-lines", + short: Some('I'), + has_arg: true, +}; +pub const OPT_IGNORE_SPACE_CHANGE: AppOption = AppOption { + long_name: "ignore-space-change", + short: Some('b'), + has_arg: false, +}; +pub const OPT_IGNORE_TAB_EXPANSION: AppOption = AppOption { + long_name: "ignore-tab-expansion", + short: Some('E'), + has_arg: false, +}; +pub const OPT_IGNORE_TRAILING_SPACE: AppOption = AppOption { + long_name: "ignore-trailing-space", + short: Some('Z'), + has_arg: false, +}; +pub const OPT_LEFT_COLUMN: AppOption = AppOption { + long_name: "left-column", + short: Some('l'), + has_arg: false, +}; +pub const OPT_MINIMAL: AppOption = AppOption { + long_name: "minimal", + short: Some('d'), + has_arg: false, +}; +pub const OPT_OUTPUT: AppOption = AppOption { + long_name: "output", + short: Some('o'), + has_arg: true, +}; +pub const OPT_SPEED_LARGE_FILES: AppOption = AppOption { + long_name: 
"speed-large-files", + short: Some('H'), + has_arg: false, +}; +pub const OPT_STRIP_TRAILING_CR: AppOption = AppOption { + long_name: "strip-trailing-cr", + short: None, + has_arg: false, +}; +pub const OPT_SUPPRESS_COMMON_LINES: AppOption = AppOption { + long_name: "suppress-common-lines", + short: Some('s'), + has_arg: false, +}; +pub const OPT_TABSIZE: AppOption = AppOption { + long_name: "tabsize", + short: None, + has_arg: true, +}; +pub const OPT_TEXT: AppOption = AppOption { + long_name: "text", + short: Some('a'), + has_arg: false, +}; +pub const OPT_WIDTH: AppOption = AppOption { + long_name: "width", + short: Some('w'), + has_arg: true, +}; + +// Array for ArgParser +pub const APP_OPTIONS: [AppOption; 20] = [ + OPT_DIFF_PROGRAM, + OPT_EXPAND_TABS, + OPT_HELP, + OPT_IGNORE_ALL_SPACE, + OPT_IGNORE_BLANK_LINES, + OPT_IGNORE_CASE, + OPT_IGNORE_MATCHING_LINES, + OPT_IGNORE_SPACE_CHANGE, + OPT_IGNORE_TAB_EXPANSION, + OPT_IGNORE_TRAILING_SPACE, + OPT_LEFT_COLUMN, + OPT_MINIMAL, + OPT_OUTPUT, + OPT_SPEED_LARGE_FILES, + OPT_STRIP_TRAILING_CR, + OPT_SUPPRESS_COMMON_LINES, + OPT_TABSIZE, + OPT_TEXT, + OPT_VERSION, + OPT_WIDTH, +]; + +// These options throw an error, rather than go unnoticed. +#[cfg(feature = "feat_check_not_yet_implemented")] +pub const NOT_YET_IMPLEMENTED: [AppOption; 15] = [ + OPT_DIFF_PROGRAM, + OPT_IGNORE_ALL_SPACE, + OPT_IGNORE_BLANK_LINES, + OPT_IGNORE_CASE, + OPT_IGNORE_MATCHING_LINES, + OPT_IGNORE_SPACE_CHANGE, + OPT_IGNORE_TAB_EXPANSION, + OPT_IGNORE_TRAILING_SPACE, + OPT_LEFT_COLUMN, + OPT_MINIMAL, + OPT_OUTPUT, + OPT_SPEED_LARGE_FILES, + OPT_STRIP_TRAILING_CR, + OPT_SUPPRESS_COMMON_LINES, + OPT_TEXT, +]; + +/// Parser Result Ok Enum with Params. +/// +/// # Returns +/// - Params in normal cases +/// - Just Help or Version when these are requested as the params are then not relevant. +/// +/// Error will be returned as [ParseError] in the function Result Error. 
+#[derive(Debug, PartialEq)] +pub enum SDiffParseOk { + Params(ParamsSDiff), + Help, + Version, +} + +/// Holds the given command line arguments except "--version" and "--help". +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ParamsSDiff { + /// Identifier + pub executable: Executable, + pub from: OsString, + pub to: OsString, + /// --diff-program=PROGRAM use PROGRAM to compare files + pub diff_program: Option, + /// -t, --expand-tabs expand tabs to spaces in output + pub expand_tabs: bool, + /// --help display this help and exit + pub help: bool, + /// -W, --ignore-all-space ignore all white space + pub ignore_all_space: bool, + /// -B, --ignore-blank-lines ignore changes whose lines are all blank + pub ignore_blank_lines: bool, + /// -i, --ignore-case consider upper- and lower-case to be the same + pub ignore_case: bool, + /// -I, --ignore-matching-lines=REGEXP ignore changes all whose lines match REGEXP + pub ignore_matching_lines: Option, + /// -b, --ignore-space-change ignore changes in the amount of white space + pub ignore_space_change: bool, + /// -E, --ignore-tab-expansion ignore changes due to tab expansion + pub ignore_tab_expansion: bool, + /// -Z, --ignore-trailing-space ignore white space at line end + pub ignore_trailing_space: bool, + /// -l, --left-column output only the left column of common lines + pub left_column: bool, + /// -d, --minimal try hard to find a smaller set of changes + pub minimal: bool, + /// -o, --output=FILE operate interactively, sending output to FILE + pub output: Option, + /// -H, --speed-large-files assume large files, many scattered small changes + pub speed_large_files: bool, + /// --strip-trailing-cr strip trailing carriage return on input + pub strip_trailing_cr: bool, + /// -s, --suppress-common-lines do not output common lines + pub suppress_common_lines: bool, + /// --tabsize=NUM tab stops at every NUM (default 8) print columns + pub tabsize: usize, + /// -a, --text treat all files as text + pub text: bool, + /// 
-v, --version output version information and exit + pub version: bool, + /// -w, --width=NUM output at most NUM (default 130) print columns + pub width: usize, +} + +impl Default for ParamsSDiff { + fn default() -> Self { + Self { + executable: Executable::SDiff, + from: Default::default(), + to: Default::default(), + diff_program: Default::default(), + expand_tabs: Default::default(), + help: Default::default(), + ignore_all_space: Default::default(), + ignore_blank_lines: Default::default(), + ignore_case: Default::default(), + ignore_matching_lines: Default::default(), + ignore_space_change: Default::default(), + ignore_tab_expansion: Default::default(), + ignore_trailing_space: Default::default(), + left_column: Default::default(), + minimal: Default::default(), + output: Default::default(), + speed_large_files: Default::default(), + strip_trailing_cr: Default::default(), + suppress_common_lines: Default::default(), + tabsize: 8, + text: Default::default(), + version: Default::default(), + width: 130, + } + } +} + +impl ParamsSDiff { + /// Parses the program arguments. + /// + /// The arguments must not contain the executable. 
+ pub fn parse_params>( + executable: &Executable, + args: Peekable, + ) -> ResultSDiffParse { + let parser = Parser::parse_params(&APP_OPTIONS, args)?; + + // check implemented options + #[cfg(feature = "feat_check_not_yet_implemented")] + { + crate::arg_parser::is_implemented(&parser.options_parsed, &NOT_YET_IMPLEMENTED)?; + } + + let mut params = Self { + executable: executable.clone(), + ..Default::default() + }; + + // set options + for parsed_option in &parser.options_parsed { + // dbg!(parsed_option); + match *parsed_option.app_option { + OPT_DIFF_PROGRAM => params.diff_program = parsed_option.arg_for_option.clone(), + OPT_EXPAND_TABS => params.expand_tabs = true, + OPT_HELP => return Ok(SDiffParseOk::Help), + OPT_IGNORE_ALL_SPACE => params.ignore_all_space = true, + OPT_IGNORE_BLANK_LINES => params.ignore_blank_lines = true, + OPT_IGNORE_CASE => params.ignore_case = true, + OPT_IGNORE_MATCHING_LINES => { + params.ignore_matching_lines = parsed_option.arg_for_option.clone() + } + OPT_IGNORE_SPACE_CHANGE => params.ignore_space_change = true, + OPT_IGNORE_TAB_EXPANSION => params.ignore_tab_expansion = true, + OPT_IGNORE_TRAILING_SPACE => params.ignore_trailing_space = true, + OPT_LEFT_COLUMN => params.left_column = true, + OPT_MINIMAL => params.minimal = true, + OPT_OUTPUT => params.output = parsed_option.arg_for_option.clone(), + OPT_SPEED_LARGE_FILES => params.speed_large_files = true, + OPT_STRIP_TRAILING_CR => params.strip_trailing_cr = true, + OPT_SUPPRESS_COMMON_LINES => params.suppress_common_lines = true, + OPT_TABSIZE => { + params.set_tabsize(parsed_option)?; + } + OPT_TEXT => params.text = true, + OPT_VERSION => return Ok(SDiffParseOk::Version), + OPT_WIDTH => { + params.set_width(parsed_option)?; + } + + // This is not an error, but a todo. Unfortunately an Enum is not possible. 
+ _ => todo!("Err Option: {}", parsed_option.app_option.long_name), + } + } + + // set operands + match parser.operands.len() { + 0 => return Err(ParseError::NoOperands(executable.clone())), + // If only file_1 is set, then file_2 defaults to '-', so it reads from StandardInput. + 1 => { + params.from = parser.operands[0].clone(); + params.to = OsString::from("-"); + } + 2 => { + params.from = parser.operands[0].clone(); + params.to = parser.operands[1].clone(); + } + _ => { + return Err(ParseError::ExtraOperand(parser.operands[2].clone())); + } + } + + // dbg!(¶ms); + Ok(SDiffParseOk::Params(params)) + } + + pub fn set_tabsize(&mut self, parsed_option: &ParsedOption) -> Result { + let tab_size = parsed_option.arg_for_option.clone().unwrap_or_default(); + let t = match tab_size.parse::() { + Ok(w) => w, + Err(_) => return Err(ParseError::InvalidValueNumber(parsed_option.clone())), + }; + self.tabsize = t; + + Ok(t) + } + + pub fn set_width(&mut self, parsed_option: &ParsedOption) -> Result { + let width = parsed_option.arg_for_option.clone().unwrap_or_default(); + let w = match width.parse::() { + Ok(w) => w, + Err(_) => return Err(ParseError::InvalidValueNumber(parsed_option.clone())), + }; + self.width = w; + + Ok(w) + } +} + +// Usually assert is used like assert_eq(test result, expected result). +#[cfg(test)] +mod tests { + use super::*; + + fn os(s: &str) -> OsString { + OsString::from(s) + } + + /// Simplify call of parser, just pass a normal string like in the Terminal. 
+ fn parse(args: &str) -> ResultSDiffParse { + let mut o = Vec::new(); + for arg in args.split(' ') { + o.push(os(arg)); + } + let mut p = o.into_iter().peekable(); + // remove executable + let executable = Executable::from_args_os(&mut p, true).unwrap(); + + ParamsSDiff::parse_params(&executable, p) + } + + fn res_ok(params: ParamsSDiff) -> ResultSDiffParse { + Ok(SDiffParseOk::Params(params)) + } + + #[test] + fn positional() { + // file_1 and file_2 given + assert_eq!( + parse("sdiff foo bar"), + res_ok(ParamsSDiff { + executable: Executable::SDiff, + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + ); + + // file_1 only + assert_eq!( + parse("sdiff foo"), + res_ok(ParamsSDiff { + executable: Executable::SDiff, + from: os("foo"), + to: os("-"), + ..Default::default() + }), + ); + + // double dash without operand + assert_eq!( + parse("sdiff foo -- --help"), + res_ok(ParamsSDiff { + executable: Executable::SDiff, + from: os("foo"), + to: os("--help"), + ..Default::default() + }), + ); + + // Err: no arguments + let msg = "missing operand after 'sdiff'"; + match parse("sdiff") { + Ok(_) => assert!(false, "Should not be ok!"), + Err(e) => assert!( + e.to_string().contains(msg), + "error must contain: \"{msg}\"\nactual error: \"{e}\"" + ), + } + + // Err: too many operands + let msg = "extra operand 'should-not-be-here'"; + match parse("sdiff foo bar should-not-be-here") { + Ok(_) => assert!(false, "Should not be ok!"), + Err(e) => assert!( + e.to_string().contains(msg), + "error must contain: \"{msg}\"\nactual error: \"{e}\"" + ), + } + } + + #[test] + fn execution_modes() { + // Test all options + // Disable feature "feat_check_not_yet_implemented" + // I^A is at the end of the single options, forcing '^A' as argument for 'I'. 
+ // --wi is abbreviated and uses equal sign + // diff-program uses next arg + // -O uses next arg + let params = ParamsSDiff { + executable: Executable::SDiff, + from: os("foo"), + to: os("bar"), + diff_program: Some("prg".to_string()), + expand_tabs: true, + help: false, + ignore_all_space: true, + ignore_blank_lines: true, + ignore_case: true, + ignore_matching_lines: Some("^A".to_string()), + ignore_space_change: true, + ignore_tab_expansion: true, + ignore_trailing_space: true, + left_column: true, + minimal: true, + output: Some("out".to_string()), + speed_large_files: true, + strip_trailing_cr: true, + suppress_common_lines: true, + tabsize: 2, + text: true, + version: false, + width: 150, + }; + let r = parse( + "sdiff foo bar -iEZbWBalstdHI^A --wi=150 --diff-program prg -o out --strip --tab=2", + ); + match &r { + Ok(_) => assert_eq!(r, res_ok(params.clone())), + Err(e) => match e { + ParseError::NotYetImplemented(_) => {} + _ => assert_eq!(r, res_ok(params.clone())), + }, + } + + // negative value + // let msg = "invalid argument '-2' for '--tabsize'"; + let msg = "invalid --tabsize value '-2'"; + let r = parse("sdiff foo bar --tab=-2"); + match r { + Ok(_) => assert!(false, "Should not be Ok."), + Err(e) => assert!( + e.to_string().contains(msg), + "Must contain: {msg}\nactual: {e}" + ), + } + } +} diff --git a/src/side_diff.rs b/src/side_diff.rs index 56953d2..8d4bc7a 100644 --- a/src/side_diff.rs +++ b/src/side_diff.rs @@ -8,7 +8,7 @@ use diff::Result; use std::{io::Write, vec}; use unicode_width::UnicodeWidthStr; -use crate::params::Params; +use crate::sdiff::params_sdiff::ParamsSDiff; const GUTTER_WIDTH_MIN: usize = 3; @@ -98,6 +98,34 @@ impl Config { } } +/// Params for side_diff, so the functions can be used by multiple modules (diff and sdiff) +#[derive(Default)] +pub struct Params { + pub expand_tabs: bool, + pub tabsize: usize, + pub width: usize, +} + +impl From<&crate::params::Params> for Params { + fn from(param: &crate::params::Params) -> 
Self { + Self { + expand_tabs: param.expand_tabs, + tabsize: param.tabsize, + width: param.width, + } + } +} + +impl From<&ParamsSDiff> for Params { + fn from(param: &ParamsSDiff) -> Self { + Self { + expand_tabs: param.expand_tabs, + tabsize: param.tabsize, + width: param.width, + } + } +} + fn format_tabs_and_spaces( from: usize, to: usize, @@ -1093,7 +1121,7 @@ mod tests { let mut output = vec![]; diff(from_file, to_file, &mut output, ¶ms); - assert_eq!(output, vec![]); + assert_eq!(output, Vec::::new()); } #[test] diff --git a/src/utils.rs b/src/utils.rs index daca18d..325c4f1 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -4,7 +4,10 @@ // files that was distributed with this source code. use regex::Regex; -use std::{ffi::OsString, io::Write}; +use std::{ + ffi::{OsStr, OsString}, + io::Write, +}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. @@ -13,11 +16,11 @@ use unicode_width::UnicodeWidthStr; #[must_use] pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec { let tab = b'\t'; - let ntabs = line.iter().filter(|c| **c == tab).count(); - if ntabs == 0 { + let n_tabs = line.iter().filter(|c| **c == tab).count(); + if n_tabs == 0 { return line.to_vec(); } - let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1)); + let mut result = Vec::with_capacity(line.len() + n_tabs * (tabsize - 1)); let mut offset = 0; let mut iter = line.split(|c| *c == tab).peekable(); @@ -71,6 +74,11 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } +/// Checks if files are the same (same file link), which must return 'equal'. +pub fn is_same_file(from: &OsStr, to: &OsStr) -> bool { + (from == "-" && to == "-") || same_file::is_same_file(from, to).unwrap_or(false) +} + pub fn format_failure_to_read_input_file( executable: &OsString, filepath: &OsString,