From 94533ede69af04dc77fccc0796f5dd5e104692c9 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Sat, 27 Jan 2024 09:07:59 -0800 Subject: [PATCH 01/15] All unit tests pass in new c++17 port (no Qt) --- AUTHORS | 2 +- cpp17/CMakeLists.txt | 44 + cpp17/diff_match_patch.cpp | 2557 +++++++++++++++++++++++++++++++ cpp17/diff_match_patch.h | 671 ++++++++ cpp17/diff_match_patch.pro | 19 + cpp17/diff_match_patch_test.cpp | 1128 ++++++++++++++ cpp17/diff_match_patch_test.h | 236 +++ cpp17/include.cmake | 23 + 8 files changed, 4679 insertions(+), 1 deletion(-) create mode 100644 cpp17/CMakeLists.txt create mode 100644 cpp17/diff_match_patch.cpp create mode 100644 cpp17/diff_match_patch.h create mode 100644 cpp17/diff_match_patch.pro create mode 100644 cpp17/diff_match_patch_test.cpp create mode 100644 cpp17/diff_match_patch_test.h create mode 100644 cpp17/include.cmake diff --git a/AUTHORS b/AUTHORS index c82809e7..90c65e95 100644 --- a/AUTHORS +++ b/AUTHORS @@ -7,4 +7,4 @@ Duncan Cross (Lua port) Jan Weiß (Objective C port) Matthaeus G. 
Chajdas (C# port) Mike Slemmer (C++ port) - +Scott Aron Bloom (C++11 port-remove Qt dependency) diff --git a/cpp17/CMakeLists.txt b/cpp17/CMakeLists.txt new file mode 100644 index 00000000..e58cbd06 --- /dev/null +++ b/cpp17/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.22) + +find_package(IncludeProjectSettings REQUIRED) +include( ${CMAKE_CURRENT_LIST_DIR}/include.cmake ) +project( ${_PROJECT_NAME} ) +IncludeProjectSettings(QT ${USE_QT}) +add_library(${_PROJECT_NAME} STATIC + ${_PROJECT_DEPENDENCIES} +) +set_target_properties( ${_PROJECT_NAME} PROPERTIES FOLDER ${FOLDER_NAME} ) +target_include_directories( ${_PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR} ) +target_link_libraries( ${_PROJECT_NAME} + PUBLIC + ${project_pub_DEPS} + PRIVATE + ${project_pri_DEPS} +) + +set( testProjectName "" ) +SET( TEST_SOURCE_FILES + diff_match_patch_test.cpp + diff_match_patch_test.h + ) + +SAB_UNIT_TEST(diff_match_patch_cpp17 + diff_match_patch_test.cpp + "gtest;gmock;diff_match_patch_cpp17" + testProjectName + ${TEST_SOURCE_FILES} + ) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED YES) +set_target_properties( ${testProjectName} PROPERTIES + VS_DEBUGGER_WORKING_DIRECTORY "$" + VS_DEBUGGER_COMMAND "$" + VS_DEBUGGER_ENVIRONMENT "PATH=${DEBUG_PATH}" +) +if( WIN32 ) + set_target_properties( ${testProjectName} PROPERTIES + CXX_STANDARD 17 + ) +target_include_directories( ${testProjectName} PUBLIC ${CMAKE_SOURCE_DIR}) +endif() diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp new file mode 100644 index 00000000..e99be5d5 --- /dev/null +++ b/cpp17/diff_match_patch.cpp @@ -0,0 +1,2557 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "diff_match_patch.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +////////////////////////// +// +// Diff Class +// +////////////////////////// + +/** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL + * @param text The text being applied + */ +Diff::Diff( Operation _operation, const std::wstring &_text ) : + operation( _operation ), + text( _text ) +{ + // Construct a diff with the specified operation and text. +} + +Diff::Diff() +{ +} + +Diff::Diff( Operation _operation, const wchar_t *_text ) : + Diff( _operation, ( _text ? std::wstring( _text ) : std::wstring( L"" ) ) ) +{ +} + +std::wstring Diff::strOperation( Operation op ) +{ + switch ( op ) + { + case INSERT: + return L"INSERT"; + case DELETE: + return L"DELETE"; + case EQUAL: + return L"EQUAL"; + } + throw "Invalid operation."; +} + +/** + * Display a human-readable version of this Diff. + * @return text version + */ +std::wstring Diff::toString() const +{ + std::wstring prettyText = text; + // Replace linebreaks with Pilcrow signs. + std::replace( prettyText.begin(), prettyText.end(), L'\n', L'\u00b6' ); + return std::wstring( L"Diff(" ) + strOperation( operation ) + std::wstring( L",\"" ) + prettyText + std::wstring( L"\")" ); +} + +/** + * Is this Diff equivalent to another Diff? 
+ * @param d Another Diff to compare against + * @return true or false + */ +bool Diff::operator==( const Diff &d ) const +{ + return ( d.operation == this->operation ) && ( d.text == this->text ); +} + +bool Diff::operator!=( const Diff &d ) const +{ + return !( operator==( d ) ); +} + +///////////////////////////////////////////// +// +// Patch Class +// +///////////////////////////////////////////// + +/** + * Constructor. Initializes with an empty list of diffs. + */ +Patch::Patch() +{ +} + +Patch::Patch( std::wstring &text ) +{ + std::wsmatch matches; + auto patchHeader = std::wregex( LR"(^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$)" ); + if ( !std::regex_match( text, matches, patchHeader ) || ( matches.size() != 5 ) ) + { + throw std::wstring( L"Invalid patch string: " + text ); + } + start1 = diff_match_patch::toInt( matches[ 1 ].str() ); + if ( !matches[ 2 ].length() ) + { + start1--; + length1 = 1; + } + else if ( matches[ 2 ].str() == L"0" ) + { + length1 = 0; + } + else + { + start1--; + length1 = diff_match_patch::toInt( matches[ 2 ].str() ); + } + + start2 = diff_match_patch::toInt( matches[ 3 ].str() ); + if ( !matches[ 4 ].length() ) + { + start2--; + length2 = 1; + } + else if ( matches[ 4 ].str() == L"0" ) + { + length2 = 0; + } + else + { + start2--; + length2 = diff_match_patch::toInt( matches[ 4 ].str() ); + } + text.erase( text.begin() ); +} + +bool Patch::isNull() const +{ + if ( start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && diffs.empty() ) + { + return true; + } + return false; +} + +/** + * Emulate GNU diff's format. + * Header: @@ -382,8 +481,9 @@ + * Indices are printed as 1-based, not 0-based. + * @return The GNU diff string + */ +std::wstring Patch::toString() const +{ + auto text = getPatchHeader(); + // Escape the body of the patch with %xx notation. 
+ for ( auto &&aDiff : diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + text += L"+"; + break; + case DELETE: + text += L"-"; + break; + case EQUAL: + text += L" "; + break; + } + text += std::wstring( diff_match_patch::toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) ) + std::wstring( L"\n" ); + } + + return text; +} + +std::wstring Patch::getPatchHeader() const +{ + auto coords1 = getCoordinateString( start1, length1 ); + auto coords2 = getCoordinateString( start2, length2 ); + auto text = std::wstring( L"@@ -" ) + coords1 + std::wstring( L" +" ) + coords2 + std::wstring( L" @@\n" ); + return text; +} + +std::wstring Patch::getCoordinateString( std::size_t start, std::size_t length ) const +{ + std::wstring retVal; + if ( length == 0 ) + { + retVal = std::to_wstring( start ) + std::wstring( L",0" ); + } + else if ( length == 1 ) + { + retVal = std::to_wstring( start + 1 ); + } + else + { + retVal = std::to_wstring( start + 1 ) + std::wstring( L"," ) + std::to_wstring( length ); + } + return retVal; +} + +///////////////////////////////////////////// +// +// diff_match_patch Class +// +///////////////////////////////////////////// + +// all class members initialized in the class +diff_match_patch::diff_match_patch() +{ +} + +TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2 ) +{ + return diff_main( text1, text2, true ); +} + +TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ) +{ + // Set a deadline by which time the diff must be complete. 
+ clock_t deadline; + if ( Diff_Timeout <= 0 ) + { + deadline = std::numeric_limits< clock_t >::max(); + } + else + { + deadline = clock() + (clock_t)( Diff_Timeout * CLOCKS_PER_SEC ); + } + return diff_main( text1, text2, checklines, deadline ); +} + +TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) +{ + // Check for equality (speedup). + TDiffVector diffs; + if ( text1 == text2 ) + { + if ( !text1.empty() ) + { + diffs.emplace_back( EQUAL, text1 ); + } + return diffs; + } + + if ( !text1.empty() && text2.empty() ) + { + diffs.emplace_back( DELETE, text1 ); + return diffs; + } + + if ( text1.empty() && !text2.empty() ) + { + diffs.emplace_back( INSERT, text2 ); + return diffs; + } + + // Trim off common prefix (speedup). + auto commonlength = diff_commonPrefix( text1, text2 ); + auto commonprefix = text1.substr( 0, commonlength ); + auto textChopped1 = text1.substr( commonlength ); + auto textChopped2 = text2.substr( commonlength ); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix( textChopped1, textChopped2 ); + auto commonsuffix = textChopped1.substr( textChopped1.length() - commonlength ); + textChopped1 = textChopped1.substr( 0, textChopped1.length() - commonlength ); + textChopped2 = textChopped2.substr( 0, textChopped2.length() - commonlength ); + + // Compute the diff on the middle block. + diffs = diff_compute( textChopped1, textChopped2, checklines, deadline ); + + // Restore the prefix and suffix. + if ( !commonprefix.empty() ) + { + diffs.emplace( diffs.begin(), EQUAL, commonprefix ); + } + if ( !commonsuffix.empty() ) + { + diffs.emplace_back( EQUAL, commonsuffix ); + } + + diff_cleanupMerge( diffs ); + + return diffs; +} + +TDiffVector diff_match_patch::diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) +{ + TDiffVector diffs; + + if ( text1.empty() ) + { + // Just add some text (speedup). 
+ diffs.emplace_back( INSERT, text2 ); + return diffs; + } + + if ( text2.empty() ) + { + // Just delete some text (speedup). + diffs.emplace_back( DELETE, text1 ); + return diffs; + } + + { + auto [ longtext, shorttext ] = ( text1.length() > text2.length() ) ? std::make_pair( text1, text2 ) : std::make_pair( text2, text1 ); + auto i = longtext.find( shorttext ); + if ( i != std::string::npos ) + { + // Shorter text is inside the longer text (speedup). + const Operation op = ( text1.length() > text2.length() ) ? DELETE : INSERT; + diffs.emplace_back( op, longtext.substr( 0, i ) ); + diffs.emplace_back( EQUAL, shorttext ); + diffs.emplace_back( op, safeMid( longtext, i + shorttext.length() ) ); + return diffs; + } + + if ( shorttext.length() == 1 ) + { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.emplace_back( DELETE, text1 ); + diffs.emplace_back( INSERT, text2 ); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const TStringVector hm = diff_halfMatch( text1, text2 ); + if ( !hm.empty() ) + { + // A half-match was found, sort out the return data. + auto &&text1_a = hm[ 0 ]; + auto &&text1_b = hm[ 1 ]; + auto &&text2_a = hm[ 2 ]; + auto &&text2_b = hm[ 3 ]; + auto &&mid_common = hm[ 4 ]; + // Send both pairs off for separate processing. + diffs = diff_main( text1_a, text2_a, checklines, deadline ); + const TDiffVector diffs_b = diff_main( text1_b, text2_b, checklines, deadline ); + // Merge the results. + diffs.emplace_back( EQUAL, mid_common ); + diffs.insert( diffs.end(), diffs_b.begin(), diffs_b.end() ); + return diffs; + } + + // Perform a real diff. 
+ if ( checklines && ( text1.length() > 100 ) && ( text2.length() > 100 ) ) + { + return diff_lineMode( text1, text2, deadline ); + } + + return diff_bisect( text1, text2, deadline ); +} + +TDiffVector diff_match_patch::diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ) +{ + // Scan the text on a line-by-line basis first. + auto a = diff_linesToChars( text1, text2 ); + text1 = std::get< std::wstring >( a[ 0 ] ); + text2 = std::get< std::wstring >( a[ 1 ] ); + auto linearray = std::get< TStringVector >( a[ 2 ] ); + + auto diffs = diff_main( text1, text2, false, deadline ); + + // Convert the diff back to original text. + diff_charsToLines( diffs, linearray ); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic( diffs ); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.emplace_back( EQUAL, L"" ); + std::size_t pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + while ( pointer < diffs.size() ) + { + switch ( diffs[ pointer ].operation ) + { + case INSERT: + count_insert++; + text_insert += diffs[ pointer ].text; + break; + case DELETE: + count_delete++; + text_delete += diffs[ pointer ].text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if ( count_delete >= 1 && count_insert >= 1 ) + { + // Delete the offending records and add the merged ones. 
+ auto numElements = count_delete + count_insert; + auto start = diffs.begin() + pointer - numElements; + auto end = start + numElements; + diffs.erase( start, end ); + pointer = pointer - count_delete - count_insert; + auto subDiff = diff_main( text_delete, text_insert, false, deadline ); + diffs.insert( diffs.begin() + pointer, subDiff.begin(), subDiff.end() ); + pointer = pointer + subDiff.size(); + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + break; + } + pointer++; + } + diffs.pop_back(); // Remove the dummy entry at the end. + + return diffs; +} + +// using int64_t rather thant size_t due to the backward walking nature of the algorithm +TDiffVector diff_match_patch::diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ) +{ + // Cache the text lengths to prevent multiple calls. + auto text1_length = static_cast< int64_t >( text1.length() ); + auto text2_length = static_cast< int64_t >( text2.length() ); + auto max_d = ( text1_length + text2_length + 1 ) / 2; + auto v_offset = max_d; + auto v_length = 2 * max_d; + auto v1 = std::vector< int64_t >( v_length, -1 ); + auto v2 = std::vector< int64_t >( v_length, -1 ); + v1[ v_offset + 1 ] = 0; + v2[ v_offset + 1 ] = 0; + auto delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + bool front = ( delta % 2 != 0 ); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int64_t k1start = 0; + int64_t k1end = 0; + int64_t k2start = 0; + int64_t k2end = 0; + for ( int64_t d = 0; d < max_d; d++ ) + { + // Bail out if deadline is reached. + if ( clock() > deadline ) + { + break; + } + + // Walk the front path one step. 
+ for ( auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ) + { + auto k1_offset = v_offset + k1; + int64_t x1; + if ( ( k1 == -d ) || ( k1 != d ) && ( v1[ k1_offset - 1 ] < v1[ k1_offset + 1 ] ) ) + { + x1 = v1[ k1_offset + 1 ]; + } + else + { + x1 = v1[ k1_offset - 1 ] + 1; + } + int64_t y1 = x1 - k1; + while ( ( x1 < text1_length ) && ( y1 < text2_length ) && ( text1[ x1 ] == text2[ y1 ] ) ) + { + x1++; + y1++; + } + v1[ k1_offset ] = x1; + if ( x1 > text1_length ) + { + // Ran off the right of the graph. + k1end += 2; + } + else if ( y1 > text2_length ) + { + // Ran off the bottom of the graph. + k1start += 2; + } + else if ( front ) + { + auto k2_offset = v_offset + delta - k1; + if ( ( k2_offset >= 0 ) && ( k2_offset < v_length ) && ( v2[ k2_offset ] != -1 ) ) + { + // Mirror x2 onto top-left coordinate system. + auto x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } + } + + // Walk the reverse path one step. + for ( auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2 ) + { + auto k2_offset = v_offset + k2; + int64_t x2; + if ( ( k2 == -d ) || ( k2 != d ) && ( v2[ k2_offset - 1 ] < v2[ k2_offset + 1 ] ) ) + { + x2 = v2[ k2_offset + 1 ]; + } + else + { + x2 = v2[ k2_offset - 1 ] + 1; + } + auto y2 = x2 - k2; + while ( ( x2 < text1_length ) && ( y2 < text2_length ) && ( text1[ text1_length - x2 - 1 ] == text2[ text2_length - y2 - 1 ] ) ) + { + x2++; + y2++; + } + v2[ k2_offset ] = x2; + if ( x2 > text1_length ) + { + // Ran off the left of the graph. + k2end += 2; + } + else if ( y2 > text2_length ) + { + // Ran off the top of the graph. + k2start += 2; + } + else if ( !front ) + { + auto k1_offset = v_offset + delta - k2; + if ( ( k1_offset >= 0 ) && ( k1_offset < v_length ) && ( v1[ k1_offset ] != -1 ) ) + { + auto x1 = v1[ k1_offset ]; + auto y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. 
+ x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + auto diffs = TDiffVector( { Diff( DELETE, text1 ), Diff( INSERT, text2 ) } ); + return diffs; +} + +TDiffVector diff_match_patch::diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ) +{ + auto text1a = text1.substr( 0, x ); + auto text2a = text2.substr( 0, y ); + auto text1b = safeMid( text1, x ); + auto text2b = safeMid( text2, y ); + + // Compute both diffs serially. + TDiffVector diffs = diff_main( text1a, text2a, false, deadline ); + TDiffVector diffsb = diff_main( text1b, text2b, false, deadline ); + + diffs.insert( diffs.end(), diffsb.begin(), diffsb.end() ); + return diffs; +} + +TVariantVector diff_match_patch::diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ) +{ + TStringVector lineArray; + std::unordered_map< std::wstring, std::size_t > lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a nullptr character. 
+ lineArray.emplace_back( L"" ); + + const std::wstring chars1 = diff_linesToCharsMunge( text1, lineArray, lineHash ); + const std::wstring chars2 = diff_linesToCharsMunge( text2, lineArray, lineHash ); + + TVariantVector listRet; + listRet.emplace_back( chars1 ); + listRet.emplace_back( chars2 ); + listRet.emplace_back( lineArray ); + return listRet; +} + +std::wstring diff_match_patch::diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ) +{ + std::size_t lineStart = 0; + std::size_t lineEnd = std::string::npos; + std::wstring line; + std::wstring chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + bool firstTime = true; + while ( ( firstTime && ( lineEnd == -1 ) ) || lineEnd < ( text.length() - 1 ) ) + { + firstTime = false; + lineEnd = text.find( '\n', lineStart ); + if ( lineEnd == -1 ) + { + lineEnd = text.length() - 1; + } + line = safeMid( text, lineStart, lineEnd + 1 - lineStart ); + + auto pos = lineHash.find( line ); + if ( pos != lineHash.end() ) + { + chars += static_cast< char >( ( *pos ).second ); + } + else + { + lineArray.emplace_back( line ); + lineHash[ line ] = lineArray.size() - 1; + chars += static_cast< char >( lineArray.size() - 1 ); + } + + lineStart = lineEnd + 1; + } + return chars; +} + +void diff_match_patch::diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray ) +{ + // Qt has no mutable Qforeach construct. 
+ for ( auto &&diff : diffs ) + { + std::wstring text; + for ( auto &&y : diff.text ) + { + text += lineArray[ y ]; + } + diff.text = text; + } +} + +std::size_t diff_match_patch::diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ) +{ + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto n = std::min( text1.length(), text2.length() ); + for ( std::size_t i = 0; i < n; i++ ) + { + if ( text1[ i ] != text2[ i ] ) + { + return i; + } + } + return n; +} + +std::size_t diff_match_patch::diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ) +{ + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + const auto n = std::min( text1_length, text2_length ); + for ( std::size_t i = 1; i <= n; i++ ) + { + if ( text1[ text1_length - i ] != text2[ text2_length - i ] ) + { + return i - 1; + } + } + return n; +} + +std::size_t diff_match_patch::diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ) +{ + // Cache the text lengths to prevent multiple calls. + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + // Eliminate the nullptr case. + if ( text1_length == 0 || text2_length == 0 ) + { + return 0; + } + // Truncate the longer string. + std::wstring text1_trunc = text1; + std::wstring text2_trunc = text2; + if ( text1_length > text2_length ) + { + text1_trunc = text1.substr( text1_length - text2_length ); + } + else if ( text1_length < text2_length ) + { + text2_trunc = text2.substr( 0, text1_length ); + } + const auto text_length = std::min( text1_length, text2_length ); + // Quick check for the worst case. + if ( text1_trunc == text2_trunc ) + { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. 
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + std::size_t best = 0; + std::size_t length = 1; + while ( true ) + { + std::wstring pattern = ( length < text1_trunc.length() ) ? text1_trunc.substr( text_length - length ) : std::wstring(); + if ( pattern.empty() ) + return best; + + auto found = text2_trunc.find( pattern ); + if ( found == std::string::npos ) + { + return best; + } + length += found; + if ( found == 0 || text1_trunc.substr( text_length - length ) == text2_trunc.substr( 0, length ) ) + { + best = length; + length++; + } + } +} + +TStringVector diff_match_patch::diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ) +{ + if ( Diff_Timeout <= 0 ) + { + // Don't risk returning a non-optimal diff if we have unlimited time. + return {}; + } + const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; + const std::wstring shorttext = text1.length() > text2.length() ? text2 : text1; + if ( longtext.length() < 4 || shorttext.length() * 2 < longtext.length() ) + { + return {}; // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + const TStringVector hm1 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 3 ) / 4 ); + // Check again based on the third quarter. + const TStringVector hm2 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 1 ) / 2 ); + TStringVector hm; + if ( hm1.empty() && hm2.empty() ) + { + return {}; + } + else if ( hm2.empty() ) + { + hm = hm1; + } + else if ( hm1.empty() ) + { + hm = hm2; + } + else + { + // Both matched. Select the longest. + hm = hm1[ 4 ].length() > hm2[ 4 ].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if ( text1.length() > text2.length() ) + { + return hm; + } + else + { + TStringVector listRet( { hm[ 2 ], hm[ 3 ], hm[ 0 ], hm[ 1 ], hm[ 4 ] } ); + return listRet; + } +} + +TStringVector diff_match_patch::diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ) +{ + // Start with a 1/4 length substring at position i as a seed. + const std::wstring seed = safeMid( longtext, i, longtext.length() / 4 ); + std::size_t j = std::string::npos; + std::wstring best_common; + std::wstring best_longtext_a, best_longtext_b; + std::wstring best_shorttext_a, best_shorttext_b; + while ( ( j = shorttext.find( seed, j + 1 ) ) != std::string::npos ) + { + const auto prefixLength = diff_commonPrefix( safeMid( longtext, i ), safeMid( shorttext, j ) ); + const auto suffixLength = diff_commonSuffix( longtext.substr( 0, i ), shorttext.substr( 0, j ) ); + if ( best_common.length() < suffixLength + prefixLength ) + { + best_common = safeMid( shorttext, j - suffixLength, suffixLength ) + safeMid( shorttext, j, prefixLength ); + best_longtext_a = longtext.substr( 0, i - suffixLength ); + best_longtext_b = safeMid( longtext, i + prefixLength ); + best_shorttext_a = shorttext.substr( 0, j - suffixLength ); + best_shorttext_b = safeMid( shorttext, j + prefixLength ); + } + } + if ( best_common.length() * 2 >= longtext.length() ) + { + TStringVector listRet( { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common } ); + return listRet; + } + else + { + return {}; + } +} + +void diff_match_patch::diff_cleanupSemantic( TDiffVector &diffs ) +{ + if ( diffs.empty() ) + return; + + bool changes = false; + // Stack of indices where equalities are found. + std::stack< std::size_t > equalities; // stack of equalities + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Number of characters that changed prior to the equality. 
+ std::size_t length_insertions1 = 0; + std::size_t length_deletions1 = 0; + // Number of characters that changed after the equality. + std::size_t length_insertions2 = 0; + std::size_t length_deletions2 = 0; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].operation == EQUAL ) + { // Equality found. + equalities.push( pointer ); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality = diffs[ pointer ].text; + } + else + { // an insertion or deletion + if ( diffs[ pointer ].operation == INSERT ) + { + length_insertions2 += diffs[ pointer ].text.length(); + } + else + { + length_deletions2 += diffs[ pointer ].text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if ( !lastEquality.empty() && ( lastEquality.length() <= std::max( length_insertions1, length_deletions1 ) ) && ( lastEquality.length() <= std::max( length_insertions2, length_deletions2 ) ) ) + { + // Duplicate record. + diffs[ equalities.top() ] = Diff( DELETE, lastEquality ); + // Change second copy to insert. + diffs[ equalities.top() + 1 ].operation = INSERT; + // Throw away the equality we just deleted. + equalities.pop(); + if ( !equalities.empty() ) + { + equalities.pop(); + } + pointer = !equalities.empty() ? equalities.top() : -1; + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality.clear(); + changes = true; + } + } + pointer++; + } + + // Normalize the diff. + if ( changes ) + { + diff_cleanupMerge( diffs ); + } + diff_cleanupSemanticLossless( diffs ); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. 
+ pointer = 1; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer - 1 ].operation == DELETE && diffs[ pointer ].operation == INSERT ) + { + auto deletion = diffs[ pointer - 1 ].text; + auto insertion = diffs[ pointer ].text; + std::size_t overlap_length1 = diff_commonOverlap( deletion, insertion ); + std::size_t overlap_length2 = diff_commonOverlap( insertion, deletion ); + if ( overlap_length1 >= overlap_length2 ) + { + if ( overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0 ) + { + // Overlap found. + // Insert an equality and trim the surrounding edits. + diffs.emplace( diffs.begin() + pointer, EQUAL, insertion.substr( 0, overlap_length1 ) ); + diffs[ pointer - 1 ].text = deletion.substr( 0, deletion.length() - overlap_length1 ); + diffs[ pointer + 1 ].text = insertion.substr( overlap_length1 ); + pointer++; + } + } + else + { + if ( overlap_length2 >= deletion.length() / 2.0 || overlap_length2 >= insertion.length() / 2.0 ) + { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + diffs.emplace( diffs.begin() + pointer, EQUAL, deletion.substr( 0, overlap_length2 ) ); + diffs[ pointer - 1 ].operation = INSERT; + diffs[ pointer - 1 ].text = insertion.substr( 0, insertion.length() - overlap_length2 ); + diffs[ pointer + 1 ].operation = DELETE; + diffs[ pointer + 1 ].text = deletion.substr( overlap_length2 ); + pointer++; + } + } + pointer++; + } + pointer++; + } +} + +void diff_match_patch::diff_cleanupSemanticLossless( TDiffVector &diffs ) +{ + int pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while ( ( pointer != -1 ) && !diffs.empty() && ( pointer < ( diffs.size() - 1 ) ) ) + { + if ( diffs[ pointer - 1 ].operation == EQUAL && diffs[ pointer + 1 ].operation == EQUAL ) + { + // This is a single edit surrounded by equalities. 
+ auto equality1 = diffs[ pointer - 1 ].text; + auto edit = diffs[ pointer ].text; + auto equality2 = diffs[ pointer + 1 ].text; + + // First, shift the edit as far left as possible. + auto commonOffset = diff_commonSuffix( equality1, edit ); + if ( commonOffset > 0 ) + { + auto commonString = safeMid( edit, edit.length() - commonOffset ); + equality1 = equality1.substr( 0, equality1.length() - commonOffset ); + edit = commonString + edit.substr( 0, edit.length() - commonOffset ); + equality2 = commonString + equality2; + } + + // Second, step character by character right, + // looking for the best fit. + auto bestEquality1 = equality1; + auto bestEdit = edit; + auto bestEquality2 = equality2; + auto bestScore = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + while ( !edit.empty() && !equality2.empty() && edit[ 0 ] == equality2[ 0 ] ) + { + equality1 += edit[ 0 ]; + edit = edit.substr( 1 ) + equality2[ 0 ]; + equality2 = equality2.substr( 1 ); + auto score = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + // The >= encourages trailing rather than leading whitespace on + // edits. + if ( score >= bestScore ) + { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if ( diffs[ pointer - 1 ].text != bestEquality1 ) + { + // We have an improvement, save it back to the diff. + if ( !bestEquality1.empty() ) + { + diffs[ pointer - 1 ].text = bestEquality1; + } + else + { + diffs.erase( diffs.begin() + pointer - 1 ); + pointer--; + } + diffs[ pointer ].text = bestEdit; + if ( !bestEquality2.empty() ) + { + diffs[ pointer + 1 ].text = bestEquality2; + } + else + { + diffs.erase( diffs.begin() + pointer + 1 ); + pointer--; + } + } + } + pointer++; + } +} + +int64_t diff_match_patch::diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ) +{ + if ( one.empty() || two.empty() ) + { + // Edges are the best. 
+ return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. + auto char1 = one[ one.length() - 1 ]; + auto char2 = two[ 0 ]; + bool nonAlphaNumeric1 = !std::iswalnum( char1 ); + bool nonAlphaNumeric2 = !std::iswalnum( char2 ); + bool whitespace1 = nonAlphaNumeric1 && std::iswspace( char1 ); + bool whitespace2 = nonAlphaNumeric2 && std::iswspace( char2 ); + bool lineBreak1 = whitespace1 && std::iswcntrl( char1 ); + bool lineBreak2 = whitespace2 && std::iswcntrl( char2 ); + bool blankLine1 = lineBreak1 && std::regex_search( one, BLANKLINEEND ); + bool blankLine2 = lineBreak2 && std::regex_search( two, BLANKLINESTART ); + + if ( blankLine1 || blankLine2 ) + { + // Five points for blank lines. + return 5; + } + else if ( lineBreak1 || lineBreak2 ) + { + // Four points for line breaks. + return 4; + } + else if ( nonAlphaNumeric1 && !whitespace1 && whitespace2 ) + { + // Three points for end of sentences. + return 3; + } + else if ( whitespace1 || whitespace2 ) + { + // Two points for whitespace. + return 2; + } + else if ( nonAlphaNumeric1 || nonAlphaNumeric2 ) + { + // One point for non-alphanumeric. + return 1; + } + return 0; +} + +// Define some regex patterns for matching boundaries. +std::wregex diff_match_patch::BLANKLINEEND = std::wregex( LR"(\n\r?\n$)" ); +std::wregex diff_match_patch::BLANKLINESTART = std::wregex( LR"(^\r?\n\r?\n)" ); + +void diff_match_patch::diff_cleanupEfficiency( TDiffVector &diffs ) +{ + bool changes = false; + // Stack of indices where equalities are found. + std::stack< std::size_t > equalities; + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. 
+ // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].operation == EQUAL ) + { // Equality found. + if ( diffs[ pointer ].text.length() < Diff_EditCost && ( post_ins || post_del ) ) + { + // Candidate found. + equalities.push( pointer ); + pre_ins = post_ins; + pre_del = post_del; + lastEquality = diffs[ pointer ].text; + } + else + { + // Not a candidate, and can never become one. + equalities = {}; + lastEquality.clear(); + } + post_ins = post_del = false; + } + else + { // An insertion or deletion. + if ( diffs[ pointer ].operation == DELETE ) + { + post_del = true; + } + else + { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if ( ( lastEquality.length() != 0 ) && ( ( pre_ins && pre_del && post_ins && post_del ) || ( ( lastEquality.length() < Diff_EditCost / 2 ) && ( ( pre_ins ? 1 : 0 ) + ( pre_del ? 1 : 0 ) + ( post_ins ? 1 : 0 ) + ( post_del ? 1 : 0 ) ) == 3 ) ) ) + { + // Duplicate record. + diffs.emplace( diffs.begin() + equalities.top(), DELETE, lastEquality ); + // Change second copy to insert. + diffs[ equalities.top() + 1 ].operation = INSERT; + equalities.pop(); // Throw away the equality we just deleted. + lastEquality.clear(); + if ( pre_ins && pre_del ) + { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities = {}; + } + else + { + if ( !equalities.empty() ) + { + equalities.pop(); + } + + pointer = !equalities.empty() ? 
equalities.top() : -1; + post_ins = post_del = false; + } + changes = true; + } + } + pointer++; + } + + if ( changes ) + { + diff_cleanupMerge( diffs ); + } +} + +void diff_match_patch::diff_cleanupMerge( TDiffVector &diffs ) +{ + diffs.emplace_back( EQUAL, L"" ); + int pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + + while ( pointer < diffs.size() ) + { + switch ( diffs[ pointer ].operation ) + { + case INSERT: + count_insert++; + text_insert += diffs[ pointer ].text; + pointer++; + break; + case DELETE: + count_delete++; + text_delete += diffs[ pointer ].text; + pointer++; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if ( count_delete + count_insert > 1 ) + { + if ( count_delete != 0 && count_insert != 0 ) + { + // Factor out any common prefixies. + auto commonlength = diff_commonPrefix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + if ( ( pointer > ( count_delete + count_insert ) ) && diffs[ pointer - ( count_delete + count_insert ) - 1 ].operation == EQUAL ) + { + diffs[ pointer - count_delete - count_insert - 1 ].text += text_insert.substr( 0, commonlength ); + } + else + { + diffs.emplace( diffs.begin(), EQUAL, text_insert.substr( 0, commonlength ) ); + pointer++; + } + text_insert = text_insert.substr( commonlength ); + text_delete = text_delete.substr( commonlength ); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + diffs[ pointer ].text = safeMid( text_insert, text_insert.length() - commonlength ) + diffs[ pointer ].text; + text_insert = text_insert.substr( 0, text_insert.length() - commonlength ); + text_delete = text_delete.substr( 0, text_delete.length() - commonlength ); + } + } + // Delete the offending records and add the merged ones. 
+ pointer -= count_delete + count_insert; + Splice( diffs, pointer, count_delete + count_insert ); + if ( !text_delete.empty() ) + { + Splice( diffs, pointer, 0, { Diff( DELETE, text_delete ) } ); + pointer++; + } + if ( !text_insert.empty() ) + { + Splice( diffs, pointer, 0, { Diff( INSERT, text_insert ) } ); + pointer++; + } + pointer++; + } + else if ( pointer != 0 && diffs[ pointer - 1 ].operation == EQUAL ) + { + // Merge this equality with the previous one. + diffs[ pointer - 1 ].text += diffs[ pointer ].text; + diffs.erase( diffs.begin() + pointer ); + } + else + { + pointer++; + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + break; + } + } + if ( diffs.back().text.empty() ) + { + diffs.pop_back(); // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by + // equalities which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + bool changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while ( !diffs.empty() && pointer < ( diffs.size() - 1 ) ) + { + if ( diffs[ pointer - 1 ].operation == EQUAL && diffs[ pointer + 1 ].operation == EQUAL ) + { + // This is a single edit surrounded by equalities. + if ( endsWith( diffs[ pointer ].text, diffs[ pointer - 1 ].text ) ) + { + // Shift the edit over the previous equality. + diffs[ pointer ].text = diffs[ pointer - 1 ].text + diffs[ pointer ].text.substr( 0, diffs[ pointer ].text.length() - diffs[ pointer - 1 ].text.length() ); + diffs[ pointer + 1 ].text = diffs[ pointer - 1 ].text + diffs[ pointer + 1 ].text; + Splice( diffs, pointer - 1, 1 ); + changes = true; + } + else if ( diffs[ pointer ].text.find( diffs[ pointer + 1 ].text ) == 0 ) + { + // Shift the edit over the next equality. 
+ diffs[ pointer - 1 ].text += diffs[ pointer + 1 ].text; + diffs[ pointer ].text = diffs[ pointer ].text.substr( diffs[ pointer + 1 ].text.length() ) + diffs[ pointer + 1 ].text; + Splice( diffs, pointer + 1, 1 ); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if ( changes ) + { + diff_cleanupMerge( diffs ); + } +} +std::size_t diff_match_patch::diff_xIndex( const TDiffVector &diffs, std::size_t loc ) +{ + std::size_t chars1 = 0; + std::size_t chars2 = 0; + std::size_t last_chars1 = 0; + std::size_t last_chars2 = 0; + Diff lastDiff; + for ( auto &&aDiff : diffs ) + { + if ( aDiff.operation != INSERT ) + { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if ( aDiff.operation != DELETE ) + { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if ( chars1 > loc ) + { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if ( lastDiff.operation == DELETE ) + { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + ( loc - last_chars1 ); +} + +std::wstring diff_match_patch::diff_prettyHtml( const TDiffVector &diffs ) +{ + std::wstring html; + std::wstring text; + for ( auto &&aDiff : diffs ) + { + text = aDiff.text; + replace( text, L"&", L"&" ); + replace( text, L"<", L"<" ); + replace( text, L">", L">" ); + replace( text, L"\n", L"¶
" ); + switch ( aDiff.operation ) + { + case INSERT: + html += std::wstring( L"" ) + text + std::wstring( L"" ); + break; + case DELETE: + html += std::wstring( L"" ) + text + std::wstring( L"" ); + break; + case EQUAL: + html += std::wstring( L"" ) + text + std::wstring( L"" ); + break; + } + } + return html; +} + +std::wstring diff_match_patch::diff_text1( const TDiffVector &diffs ) +{ + std::wstring text; + for ( auto &&aDiff : diffs ) + { + if ( aDiff.operation != INSERT ) + { + text += aDiff.text; + } + } + return text; +} + +std::wstring diff_match_patch::diff_text2( const TDiffVector &diffs ) +{ + std::wstring text; + for ( auto &&aDiff : diffs ) + { + if ( aDiff.operation != DELETE ) + { + text += aDiff.text; + } + } + return text; +} + +std::size_t diff_match_patch::diff_levenshtein( const TDiffVector &diffs ) +{ + std::size_t levenshtein = 0; + std::size_t insertions = 0; + std::size_t deletions = 0; + for ( auto &&aDiff : diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += std::max( insertions, deletions ); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max( insertions, deletions ); + return levenshtein; +} + +std::wstring diff_match_patch::diff_toDelta( const TDiffVector &diffs ) +{ + std::wstring text; + for ( auto &&aDiff : diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + text += L"+" + toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) + L"\t"; + break; + case DELETE: + text += L"-" + std::to_wstring( aDiff.text.length() ) + L"\t"; + break; + case EQUAL: + text += L"=" + std::to_wstring( aDiff.text.length() ) + L"\t"; + break; + } + } + if ( !text.empty() ) + { + // Strip off trailing tab character. 
+ text = text.substr( 0, text.length() - 1 ); + } + return text; +} + +TDiffVector diff_match_patch::diff_fromDelta( const std::wstring &text1, const std::wstring &delta ) +{ + TDiffVector diffs; + std::size_t pointer = 0; // Cursor in text1 + TStringVector tokens = splitString( delta, L"\t", false ); + for ( auto &&token : tokens ) + { + if ( token.empty() ) + { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + std::wstring param = safeMid( token, 1 ); + switch ( token[ 0 ] ) + { + case '+': + replace( param, L"+", L"%2b" ); + param = fromPercentEncoding( param ); + diffs.emplace_back( INSERT, param ); + break; + case '-': + // Fall through. + case '=': + { + auto n = toInt( param ); + if ( n < 0 ) + { + throw std::wstring( L"Negative number in diff_fromDelta: " + param ); + } + std::wstring text; + if ( ( pointer + n ) > text1.length() ) + { + throw std::wstring( L"Delta length (" + std::to_wstring( pointer + n ) + L") larger than source text length (" + std::to_wstring( text1.length() ) + L")." 
); + } + + text = safeMid( text1, pointer, n ); + pointer += n; + if ( token[ 0 ] == L'=' ) + { + diffs.emplace_back( EQUAL, text ); + } + else + { + diffs.emplace_back( DELETE, text ); + } + break; + } + default: + throw std::wstring( L"Invalid diff operation in diff_fromDelta: " + token[ 0 ] ); + } + } + if ( pointer != text1.length() ) + { + throw std::wstring( L"Delta length (" ) + std::to_wstring( pointer ) + L") smaller than source text length (" + std::to_wstring( text1.length() ) + L")"; + } + return diffs; +} + +// MATCH FUNCTIONS + +std::size_t diff_match_patch::match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) +{ + // Check for null inputs not needed since null can't be passed via std::wstring + + loc = std::max( 0ULL, std::min( loc, text.length() ) ); + if ( text == pattern ) + { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } + else if ( text.empty() ) + { + // Nothing to match. + return -1; + } + else if ( loc + pattern.length() <= text.length() && safeMid( text, loc, pattern.length() ) == pattern ) + { + // Perfect match at the perfect spot! (Includes case of nullptr pattern) + return loc; + } + else + { + // Do a fuzzy compare. + return match_bitap( text, pattern, loc ); + } +} + +std::size_t diff_match_patch::match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) +{ + if ( !( Match_MaxBits == 0 || pattern.length() <= Match_MaxBits ) ) + { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + auto &&s = match_alphabet( pattern ); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + auto best_loc = text.find( pattern, loc ); + if ( best_loc != std::string::npos ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + // What about in the other direction? 
(speedup) + auto start = std::min( loc + pattern.length(), text.length() ); + best_loc = text.rfind( pattern, start ); + if ( best_loc != std::string::npos ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + } + } + + // Initialise the bit arrays. + auto matchmask = 1 << ( pattern.length() - 1 ); + best_loc = std::string::npos; + + std::size_t bin_min, bin_mid; + auto bin_max = pattern.length() + text.length(); + std::vector< int64_t > rd; + std::vector< int64_t > last_rd; + for ( int d = 0; d < pattern.length(); d++ ) + { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while ( bin_min < bin_mid ) + { + if ( match_bitapScore( d, loc + bin_mid, loc, pattern ) <= score_threshold ) + { + bin_min = bin_mid; + } + else + { + bin_max = bin_mid; + } + bin_mid = ( bin_max - bin_min ) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + auto start = std::max( 1ULL, ( loc > bin_mid ) ? ( loc - bin_mid + 1 ) : 0 ); + auto finish = std::min( loc + bin_mid, text.length() ) + pattern.length(); + + rd = std::vector< int64_t >( finish + 2, 0 ); + rd[ finish + 1 ] = ( 1 << d ) - 1; + for ( auto j = finish; ( j != -1 ) && ( j >= start ); j-- ) + { + int64_t charMatch; + if ( text.length() <= j - 1 ) + { + // Out of range. + charMatch = 0; + } + else + { + auto pos = s.find( text[ j - 1 ] ); + if ( pos == s.end() ) + charMatch = 0; + else + charMatch = ( *pos ).second; + } + if ( d == 0 ) + { + // First pass: exact match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch; + } + else + { + // Subsequent passes: fuzzy match. 
+ rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch | ( ( ( last_rd[ j + 1 ] | last_rd[ j ] ) << 1 ) | 1 ) | last_rd[ j + 1 ]; + } + if ( ( rd[ j ] & matchmask ) != 0 ) + { + double score = match_bitapScore( d, j - 1, loc, pattern ); + // This match will almost certainly be better than any existing + // match. But check anyway. + if ( score <= score_threshold ) + { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if ( best_loc > loc ) + { + // When passing loc, don't exceed our current distance from loc. + start = std::max( 1ULL, ( 2 * loc > best_loc ) ? 2 * loc - best_loc : 1 ); + } + else + { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if ( match_bitapScore( d + 1, loc, loc, pattern ) > score_threshold ) + { + // No hope for a (better) match at greater error levels. + break; + } + last_rd = std::move( rd ); + } + return best_loc; +} + +double diff_match_patch::match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ) +{ + const float accuracy = static_cast< float >( e ) / pattern.length(); + const auto proximity = std::abs( loc - x ); + if ( Match_Distance == 0 ) + { + // Dodge divide by zero error. + return proximity == 0 ? 
accuracy : 1.0; + } + return accuracy + ( proximity / static_cast< float >( Match_Distance ) ); +} + +TCharPosMap diff_match_patch::match_alphabet( const std::wstring &pattern ) +{ + TCharPosMap s; + std::size_t i; + for ( i = 0; i < pattern.length(); i++ ) + { + auto c = pattern[ i ]; + s[ c ] = 0; + } + for ( i = 0; i < pattern.length(); i++ ) + { + auto c = pattern[ i ]; + auto pos = s.find( c ); + std::size_t prev = 0; + if ( pos != s.end() ) + prev = ( *pos ).second; + s[ c ] = prev | ( 1ULL << ( pattern.length() - i - 1 ) ); + } + return s; +} + +// PATCH FUNCTIONS + +void diff_match_patch::patch_addContext( Patch &patch, const std::wstring &text ) +{ + if ( text.empty() ) + { + return; + } + std::wstring pattern = safeMid( text, patch.start2, patch.length1 ); + std::size_t padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while ( ( text.find( pattern ) != text.rfind( pattern ) ) && ( pattern.length() < ( Match_MaxBits - Patch_Margin - Patch_Margin ) ) ) + { + padding += Patch_Margin; + pattern = safeMid( text, std::max( 0ULL, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0ULL ) ), std::min( text.length(), patch.start2 + patch.length1 + padding ) - std::max( 0ULL, ( patch.start2 > padding ) ? patch.start2 - padding : 0 ) ); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + std::wstring prefix = safeMid( text, std::max( 0ULL, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0ULL ) ), patch.start2 - std::max( 0ULL, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0ULL ) ) ); + if ( !prefix.empty() ) + { + patch.diffs.emplace( patch.diffs.begin(), EQUAL, prefix ); + } + // Add the suffix. 
+ std::wstring suffix = safeMid( text, patch.start2 + patch.length1, std::min( text.length(), patch.start2 + patch.length1 + padding ) - ( patch.start2 + patch.length1 ) ); + if ( !suffix.empty() ) + { + patch.diffs.emplace_back( EQUAL, suffix ); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); +} + +TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring &text2 ) +{ + // Check for null inputs not needed since null can't be passed via std::wstring + + // No diffs provided, compute our own. + TDiffVector diffs = diff_main( text1, text2, true ); + if ( diffs.size() > 2 ) + { + diff_cleanupSemantic( diffs ); + diff_cleanupEfficiency( diffs ); + } + + return patch_make( text1, diffs ); +} + +TPatchVector diff_match_patch::patch_make( const TDiffVector &diffs ) +{ + // No origin string provided, compute our own. + const std::wstring text1 = diff_text1( diffs ); + return patch_make( text1, diffs ); +} + +TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring & /*text2*/, const TDiffVector &diffs ) +{ + // text2 is entirely unused. + return patch_make( text1, diffs ); +} + +TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const TDiffVector &diffs ) +{ + // Check for null inputs not needed since null can't be passed via std::wstring + + TPatchVector patches; + if ( diffs.empty() ) + { + return patches; // Get rid of the nullptr case. + } + Patch patch; + std::size_t char_count1 = 0; // Number of characters into the text1 string. + std::size_t char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. 
+ std::wstring prepatch_text = text1; + std::wstring postpatch_text = text1; + for ( auto &&aDiff : diffs ) + { + if ( patch.diffs.empty() && aDiff.operation != EQUAL ) + { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch ( aDiff.operation ) + { + case INSERT: + patch.diffs.emplace_back( aDiff ); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.substr( 0, char_count2 ) + aDiff.text + safeMid( postpatch_text, char_count2 ); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.emplace_back( aDiff ); + postpatch_text = postpatch_text.substr( 0, char_count2 ) + safeMid( postpatch_text, char_count2 + aDiff.text.length() ); + break; + case EQUAL: + if ( aDiff.text.length() <= 2 * Patch_Margin && !patch.diffs.empty() && !( aDiff == diffs.back() ) ) + { + // Small equality inside a patch. + patch.diffs.emplace_back( aDiff ); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if ( aDiff.text.length() >= 2 * Patch_Margin ) + { + // Time for a new patch. + if ( !patch.diffs.empty() ) + { + patch_addContext( patch, prepatch_text ); + patches.emplace_back( patch ); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if ( aDiff.operation != INSERT ) + { + char_count1 += aDiff.text.length(); + } + if ( aDiff.operation != DELETE ) + { + char_count2 += aDiff.text.length(); + } + } + // Pick up the leftover patch if not empty. 
+ if ( !patch.diffs.empty() ) + { + patch_addContext( patch, prepatch_text ); + patches.emplace_back( patch ); + } + + return patches; +} + +TPatchVector diff_match_patch::patch_deepCopy( const TPatchVector &patches ) +{ + TPatchVector patchesCopy; + for ( auto &&aPatch : patches ) + { + Patch patchCopy = Patch(); + for ( auto &&aDiff : aPatch.diffs ) + { + patchCopy.diffs.emplace_back( aDiff.operation, aDiff.text ); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.emplace_back( patchCopy ); + } + return patchesCopy; +} + +std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::wstring text ) +{ + if ( patches.empty() ) + { + return { text, std::vector< bool >( 0 ) }; + } + + // Deep copy the patches so that no changes are made to originals. + patches = patch_deepCopy( patches ); + + std::wstring nullPadding = patch_addPadding( patches ); + text = nullPadding + text + nullPadding; + patch_splitMax( patches ); + + std::size_t x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + uint64_t delta = 0; + std::vector< bool > results( patches.size() ); + for ( auto &&aPatch : patches ) + { + auto expected_loc = aPatch.start2 + delta; + std::wstring text1 = diff_text1( aPatch.diffs ); + std::size_t start_loc; + std::size_t end_loc = std::string::npos; + if ( text1.length() > Match_MaxBits ) + { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. 
+ start_loc = match_main( text, text1.substr( 0, Match_MaxBits ), expected_loc ); + if ( start_loc != -1 ) + { + end_loc = match_main( text, text1.substr( text1.length() - Match_MaxBits ), expected_loc + text1.length() - Match_MaxBits ); + if ( end_loc == -1 || start_loc >= end_loc ) + { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } + else + { + start_loc = match_main( text, text1, expected_loc ); + } + if ( start_loc == -1 ) + { + // No match found. :( + results[ x ] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } + else + { + // Found a match. :) + results[ x ] = true; + delta = start_loc - expected_loc; + std::wstring text2; + if ( end_loc == -1 ) + { + text2 = safeMid( text, start_loc, text1.length() ); + } + else + { + text2 = safeMid( text, start_loc, end_loc + Match_MaxBits - start_loc ); + } + if ( text1 == text2 ) + { + // Perfect match, just shove the replacement text in. + text = text.substr( 0, start_loc ) + diff_text2( aPatch.diffs ) + safeMid( text, start_loc + text1.length() ); + } + else + { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + TDiffVector diffs = diff_main( text1, text2, false ); + if ( text1.length() > Match_MaxBits && diff_levenshtein( diffs ) / static_cast< float >( text1.length() ) > Patch_DeleteThreshold ) + { + // The end points match, but the content is unacceptably bad. 
+ results[ x ] = false; + } + else + { + diff_cleanupSemanticLossless( diffs ); + std::size_t index1 = 0; + for ( auto &&aDiff : aPatch.diffs ) + { + if ( aDiff.operation != EQUAL ) + { + auto index2 = diff_xIndex( diffs, index1 ); + if ( aDiff.operation == INSERT ) + { + // Insertion + text = text.substr( 0, start_loc + index2 ) + aDiff.text + safeMid( text, start_loc + index2 ); + } + else if ( aDiff.operation == DELETE ) + { + // Deletion + text = text.substr( 0, start_loc + index2 ) + safeMid( text, start_loc + diff_xIndex( diffs, index1 + aDiff.text.length() ) ); + } + } + if ( aDiff.operation != DELETE ) + { + index1 += aDiff.text.length(); + } + } + } + } + } + x++; + } + // Strip the padding off. + text = safeMid( text, nullPadding.length(), text.length() - 2 * nullPadding.length() ); + return { text, results }; +} +std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) +{ + auto paddingLength = Patch_Margin; + std::wstring nullPadding; + for ( char x = 1; x <= paddingLength; x++ ) + { + nullPadding += to_wstring( x ); + } + + // Bump all the patches forward. + for( auto && aPatch : patches ) + { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + //auto && patch = patches.front(); + //TDiffVector & diffs = patch.diffs; + if ( patches.front().diffs.empty() || patches.front().diffs.front().operation != EQUAL ) + { + // Add nullPadding equality. + patches.front().diffs.emplace( patches.front().diffs.begin(), EQUAL, nullPadding ); + patches.front().start1 -= paddingLength; // Should be 0. + patches.front().start2 -= paddingLength; // Should be 0. + patches.front().length1 += paddingLength; + patches.front().length2 += paddingLength; + } + else if ( paddingLength > patches.front().diffs.front().text.length() ) + { + // Grow first equality. 
+ auto &&firstDiff = patches.front().diffs.front(); + auto extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = nullPadding.substr( firstDiff.text.length() ) + firstDiff.text; + patches.front().start1 -= extraLength; + patches.front().start2 -= extraLength; + patches.front().length1 += extraLength; + patches.front().length2 += extraLength; + } + + // Add some padding on end of last diff. + //patch = patches.back(); + //diffs = patch.diffs; + if ( ( patches.back().diffs.size() == 0 ) || patches.back().diffs.back().operation != EQUAL ) + { + // Add nullPadding equality. + patches.back().diffs.emplace_back( EQUAL, nullPadding ); + patches.back().length1 += paddingLength; + patches.back().length2 += paddingLength; + } + else if ( paddingLength > patches.back().diffs.back().text.length() ) + { + // Grow last equality. + //Diff &lastDiff = patches.back().diffs.back(); + auto extraLength = paddingLength - patches.back().diffs.back().text.length(); + patches.back().diffs.back().text += nullPadding.substr( 0, extraLength ); + patches.back().length1 += extraLength; + patches.back().length2 += extraLength; + } + + return nullPadding; +} + +#ifdef NO +std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) +{ + short paddingLength = Patch_Margin; + std::wstring nullPadding; + for ( short x = 1; x <= paddingLength; x++ ) + { + nullPadding += static_cast< wchar_t >( x ); + } + + // Bump all the patches forward. + auto pointer = patches.begin(); + while ( pointer != patches.end() ) + { + auto &&aPatch = *pointer; + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + auto &&firstPatch = patches.front(); + TDiffVector &firstPatchDiffs = firstPatch.diffs; + if ( firstPatchDiffs.empty() || firstPatchDiffs.front().operation != EQUAL ) + { + // Add nullPadding equality. 
+ firstPatchDiffs.emplace( firstPatchDiffs.begin(), EQUAL, nullPadding ); + firstPatch.start1 -= paddingLength; // Should be 0. + firstPatch.start2 -= paddingLength; // Should be 0. + firstPatch.length1 += paddingLength; + firstPatch.length2 += paddingLength; + } + else if ( paddingLength > firstPatchDiffs.front().text.length() ) + { + // Grow first equality. + Diff &firstDiff = firstPatchDiffs.front(); + auto extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = safeMid( nullPadding, firstDiff.text.length(), paddingLength - firstDiff.text.length() ) + firstDiff.text; + firstPatch.start1 -= extraLength; + firstPatch.start2 -= extraLength; + firstPatch.length1 += extraLength; + firstPatch.length2 += extraLength; + } + + // Add some padding on end of last diff. + Patch &lastPatch = patches.front(); + TDiffVector &lastPatchDiffs = lastPatch.diffs; + if ( lastPatchDiffs.empty() || lastPatchDiffs.back().operation != EQUAL ) + { + // Add nullPadding equality. + lastPatchDiffs.emplace_back( EQUAL, nullPadding ); + lastPatch.length1 += paddingLength; + lastPatch.length2 += paddingLength; + } + else if ( paddingLength > lastPatchDiffs.back().text.length() ) + { + // Grow last equality. + Diff &lastDiff = lastPatchDiffs.back(); + auto extraLength = paddingLength - lastDiff.text.length(); + lastDiff.text += nullPadding.substr( 0, extraLength ); + lastPatch.length1 += extraLength; + lastPatch.length2 += extraLength; + } + + return nullPadding; +} +#endif +void diff_match_patch::patch_splitMax( TPatchVector &patches ) +{ + auto patch_size = Match_MaxBits; + for ( int x = 0; x < patches.size(); x++ ) + { + if ( patches[ x ].length1 <= patch_size ) + { + continue; + } + Patch bigpatch = patches[ x ]; + // Remove the big old patch. + Splice( patches, x--, 1 ); + auto start1 = bigpatch.start1; + auto start2 = bigpatch.start2; + std::wstring precontext; + while ( !bigpatch.diffs.empty() ) + { + // Create one of several smaller patches. 
+ Patch patch; + bool empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if ( precontext.length() != 0 ) + { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.emplace_back( EQUAL, precontext ); + } + while ( !bigpatch.diffs.empty() && ( patch.length1 < ( patch_size - Patch_Margin ) ) ) + { + auto diff_type = bigpatch.diffs[ 0 ].operation; + auto diff_text = bigpatch.diffs[ 0 ].text; + if ( diff_type == INSERT ) + { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.push_back( bigpatch.diffs.front() ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + empty = false; + } + else if ( ( diff_type == DELETE ) && ( patch.diffs.size() == 1 ) && ( patch.diffs.front().operation == EQUAL ) && ( diff_text.length() > 2 * patch_size ) ) + { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.emplace_back( diff_type, diff_text ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substr( 0, std::min( diff_text.length(), ( patch_size > ( patch.length1 + Patch_Margin ) ) ? ( patch_size - patch.length1 - Patch_Margin ) : ( -1 * 1ULL ) ) ); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if ( diff_type == EQUAL ) + { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } + else + { + empty = false; + } + patch.diffs.emplace_back( diff_type, diff_text ); + if ( diff_text == bigpatch.diffs[ 0 ].text ) + { + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + bigpatch.diffs[ 0 ].text = bigpatch.diffs[ 0 ].text.substr( diff_text.length() ); + } + } + } + // Compute the head context for the next patch. 
+ precontext = diff_text2( patch.diffs ); + precontext = precontext.substr( std::max( 0ULL, ( precontext.length() > Patch_Margin ) ? ( precontext.length() - Patch_Margin ) : 0 ) ); + + std::wstring postcontext; + // Append the end context for this patch. + if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) + { + postcontext = diff_text1( bigpatch.diffs ).substr( 0, Patch_Margin ); + } + else + { + postcontext = diff_text1( bigpatch.diffs ); + } + + if ( postcontext.length() != 0 ) + { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ( ( patch.diffs.size() != 0 ) && ( patch.diffs[ patch.diffs.size() - 1 ].operation == EQUAL ) ) + { + patch.diffs[ patch.diffs.size() - 1 ].text += postcontext; + } + else + { + patch.diffs.emplace_back( EQUAL, postcontext ); + } + } + if ( !empty ) + { + Splice( patches, ++x, 0ULL, patch ); + } + } + } +} + +#ifdef NO +void diff_match_patch::patch_splitMax( TPatchVector &patches ) +{ + short patch_size = Match_MaxBits; + std::wstring precontext, postcontext; + Patch patch; + std::size_t start1, start2; + bool empty; + Operation diff_type; + std::wstring diff_text; + auto pointer = patches.begin(); + Patch bigpatch; + + if ( pointer != patches.end() ) + { + bigpatch = *pointer; + } + + while ( !bigpatch.isNull() ) + { + if ( bigpatch.length1 <= patch_size ) + { + bigpatch = ( ( ++pointer ) != patches.end() ) ? *pointer : Patch(); + continue; + } + // Remove the big old patch. + pointer = patches.erase( pointer ); + start1 = bigpatch.start1; + start2 = bigpatch.start2; + precontext.clear(); + while ( !bigpatch.diffs.empty() ) + { + // Create one of several smaller patches. 
+ patch = Patch(); + empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if ( !precontext.empty() ) + { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.emplace_back( EQUAL, precontext ); + } + while ( !bigpatch.diffs.empty() && patch.length1 < patch_size - Patch_Margin ) + { + diff_type = bigpatch.diffs.front().operation; + diff_text = bigpatch.diffs.front().text; + if ( diff_type == INSERT ) + { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.emplace_back( bigpatch.diffs.front() ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + empty = false; + } + else if ( diff_type == DELETE && patch.diffs.size() == 1 && patch.diffs.front().operation == EQUAL && diff_text.length() > 2 * patch_size ) + { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.emplace_back( diff_type, diff_text ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substr( 0, std::min( diff_text.length(), ( patch_size > ( patch.length1 + Patch_Margin ) ) ? ( patch_size - patch.length1 - Patch_Margin ) : ( -1 * 1ULL ) ) ); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if ( diff_type == EQUAL ) + { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } + else + { + empty = false; + } + patch.diffs.emplace_back( diff_type, diff_text ); + if ( diff_text == bigpatch.diffs.front().text ) + { + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + bigpatch.diffs.front().text = safeMid( bigpatch.diffs.front().text, diff_text.length() ); + } + } + } + // Compute the head context for the next patch. 
+ precontext = diff_text2( patch.diffs ); + precontext = safeMid( precontext, precontext.length() - Patch_Margin ); + // Append the end context for this patch. + if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) + { + postcontext = diff_text1( bigpatch.diffs ).substr( 0, Patch_Margin ); + } + else + { + postcontext = diff_text1( bigpatch.diffs ); + } + if ( !postcontext.empty() ) + { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ( !patch.diffs.empty() && patch.diffs.back().operation == EQUAL ) + { + patch.diffs.back().text += postcontext; + } + else + { + patch.diffs.emplace_back( EQUAL, postcontext ); + } + } + if ( !empty ) + { + patches.emplace( pointer, patch ); + } + } + bigpatch = ( ( ++pointer ) != patches.end() ) ? *pointer : Patch(); + } +} +#endif +std::wstring diff_match_patch::patch_toText( const TPatchVector &patches ) +{ + std::wstring text; + for ( auto &&aPatch : patches ) + { + text += aPatch.toString(); + } + return text; +} + +TPatchVector diff_match_patch::patch_fromText( const std::wstring &textline ) +{ + TPatchVector patches; + if ( textline.empty() ) + { + return patches; + } + auto text = splitString( textline, L"\n", true ); + int textPointer = 0; + std::wstring line; + while ( textPointer < text.size() ) + { + patches.push_back( text[ textPointer ] ); + auto &patch = patches.back(); + textPointer++; + + while ( textPointer < text.size() ) + { + if ( text[ textPointer ].empty() ) + { + ++textPointer; + continue; + } + + auto sign = text[ textPointer ][ 0 ]; + + line = text[ textPointer ].substr( 1 ); + replace( line, L"+", L"%2b" ); + line = fromPercentEncoding( line ); + if ( sign == '-' ) + { + // Deletion. + patch.diffs.emplace_back( DELETE, line ); + } + else if ( sign == '+' ) + { + // Insertion. + patch.diffs.emplace_back( INSERT, line ); + } + else if ( sign == ' ' ) + { + // Minor equality. 
+ patch.diffs.emplace_back( EQUAL, line ); + } + else if ( sign == '@' ) + { + // Start of next patch. + break; + } + else + { + // WTF? + throw std::wstring( std::wstring( L"Invalid patch mode '" ) + sign + std::wstring( L" in: " ) + line ); + return {}; + } + textPointer++; + } + } + return patches; +} + +std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos ) +{ + return safeMid( str, pos, std::string::npos ); +} + +std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) +{ + return ( pos == str.length() ) ? std::wstring() : str.substr( pos, len ); +} + +void diff_match_patch::replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ) +{ + std::size_t pos = inString.find( from ); + while ( pos != std::wstring::npos ) + { + inString.replace( pos, from.length(), to ); + pos = inString.find( from, pos + 1 ); + } +} + +wchar_t toHexUpper( wchar_t value ) +{ + return L"0123456789ABCDEF"[ value & 0xF ]; +} + +std::wstring diff_match_patch::toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ) +{ + std::wstring retVal; + + if ( ( ( c >= 0x61 && c <= 0x7A ) // ALPHA + || ( c >= 0x41 && c <= 0x5A ) // ALPHA + || ( c >= 0x30 && c <= 0x39 ) // DIGIT + || c == 0x2D // - + || c == 0x2E // . 
+ || c == 0x5F // _ + || c == 0x7E // ~ + || ( exclude.find( c ) != std::string::npos ) ) + && ( include.find( c ) == std::string::npos ) ) + { + retVal = std::wstring( 1, c ); + } + else + { + retVal = L'%'; + retVal += toHexUpper( ( c & 0xf0 ) >> 4 ); + retVal += toHexUpper( c & 0xf ); + } + return retVal; +} + +std::wstring diff_match_patch::toPercentEncoding( const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, const std::wstring &include /*= std::wstring() */ ) +{ + if ( input.empty() ) + return {}; + std::wstring retVal; + retVal.reserve( input.length() * 3 ); + + static_assert( sizeof( wchar_t ) <= 4, "wchar_t is greater that 32 bit" ); + + auto sz = sizeof( wchar_t ); + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + for ( auto &&c : input ) + { + auto currStr = std::wstring( 1, c ); + auto asBytes = utf8_conv.to_bytes( currStr ); + for ( auto &&ii : asBytes ) + { + if ( ii ) + retVal += diff_match_patch::toPercentEncoding( ii, exclude, include ); + } + } + return retVal; +} + +wchar_t diff_match_patch::getValue( wchar_t ch ) +{ + if ( ch >= '0' && ch <= '9' ) + ch -= '0'; + else if ( ch >= 'a' && ch <= 'f' ) + ch = ch - 'a' + 10; + else if ( ch >= 'A' && ch <= 'F' ) + ch = ch - 'A' + 10; + else + throw std::wstring( L"Invalid Character %" ) + ch; + + return ch; +} + +std::wstring diff_match_patch::fromPercentEncoding( const std::wstring &input ) +{ + if ( input.empty() ) + return {}; + std::wstring retVal; + retVal.reserve( input.length() ); + for ( auto ii = 0ULL; ii < input.length(); ++ii ) + { + auto c = input[ ii ]; + if ( c == L'%' && ( ii + 2 ) < input.length() ) + { + auto a = input[ ++ii ]; + auto b = input[ ++ii ]; + a = getValue( a ); + b = getValue( b ); + retVal += wchar_t( ( a << 4 ) | b ); + } + else + { + retVal += c; + } + } + return retVal; +} + +bool diff_match_patch::endsWith( const std::wstring &string, const std::wstring &suffix ) +{ + if ( suffix.length() > string.length() ) + return false; + 
+ return string.compare( string.length() - suffix.length(), suffix.length(), suffix ) == 0; +} + +TStringVector diff_match_patch::splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) +{ + if ( separator.empty() ) + { + if ( !skipEmptyParts || !string.empty() ) + return { string }; + return {}; + } + + TStringVector strings; + auto prevPos = 0ULL; + auto startPos = string.find_first_of( separator ); + while ( startPos != std::string::npos ) + { + auto start = prevPos ? prevPos + 1 : prevPos; + auto len = prevPos ? ( startPos - prevPos - 1 ) : startPos; + auto curr = string.substr( start, len ); + prevPos = startPos; + if ( !skipEmptyParts || !curr.empty() ) + strings.emplace_back( curr ); + startPos = string.find_first_of( separator, prevPos + 1 ); + } + auto remainder = string.substr( prevPos ? prevPos + 1 : prevPos ); + if ( !skipEmptyParts || !remainder.empty() ) + strings.emplace_back( remainder ); + + return strings; +} + +int64_t diff_match_patch::toInt( const std::wstring &string ) +{ + int64_t retVal = 0; + try + { + std::size_t lastPos{}; + retVal = std::stoul( string, &lastPos ); + if ( lastPos != string.length() ) + return 0; + } + catch ( ... ) + { + } + return retVal; +} diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h new file mode 100644 index 00000000..56218606 --- /dev/null +++ b/cpp17/diff_match_patch.h @@ -0,0 +1,671 @@ +/* +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_H +#define DIFF_MATCH_PATCH_H + +#include +#include +#include +#include +#include +#include +#include +#include +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * + * @author fraser@google.com (Neil Fraser) + * + * Qt/C++ port by mikeslemmer@gmail.com (Mike Slemmer): + * Qt->C++17 with native STL use only, port by scott@towel42.com (Scott Aron Bloom) + * C++17 was intentionally chosen for variant support + * + * Code known to compile with C++17 + * + * Here is a trivial sample program which works properly when linked with this + * library: + * + + #include "diff_match_patch.h" + int main(int argc, char **argv) { + auto str1 = std::wstring(L"First string in diff"); + auto str2 = std::wstring(L"Second string in diff"); + + diff_match_patch dmp; + auto strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); + auto out = dmp.patch_apply(dmp.patch_fromText(strPatch), str1); + auto strResult = out.first; + + // here, strResult will equal str2 above.
+ return 0; + } + +*/ + +using TStringVector = std::vector< std::wstring >; +using TVariant = std::variant< std::wstring, TStringVector >; +using TVariantVector = std::vector< TVariant >; +using TCharPosMap = std::map< wchar_t, std::size_t >; + +/**- +* The data structure representing a diff is a Linked list of Diff objects: +* {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), +* Diff(Operation.EQUAL, " world.")} +* which means: delete "Hello", add "Goodbye" and keep " world." +*/ +enum Operation +{ + DELETE, + INSERT, + EQUAL +}; + +/** +* Class representing one diff operation. +*/ +class Diff +{ +public: + Operation operation{ DELETE }; + // One of: INSERT, DELETE or EQUAL. + std::wstring text; + // The text associated with this diff operation. + + /** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL. + * @param text The text being applied. + */ + Diff( Operation _operation, const std::wstring &_text ); + Diff( Operation _operation, const wchar_t *_text ); + Diff(); + inline bool isNull() const; + std::wstring toString() const; + bool operator==( const Diff &d ) const; + bool operator!=( const Diff &d ) const; + + static std::wstring strOperation( Operation op ); +}; +using TDiffVector = std::vector< Diff >; + +/** +* Class representing one patch operation. +*/ +class Patch +{ +public: + TDiffVector diffs; + std::size_t length1{ 0 }; + std::size_t length2{ 0 }; + std::size_t start1{ 0 }; + std::size_t start2{ 0 }; + + /** + * Constructor. Initializes with an empty list of diffs. + */ + Patch(); + Patch( std::wstring &text ); // modifies text, and removes the text used + bool isNull() const; + std::wstring toString() const; + +private: + std::wstring getPatchHeader() const; + std::wstring getCoordinateString( std::size_t start, std::size_t length ) const; +}; +using TPatchVector = std::vector< Patch >; + +/** + * Class containing the diff, match and patch methods. 
+ * Also contains the behaviour settings. + */ + +class diff_match_patch +{ + friend class diff_match_patch_test; + +public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout{ 1.0f }; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost{ 4 }; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold{ 0.5f }; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + int64_t Match_Distance{ 1000 }; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold{ 0.5f }; + // Chunk size for context length. + short Patch_Margin{ 4 }; + + short Match_MaxBits{ 32 }; // unit tests are based on 32 bits + +private: + // Define some regex patterns for matching boundaries. + static std::wregex BLANKLINEEND; + static std::wregex BLANKLINESTART; + +public: + diff_match_patch(); + + // DIFF FUNCTIONS + + /** + * Find the differences between two texts. + * Run a faster slightly less optimal diff. + * This method allows the 'checklines' of diff_main() to be optional. + * Most of the time checklines is wanted, so default to true. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2 ); + + /** + * Find the differences between two texts. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. 
+ * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @return Linked List of Diff objects. + */ + TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ); + + /** + * Find the differences between two texts. Simplifies the problem by + * stripping any common prefix or suffix off the texts before diffing. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. Used + * internally for recursive calls. Users should set DiffTimeout instead. + * @return Linked List of Diff objects. + */ +private: + TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); + + /** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ +private: + TDiffVector diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); + + /** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param deadline Time when the diff should be complete by. 
+ * @return Linked List of Diff objects. + */ +private: + TDiffVector diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ); + + /** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ +protected: + TDiffVector diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ); + + /** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param x Index of split point in text1. + * @param y Index of split point in text2. + * @param deadline Time at which to bail if not yet complete. + * @return LinkedList of Diff objects. + */ +private: + TDiffVector diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ); + + /** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text1 First string. + * @param text2 Second string. + * @return Three element Object array, containing the encoded text1, the + * encoded text2 and the List of unique strings. The zeroth element + * of the List of unique strings is intentionally blank. + */ +protected: + std::vector< TVariant > diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ); // return elems 0 and 1 are std::wstring, elem 2 is TStringVector + + /** + * Split a text into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text String to encode. + * @param lineArray List of unique strings. + * @param lineHash Map of strings to indices. + * @return Encoded string. 
+ */ +private: + std::wstring diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ); + + /** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param diffs LinkedList of Diff objects. + * @param lineArray List of unique strings. + */ +private: + void diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray ); + + /** + * Determine the common prefix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the start of each string. + */ +public: + std::size_t diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ); + + /** + * Determine the common suffix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of each string. + */ +public: + std::size_t diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ); + + /** + * Determine if the suffix of one string is the prefix of another. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of the first + * string and the start of the second string. + */ +protected: + std::size_t diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ); + + /** + * Do the two texts share a substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * @param text1 First string. + * @param text2 Second string. + * @return Five element String array, containing the prefix of text1, the + * suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or null if there was no match. 
+ */ +protected: + TStringVector diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ); + + /** + * Does a substring of shorttext exist within longtext such that the + * substring is at least half the length of longtext? + * @param longtext Longer string. + * @param shorttext Shorter string. + * @param i Start index of quarter length substring within longtext. + * @return Five element String array, containing the prefix of longtext, the + * suffix of longtext, the prefix of shorttext, the suffix of shorttext + * and the common middle. Or null if there was no match. + */ +private: + TStringVector diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ); + + /** + * Reduce the number of edits by eliminating semantically trivial equalities. + * @param diffs LinkedList of Diff objects. + */ +public: + void diff_cleanupSemantic( TDiffVector &diffs ); + + /** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. + * @param diffs LinkedList of Diff objects. + */ +public: + void diff_cleanupSemanticLossless( TDiffVector &diffs ); + + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * @param one First string. + * @param two Second string. + * @return The score. + */ +private: + int64_t diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ); + + /** + * Reduce the number of edits by eliminating operationally trivial equalities. + * @param diffs LinkedList of Diff objects. + */ +public: + void diff_cleanupEfficiency( TDiffVector &diffs ); + + /** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param diffs LinkedList of Diff objects.
+ */ +public: + void diff_cleanupMerge( TDiffVector &diffs ); + + /** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * @param diffs LinkedList of Diff objects. + * @param loc Location within text1. + * @return Location within text2. + */ +public: + std::size_t diff_xIndex( const TDiffVector &diffs, std::size_t loc ); + + /** + * Convert a Diff list into a pretty HTML report. + * @param diffs LinkedList of Diff objects. + * @return HTML representation. + */ +public: + std::wstring diff_prettyHtml( const TDiffVector &diffs ); + + /** + * Compute and return the source text (all equalities and deletions). + * @param diffs LinkedList of Diff objects. + * @return Source text. + */ +public: + std::wstring diff_text1( const TDiffVector &diffs ); + + /** + * Compute and return the destination text (all equalities and insertions). + * @param diffs LinkedList of Diff objects. + * @return Destination text. + */ +public: + std::wstring diff_text2( const TDiffVector &diffs ); + + /** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param diffs LinkedList of Diff objects. + * @return Number of changes. + */ +public: + std::size_t diff_levenshtein( const TDiffVector &diffs ); + + /** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param diffs Array of diff tuples. + * @return Delta text. + */ +public: + std::wstring diff_toDelta( const TDiffVector &diffs ); + + /** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param text1 Source string for the diff. + * @param delta Delta text. 
+ * @return Array of diff tuples or null if invalid. + * @throws std::wstring If invalid input. + */ +public: + TDiffVector diff_fromDelta( const std::wstring &text1, const std::wstring &delta ); + + // MATCH FUNCTIONS + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ +public: + std::size_t match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ +protected: + std::size_t match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); + + /** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ +private: + double match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ); + + /** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations. + */ +protected: + TCharPosMap match_alphabet( const std::wstring &pattern ); + + // PATCH FUNCTIONS + + /** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ +protected: + void patch_addContext( Patch &patch, const std::wstring &text ); + + /** + * Compute a list of patches to turn text1 into text2. 
+ * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return LinkedList of Patch objects. + */ +public: + TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2 ); + + /** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ +public: + TPatchVector patch_make( const TDiffVector &diffs ); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param text2 Ignored. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + * @deprecated Prefer patch_make(const std::wstring &text1, const TDiffVector &diffs). + */ +public: + TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2, const TDiffVector &diffs ); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ +public: + TPatchVector patch_make( const std::wstring &text1, const TDiffVector &diffs ); + + /** + * Given an array of patches, return another array that is identical. + * @param patches Array of patch objects. + * @return Array of patch objects. + */ +public: + TPatchVector patch_deepCopy( const TPatchVector &patches ); + + /** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of true/false values indicating which patches were applied. + * @param patches Array of patch objects. + * @param text Old text. + * @return Two element Object array, containing the new text and an array of + * boolean values.
+ */ +public: + std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::wstring text ); + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param patches Array of patch objects. + * @return The padding string added to each side. + */ +public: + std::wstring patch_addPadding( TPatchVector &patches ); + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches LinkedList of Patch objects. + */ +public: + void patch_splitMax( TPatchVector &patches ); + + /** + * Take a list of patches and return a textual representation. + * @param patches List of Patch objects. + * @return Text representation of patches. + */ +public: + std::wstring patch_toText( const TPatchVector &patches ); + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * @param textline Text representation of patches. + * @return List of Patch objects. + * @throws std::wstring If invalid input. + */ +public: + TPatchVector patch_fromText( const std::wstring &textline ); + + /** + * A safer version of std::wstring.mid(pos). This one returns "" instead of + * null when the position equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ +private: + static std::wstring safeMid( const std::wstring &str, std::size_t pos ); + + /** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead of + * null when the position equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring.
+ */ +private: + static std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); + + // utilities to replace built in Qt functionality +public: + static void replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ); + + static std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ); + static std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); + + static wchar_t getValue( wchar_t ch ); + static std::wstring fromPercentEncoding( const std::wstring &input ); + + static bool endsWith( const std::wstring &string, const std::wstring &suffix ); + static TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ); + + static int64_t toInt( const std::wstring &string ); + + static std::wstring to_wstring( const char &value, bool doubleQuoteEmpty = false ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, static_cast< wchar_t >( value ) ); + } + + template< typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const std::vector< T > &objects = {} ) + { + auto deletedRange = std::vector< T >( { input.begin() + start, input.begin() + start + count } ); + input.erase( input.begin() + start, input.begin() + start + count ); + input.insert( input.begin() + start, objects.begin(), objects.end() ); + + return deletedRange; + } + + template< typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const T &object ) + { + return Splice( input, start, count, std::vector< T >( { object } ) ); + } +}; + +#endif // DIFF_MATCH_PATCH_H diff --git a/cpp17/diff_match_patch.pro b/cpp17/diff_match_patch.pro new file mode 100644 index 00000000..80527977 --- /dev/null +++ b/cpp17/diff_match_patch.pro 
@@ -0,0 +1,19 @@ +#QT += sql xml network +TEMPLATE = app +CONFIG += qt debug_and_release + +mac { + CONFIG -= app_bundle +} + +# don't embed the manifest for now (doesn't work :( ) +#CONFIG -= embed_manifest_exe + +FORMS = + +HEADERS = diff_match_patch.h diff_match_patch_test.h + +SOURCES = diff_match_patch.cpp diff_match_patch_test.cpp + +RESOURCES = + diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp new file mode 100644 index 00000000..6180c9c5 --- /dev/null +++ b/cpp17/diff_match_patch_test.cpp @@ -0,0 +1,1128 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// C++17 port of the test harness: it builds against the standard library only and does not require Qt.
+#include "diff_match_patch.h" +#include "diff_match_patch_test.h" + +#include +#include + +int main( int /*argc*/, char ** /*argv*/ ) +{ + diff_match_patch_test dmp_test; + std::cerr << "Starting diff_match_patch unit tests.\n"; + dmp_test.run_all_tests(); + std::cerr << "Done.\n"; + return 0; +} + +static wchar_t kZero{ 0 }; +static wchar_t kOne{ 1 }; +static wchar_t kTwo{ 2 }; + +diff_match_patch_test::diff_match_patch_test() +{ +} + +void diff_match_patch_test::runTest( std::function< void() > test ) +{ + try + { + test(); + numPassedTests++; + } + //catch ( const char *msg ) + //{ + // std::cerr << "Test failed: " << msg << "\n"; + //} + catch ( std::string msg ) + { + std::cerr << "Test failed: " << msg << "\n"; + numFailedTests++; + } +} + +void diff_match_patch_test::run_all_tests() +{ + auto startTime = std::chrono::high_resolution_clock::now(); + + runTest( std::bind( &diff_match_patch_test::testDiffCommonPrefix, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCommonSuffix, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCommonOverlap, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffHalfmatch, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffLinesToChars, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCharsToLines, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupMerge, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupSemanticLossless, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupSemantic, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupEfficiency, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffPrettyHtml, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffText, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffDelta, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffXIndex, this ) ); + runTest( std::bind( 
&diff_match_patch_test::testDiffLevenshtein, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffBisect, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffMain, this ) ); + + runTest( std::bind( &diff_match_patch_test::testMatchAlphabet, this ) ); + runTest( std::bind( &diff_match_patch_test::testMatchBitap, this ) ); + runTest( std::bind( &diff_match_patch_test::testMatchMain, this ) ); + + runTest( std::bind( &diff_match_patch_test::testPatchObj, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchFromText, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchToText, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchAddContext, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchMake, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchSplitMax, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchAddPadding, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchApply, this ) ); + if ( numFailedTests == 0 ) + std::cout << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; + else + std::cerr << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; + auto endTime = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast< std::chrono::milliseconds >( endTime - startTime ).count(); + std::wcout << "Total time: " << elapsed << " ms\n"; +} + +// DIFF TEST FUNCTIONS + +void diff_match_patch_test::testDiffCommonPrefix() +{ + // Detect any common prefix. + assertEquals( "diff_commonPrefix: nullptr case.", 0, dmp.diff_commonPrefix( L"abc", L"xyz" ) ); + + assertEquals( "diff_commonPrefix: Non-nullptr case.", 4, dmp.diff_commonPrefix( L"1234abcdef", L"1234xyz" ) ); + + assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( L"1234", L"1234xyz" ) ); +} + +void diff_match_patch_test::testDiffCommonSuffix() +{ + // Detect any common suffix. 
+ assertEquals( "diff_commonSuffix: nullptr case.", 0, dmp.diff_commonSuffix( L"abc", L"xyz" ) ); + + assertEquals( "diff_commonSuffix: Non-nullptr case.", 4, dmp.diff_commonSuffix( L"abcdef1234", L"xyz1234" ) ); + + assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( L"1234", L"xyz1234" ) ); +} + +void diff_match_patch_test::testDiffCommonOverlap() +{ + // Detect any suffix/prefix overlap. + assertEquals( "diff_commonOverlap: nullptr case.", 0, dmp.diff_commonOverlap( L"", L"abcd" ) ); + + assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( L"abc", L"abcd" ) ); + + assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( L"123456", L"abcd" ) ); + + assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( L"123456xxx", L"xxxabcd" ) ); + + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals( "diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap( L"fi", std::wstring( L"\ufb01i" ) ) ); +} + +void diff_match_patch_test::testDiffHalfmatch() +{ + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( L"1234567890", L"abcdef" ) ); + + assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( L"12345", L"23" ) ); + + assertEquals( "diff_halfMatch: Single Match #1.", { L"12", L"90", L"a", L"z", L"345678" }, dmp.diff_halfMatch( L"1234567890", L"a345678z" ) ); + + assertEquals( "diff_halfMatch: Single Match #2.", { L"a", L"z", L"12", L"90", L"345678" }, dmp.diff_halfMatch( L"a345678z", L"1234567890" ) ); + + assertEquals( "diff_halfMatch: Single Match #3.", { L"abc", L"z", L"1234", L"0", L"56789" }, dmp.diff_halfMatch( L"abc56789z", L"1234567890" ) ); + + assertEquals( "diff_halfMatch: Single Match #4.", { L"a", L"xyz", L"1", L"7890", L"23456" }, dmp.diff_halfMatch( L"a23456xyz", L"1234567890" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #1.", { L"12123", L"123121", L"a", L"z", L"1234123451234" }, dmp.diff_halfMatch( L"121231234123451234123121", L"a1234123451234z" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #2.", { L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-=" }, dmp.diff_halfMatch( L"x-=-=-=-=-=-=-=-=-=-=-=-=", L"xx-=-=-=-=-=-=-=" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #3.", { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" }, dmp.diff_halfMatch( L"-=-=-=-=-=-=-=-=-=-=-=-=y", L"-=-=-=-=-=-=-=yy" ) ); + + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + assertEquals( "diff_halfMatch: Non-optimal halfmatch.", { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" }, dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); + + dmp.Diff_Timeout = 0; + assertEmpty( "diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); +} + +void diff_match_patch_test::testDiffLinesToChars() +{ + // Convert lines down to characters. 
+ TStringVector tmpVector = TStringVector( { L"", L"alpha\n", L"beta\n" } ); + TVariantVector tmpVarList; + tmpVarList.emplace_back( to_wstring( { 1, 2, 1 } ) ); //(("\u0001\u0002\u0001")); + tmpVarList.emplace_back( to_wstring( { 2, 1, 2 } ) ); // (("\u0002\u0001\u0002")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( L"alpha\nbeta\nalpha\n", L"beta\nalpha\nbeta\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"alpha\r\n" ); + tmpVector.emplace_back( L"beta\r\n" ); + tmpVector.emplace_back( L"\r\n" ); + tmpVarList.emplace_back( std::wstring() ); + tmpVarList.emplace_back( to_wstring( { 1, 2, 3, 3 } ) ); // (("\u0001\u0002\u0003\u0003")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( L"", L"alpha\r\nbeta\r\n\r\n\r\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"a" ); + tmpVector.emplace_back( L"b" ); + tmpVarList.emplace_back( to_wstring( 1 ) ); // (("\u0001")); + tmpVarList.emplace_back( to_wstring( 2 ) ); // (("\u0002")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( L"a", L"b" ) ); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + std::wstring lines; + std::wstring chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); + lines += std::to_wstring( x ) + L"\n"; + chars += to_wstring( x ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.emplace( tmpVector.begin(), L"" ); + tmpVarList.emplace_back( chars ); + tmpVarList.emplace_back( std::wstring() ); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, L"" ) ); +} + +void diff_match_patch_test::testDiffCharsToLines() +{ + // First check that Diff equality works. + assertTrue( "diff_charsToLines:", Diff( EQUAL, L"a" ) == Diff( EQUAL, L"a" ) ); + + assertEquals( "diff_charsToLines:", Diff( EQUAL, L"a" ), Diff( EQUAL, L"a" ) ); + + // Convert chars up to lines. + TDiffVector diffs; + diffs.emplace_back( EQUAL, to_wstring( { 1, 2, 1 } ) ); // ("\u0001\u0002\u0001"); + diffs.emplace_back( INSERT, to_wstring( { 2, 1, 2 } ) ); // ("\u0002\u0001\u0002"); + TStringVector tmpVector; + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"alpha\n" ); + tmpVector.emplace_back( L"beta\n" ); + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines:", { Diff( EQUAL, L"alpha\nbeta\nalpha\n" ), Diff( INSERT, L"beta\nalpha\nbeta\n" ) }, diffs ); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + std::vector< TVariant > tmpVarList; + std::wstring lines; + std::wstring chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); + lines += std::to_wstring( x ) + L"\n"; + chars += to_wstring( x ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.emplace( tmpVector.begin(), L"" ); + diffs = { Diff( DELETE, chars ) }; + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines: More than 256.", { Diff( DELETE, lines ) }, diffs ); +} + +void diff_match_patch_test::testDiffCleanupMerge() +{ + // Cleanup a messy diff. + TDiffVector diffs; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: nullptr case.", {}, diffs ); + + diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: No change case.", { Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"c" ) }, diffs ); + + diffs = { Diff( EQUAL, L"a" ), Diff( EQUAL, L"b" ), Diff( EQUAL, L"c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge equalities.", { Diff( EQUAL, L"abc" ) }, diffs ); + + diffs = { Diff( DELETE, L"a" ), Diff( DELETE, L"b" ), Diff( DELETE, L"c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge deletions.", { Diff( DELETE, L"abc" ) }, diffs ); + + diffs = { Diff( INSERT, L"a" ), Diff( INSERT, L"b" ), Diff( INSERT, L"c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge insertions.", { Diff( INSERT, L"abc" ) }, diffs ); + + diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"b" ), Diff( DELETE, L"c" ), Diff( INSERT, L"d" ), Diff( EQUAL, L"e" ), Diff( EQUAL, L"f" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge interweave.", { Diff( 
DELETE, L"ac" ), Diff( INSERT, L"bd" ), Diff( EQUAL, L"ef" ) }, diffs ); + + diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"abc" ), Diff( DELETE, L"dc" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", { Diff( EQUAL, L"a" ), Diff( DELETE, L"d" ), Diff( INSERT, L"b" ), Diff( EQUAL, L"c" ) }, diffs ); + + diffs = { Diff( EQUAL, L"x" ), Diff( DELETE, L"a" ), Diff( INSERT, L"abc" ), Diff( DELETE, L"dc" ), Diff( EQUAL, L"y" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", { Diff( EQUAL, L"xa" ), Diff( DELETE, L"d" ), Diff( INSERT, L"b" ), Diff( EQUAL, L"cy" ) }, diffs ); + + diffs = { Diff( EQUAL, L"a" ), Diff( INSERT, L"ba" ), Diff( EQUAL, L"c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left.", { Diff( INSERT, L"ab" ), Diff( EQUAL, L"ac" ) }, diffs ); + + diffs = { Diff( EQUAL, L"c" ), Diff( INSERT, L"ab" ), Diff( EQUAL, L"a" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right.", { Diff( EQUAL, L"ca" ), Diff( INSERT, L"ba" ) }, diffs ); + + diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( EQUAL, L"c" ), Diff( DELETE, L"ac" ), Diff( EQUAL, L"x" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left recursive.", { Diff( DELETE, L"abc" ), Diff( EQUAL, L"acx" ) }, diffs ); + + diffs = { Diff( EQUAL, L"x" ), Diff( DELETE, L"ca" ), Diff( EQUAL, L"c" ), Diff( DELETE, L"b" ), Diff( EQUAL, L"a" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right recursive.", { Diff( EQUAL, L"xca" ), Diff( DELETE, L"cba" ) }, diffs ); +} + +void diff_match_patch_test::testDiffCleanupSemanticLossless() +{ + // Slide diffs to match logical boundaries. 
+ auto diffs = TDiffVector(); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", {}, diffs ); + + diffs = { Diff( EQUAL, L"AAA\r\n\r\nBBB" ), Diff( INSERT, L"\r\nDDD\r\n\r\nBBB" ), Diff( EQUAL, L"\r\nEEE" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Blank lines.", { Diff( EQUAL, L"AAA\r\n\r\n" ), Diff( INSERT, L"BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, L"BBB\r\nEEE" ) }, diffs ); + + diffs = { Diff( EQUAL, L"AAA\r\nBBB" ), Diff( INSERT, L" DDD\r\nBBB" ), Diff( EQUAL, L" EEE" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", { Diff( EQUAL, L"AAA\r\n" ), Diff( INSERT, L"BBB DDD\r\n" ), Diff( EQUAL, L"BBB EEE" ) }, diffs ); + + diffs = { Diff( EQUAL, L"The c" ), Diff( INSERT, L"ow and the c" ), Diff( EQUAL, L"at." ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, L"The " ), Diff( INSERT, L"cow and the " ), Diff( EQUAL, L"cat." ) }, diffs ); + + diffs = { Diff( EQUAL, L"The-c" ), Diff( INSERT, L"ow-and-the-c" ), Diff( EQUAL, L"at." ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", { Diff( EQUAL, L"The-" ), Diff( INSERT, L"cow-and-the-" ), Diff( EQUAL, L"cat." ) }, diffs ); + + diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"a" ), Diff( EQUAL, L"ax" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the start.", { Diff( DELETE, L"a" ), Diff( EQUAL, L"aax" ) }, diffs ); + + diffs = { Diff( EQUAL, L"xa" ), Diff( DELETE, L"a" ), Diff( EQUAL, L"a" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the end.", { Diff( EQUAL, L"xaa" ), Diff( DELETE, L"a" ) }, diffs ); + + diffs = { Diff( EQUAL, L"The xxx. The " ), Diff( INSERT, L"zzz. The " ), Diff( EQUAL, L"yyy." 
) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Sentence boundaries.", { Diff( EQUAL, L"The xxx." ), Diff( INSERT, L" The zzz." ), Diff( EQUAL, L" The yyy." ) }, diffs ); +} + +void diff_match_patch_test::testDiffCleanupSemantic() +{ + // Cleanup semantically trivial equalities. + auto diffs = TDiffVector(); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", {}, diffs ); + + diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"cd" ), Diff( EQUAL, L"12" ), Diff( DELETE, L"e" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #1.", { Diff( DELETE, L"ab" ), Diff( INSERT, L"cd" ), Diff( EQUAL, L"12" ), Diff( DELETE, L"e" ) }, diffs ); + + diffs = { Diff( DELETE, L"abc" ), Diff( INSERT, L"ABC" ), Diff( EQUAL, L"1234" ), Diff( DELETE, L"wxyz" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #2.", { Diff( DELETE, L"abc" ), Diff( INSERT, L"ABC" ), Diff( EQUAL, L"1234" ), Diff( DELETE, L"wxyz" ) }, diffs ); + + diffs = { Diff( DELETE, L"a" ), Diff( EQUAL, L"b" ), Diff( DELETE, L"c" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Simple elimination.", { Diff( DELETE, L"abc" ), Diff( INSERT, L"b" ) }, diffs ); + + diffs = { Diff( DELETE, L"ab" ), Diff( EQUAL, L"cd" ), Diff( DELETE, L"e" ), Diff( EQUAL, L"f" ), Diff( INSERT, L"g" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Backpass elimination.", { Diff( DELETE, L"abcdef" ), Diff( INSERT, L"cdfg" ) }, diffs ); + + diffs = { Diff( INSERT, L"1" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"2" ), Diff( EQUAL, L"_" ), Diff( INSERT, L"1" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"2" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Multiple elimination.", { Diff( DELETE, L"AB_AB" ), Diff( INSERT, L"1A2_1A2" ) }, diffs ); + + diffs 
= { Diff( EQUAL, L"The c" ), Diff( DELETE, L"ow and the c" ), Diff( EQUAL, L"at." ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, L"The " ), Diff( DELETE, L"cow and the " ), Diff( EQUAL, L"cat." ) }, diffs ); + + diffs = { Diff( DELETE, L"abcxx" ), Diff( INSERT, L"xxdef" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No overlap elimination.", { Diff( DELETE, L"abcxx" ), Diff( INSERT, L"xxdef" ) }, diffs ); + + diffs = { Diff( DELETE, L"abcxxx" ), Diff( INSERT, L"xxxdef" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Overlap elimination.", { Diff( DELETE, L"abc" ), Diff( EQUAL, L"xxx" ), Diff( INSERT, L"def" ) }, diffs ); + + diffs = { Diff( DELETE, L"xxxabc" ), Diff( INSERT, L"defxxx" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", { Diff( INSERT, L"def" ), Diff( EQUAL, L"xxx" ), Diff( DELETE, L"abc" ) }, diffs ); + + diffs = { Diff( DELETE, L"abcd1212" ), Diff( INSERT, L"1212efghi" ), Diff( EQUAL, L"----" ), Diff( DELETE, L"A3" ), Diff( INSERT, L"3BC" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", { Diff( DELETE, L"abcd" ), Diff( EQUAL, L"1212" ), Diff( INSERT, L"efghi" ), Diff( EQUAL, L"----" ), Diff( DELETE, L"a" ), Diff( EQUAL, L"3" ), Diff( INSERT, L"BC" ) }, diffs ); +} + +void diff_match_patch_test::testDiffCleanupEfficiency() +{ + // Cleanup operationally trivial equalities. 
+ dmp.Diff_EditCost = 4; + auto diffs = TDiffVector(); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: nullptr case.", {}, diffs ); + + diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"wxyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: No elimination.", { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"wxyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }, diffs ); + + diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"xyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", { Diff( DELETE, L"abxyzcd" ), Diff( INSERT, L"12xyz34" ) }, diffs ); + + diffs = { Diff( INSERT, L"12" ), Diff( EQUAL, L"x" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", { Diff( DELETE, L"xcd" ), Diff( INSERT, L"12x34" ) }, diffs ); + + diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"xy" ), Diff( INSERT, L"34" ), Diff( EQUAL, L"z" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"56" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Backpass elimination.", { Diff( DELETE, L"abxyzcd" ), Diff( INSERT, L"12xy34z56" ) }, diffs ); + + dmp.Diff_EditCost = 5; + diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"wxyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: High cost elimination.", { Diff( DELETE, L"abwxyzcd" ), Diff( INSERT, L"12wxyz34" ) }, diffs ); + dmp.Diff_EditCost = 4; +} + +void diff_match_patch_test::testDiffPrettyHtml() +{ + // Pretty print. 
+ auto diffs = TDiffVector( { Diff( EQUAL, L"a\n" ), Diff( DELETE, L"b" ), Diff( INSERT, L"c&d" ) } ); + assertEquals( "diff_prettyHtml:", L"
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); +} + +void diff_match_patch_test::testDiffText() +{ + // Compute the source and destination texts. + auto diffs = TDiffVector( { Diff( EQUAL, L"jump" ), Diff( DELETE, L"s" ), Diff( INSERT, L"ed" ), Diff( EQUAL, L" over " ), Diff( DELETE, L"the" ), Diff( INSERT, L"a" ), Diff( EQUAL, L" lazy" ) } ); + assertEquals( "diff_text1:", L"jumps over the lazy", dmp.diff_text1( diffs ) ); + assertEquals( "diff_text2:", L"jumped over a lazy", dmp.diff_text2( diffs ) ); +} + +void diff_match_patch_test::testDiffDelta() +{ + // Convert a diff into delta string. + auto diffs = TDiffVector( { Diff( EQUAL, L"jump" ), Diff( DELETE, L"s" ), Diff( INSERT, L"ed" ), Diff( EQUAL, L" over " ), Diff( DELETE, L"the" ), Diff( INSERT, L"a" ), Diff( EQUAL, L" lazy" ), Diff( INSERT, L"old dog" ) } ); + std::wstring text1 = dmp.diff_text1( diffs ); + assertEquals( "diff_text1: Base text.", L"jumps over the lazy", text1 ); + + std::wstring delta = dmp.diff_toDelta( diffs ); + std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; + assertEquals( "diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); + + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); + + // Generates error (19 < 20). + bool exceptionTriggered = false; + try + { + dmp.diff_fromDelta( text1 + L"x", delta ); + assertFalse( "diff_fromDelta: Too long.", true ); + } + catch ( std::wstring ex ) + { + exceptionTriggered = true; + // Exception expected. + } + assertEquals( "diff_fromDelta: Too long - Exception triggered", true, exceptionTriggered ); + // Generates error (19 > 18). + + exceptionTriggered = false; + try + { + dmp.diff_fromDelta( text1.substr( 1 ), delta ); + assertFalse( "diff_fromDelta: Too short.", true ); + } + catch ( std::wstring ex ) + { + exceptionTriggered = true; + // Exception expected. 
+ } + assertEquals( "diff_fromDelta: Too short - Exception triggered", true, exceptionTriggered ); + // Generates error (%c3%xy invalid Unicode). + // This test does not work because QUrl::fromPercentEncoding("%xy") ->"?" + exceptionTriggered = false; + try + { + dmp.diff_fromDelta( L"", L"+%c3%xy" ); + assertFalse( "diff_fromDelta: Invalid character.", true ); + } + catch ( std::wstring ex ) + { + exceptionTriggered = true; + // Exception expected. + } + assertEquals( "diff_fromDelta: Invalid character - Exception triggered", true, exceptionTriggered ); + + // Test deltas with special characters. + diffs = { Diff( EQUAL, std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %" ) ), Diff( DELETE, std::wstring( L"\u0681 " ) + kOne + std::wstring( L" \n ^" ) ), Diff( INSERT, std::wstring( L"\u0682 " ) + kTwo + std::wstring( L" \\ |" ) ) }; + + text1 = dmp.diff_text1( diffs ); + golden = std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %\u0681 " ) + kOne + std::wstring( L" \n ^" ); + assertEquals( "diff_text1: Unicode text", golden, text1 ); + + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta ); + + assertEquals( "diff_fromDelta: Unicode", diffs, dmp.diff_fromDelta( text1, delta ) ); + + // Verify pool of unchanged characters. + diffs = { Diff( INSERT, L"A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # " ) }; + std::wstring text2 = dmp.diff_text2( diffs ); + assertEquals( "diff_text2: Unchanged characters.", L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); + + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unchanged characters.", L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); + + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( L"", delta ) ); +} + +void diff_match_patch_test::testDiffXIndex() +{ + // Translate a location in text1 to text2. 
+ auto diffs = TDiffVector( { Diff( DELETE, L"a" ), Diff( INSERT, L"1234" ), Diff( EQUAL, L"xyz" ) } ); + assertEquals( "diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex( diffs, 2 ) ); + + diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"1234" ), Diff( EQUAL, L"xyz" ) }; + assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); +} + +void diff_match_patch_test::testDiffLevenshtein() +{ + auto diffs = TDiffVector( { Diff( DELETE, L"abc" ), Diff( INSERT, L"1234" ), Diff( EQUAL, L"xyz" ) } ); + assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); + + diffs = { Diff( EQUAL, L"xyz" ), Diff( DELETE, L"abc" ), Diff( INSERT, L"1234" ) }; + assertEquals( "diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein( diffs ) ); + + diffs = { Diff( DELETE, L"abc" ), Diff( EQUAL, L"xyz" ), Diff( INSERT, L"1234" ) }; + assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); +} + +void diff_match_patch_test::testDiffBisect() +{ + // Normal. + std::wstring a = L"cat"; + std::wstring b = L"map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + auto diffs = TDiffVector( { Diff( DELETE, L"c" ), Diff( INSERT, L"m" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"t" ), Diff( INSERT, L"p" ) } ); + auto results = dmp.diff_bisect( a, b, std::numeric_limits< clock_t >::max() ); + assertEquals( "diff_bisect: Normal.", diffs, results ); + + // Timeout. + diffs = { Diff( DELETE, L"cat" ), Diff( INSERT, L"map" ) }; + assertEquals( "diff_bisect: Timeout.", diffs, dmp.diff_bisect( a, b, 0 ) ); +} + +void diff_match_patch_test::testDiffMain() +{ + // Perform a trivial diff. 
+ auto diffs = TDiffVector(); + assertEquals( "diff_main: nullptr case.", diffs, dmp.diff_main( L"", L"", false ) ); + + diffs = { Diff( DELETE, L"abc" ) }; + assertEquals( "diff_main: RHS side nullptr case.", diffs, dmp.diff_main( L"abc", L"", false ) ); + + diffs = { Diff( INSERT, L"abc" ) }; + assertEquals( "diff_main: LHS side nullptr case.", diffs, dmp.diff_main( L"", L"abc", false ) ); + + diffs = { Diff( EQUAL, L"abc" ) }; + assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( L"abc", L"abc", false ) ); + + diffs = { Diff( EQUAL, L"ab" ), Diff( INSERT, L"123" ), Diff( EQUAL, L"c" ) }; + assertEquals( "diff_main: Simple insertion.", diffs, dmp.diff_main( L"abc", L"ab123c", false ) ); + + diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"123" ), Diff( EQUAL, L"bc" ) }; + assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( L"a123bc", L"abc", false ) ); + + diffs = { Diff( EQUAL, L"a" ), Diff( INSERT, L"123" ), Diff( EQUAL, L"b" ), Diff( INSERT, L"456" ), Diff( EQUAL, L"c" ) }; + assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( L"abc", L"a123b456c", false ) ); + + diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"123" ), Diff( EQUAL, L"b" ), Diff( DELETE, L"456" ), Diff( EQUAL, L"c" ) }; + assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( L"a123b456c", L"abc", false ) ); + + // Perform a real diff. + // Switch off the timeout. + dmp.Diff_Timeout = 0; + diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"b" ) }; + assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( L"a", L"b", false ) ); + + diffs = { Diff( DELETE, L"Apple" ), Diff( INSERT, L"Banana" ), Diff( EQUAL, L"s are a" ), Diff( INSERT, L"lso" ), Diff( EQUAL, L" fruit." 
) }; + assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( L"Apples are a fruit.", L"Bananas are also fruit.", false ) ); + + diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"\u0680" ), Diff( EQUAL, L"x" ), Diff( DELETE, L"\t" ), Diff( INSERT, to_wstring( kZero ) ) }; + assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( L"ax\t", std::wstring( L"\u0680x" ) + kZero, false ) ); + + diffs = { Diff( DELETE, L"1" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"y" ), Diff( EQUAL, L"b" ), Diff( DELETE, L"2" ), Diff( INSERT, L"xab" ) }; + assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( L"1ayb2", L"abxab", false ) ); + + diffs = { Diff( INSERT, L"xaxcx" ), Diff( EQUAL, L"abc" ), Diff( DELETE, L"y" ) }; + assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( L"abcy", L"xaxcxabc", false ) ); + + diffs = { Diff( DELETE, L"ABCD" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"=" ), Diff( INSERT, L"-" ), Diff( EQUAL, L"bcd" ), Diff( DELETE, L"=" ), Diff( INSERT, L"-" ), Diff( EQUAL, L"efghijklmnopqrs" ), Diff( DELETE, L"EFGHIJKLMNOefg" ) }; + assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( L"ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", L"a-bcd-efghijklmnopqrs", false ) ); + + diffs = { Diff( INSERT, L" " ), Diff( EQUAL, L"a" ), Diff( INSERT, L"nd" ), Diff( EQUAL, L" [[Pennsylvania]]" ), Diff( DELETE, L" and [[New" ) }; + assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( L"a [[Pennsylvania]] and [[New", L" and [[Pennsylvania]]", false ) ); + + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
+ std::wstring a = L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + std::wstring b = L"I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for ( int x = 0; x < 10; x++ ) + { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main( a, b ); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue( "diff_main: Timeout min.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC ) <= ( endTime - startTime ) ); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue( "diff_main: Timeout max.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 ) > ( endTime - startTime ) ); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + assertEquals( "diff_main: Simple line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = L"1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals( "diff_main: Single line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + TStringVector texts_linemode = diff_rebuildtexts( dmp.diff_main( a, b, true ) ); + TStringVector texts_textmode = diff_rebuildtexts( dmp.diff_main( a, b, false ) ); + assertEquals( "diff_main: Overlap line-mode.", texts_textmode, texts_linemode ); +} + +// MATCH TEST FUNCTIONS + +void diff_match_patch_test::testMatchAlphabet() +{ + // Initialise the bitmasks for Bitap. + TCharPosMap bitmask; + bitmask[ 'a' ] = 4; + bitmask[ 'b' ] = 2; + bitmask[ 'c' ] = 1; + assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( L"abc" ) ); + + bitmask = TCharPosMap(); + bitmask[ 'a' ] = 37; + bitmask[ 'b' ] = 18; + bitmask[ 'c' ] = 8; + assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( L"abcaba" ) ); +} + +void diff_match_patch_test::testMatchBitap() +{ + // Bitap algorithm. 
+ dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( L"abcdefghijk", L"fgh", 5 ) ); + + assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( L"abcdefghijk", L"fgh", 0 ) ); + + assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( L"abcdefghijk", L"efxhi", 0 ) ); + + assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( L"abcdefghijk", L"cdefxyhijk", 5 ) ); + + assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( L"abcdefghijk", L"bxy", 1 ) ); + + assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( L"123456789xx0", L"3456789x0", 2 ) ); + + assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( L"abcdef", L"xxabc", 4 ) ); + + assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( L"abcdef", L"defyy", 4 ) ); + + assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( L"abcdef", L"xabcdefy", 0 ) ); + + dmp.Match_Threshold = 0.4f; + assertEquals( "match_bitap: Threshold #1.", 4, dmp.match_bitap( L"abcdefghijk", L"efxyhi", 1 ) ); + + dmp.Match_Threshold = 0.3f; + assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( L"abcdefghijk", L"efxyhi", 1 ) ); + + dmp.Match_Threshold = 0.0f; + assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( L"abcdefghijk", L"bcdef", 1 ) ); + + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( L"abcdexyzabcde", L"abccde", 3 ) ); + + assertEquals( "match_bitap: Multiple select #2.", 8, dmp.match_bitap( L"abcdexyzabcde", L"abccde", 5 ) ); + + dmp.Match_Distance = 10; // Strict location. + assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( L"abcdefghijklmnopqrstuvwxyz", L"abcdefg", 24 ) ); + + assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( L"abcdefghijklmnopqrstuvwxyz", L"abcdxxefg", 1 ) ); + + dmp.Match_Distance = 1000; // Loose location. 
+ assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( L"abcdefghijklmnopqrstuvwxyz", L"abcdefg", 24 ) ); +} + +void diff_match_patch_test::testMatchMain() +{ + // Full match. + assertEquals( "match_main: Equality.", 0, dmp.match_main( L"abcdef", L"abcdef", 1000 ) ); + + assertEquals( "match_main: nullptr text.", -1, dmp.match_main( L"", L"abcdef", 1 ) ); + + assertEquals( "match_main: nullptr pattern.", 3, dmp.match_main( L"abcdef", L"", 3 ) ); + + assertEquals( "match_main: Exact match.", 3, dmp.match_main( L"abcdef", L"de", 3 ) ); + + dmp.Match_Threshold = 0.7f; + assertEquals( "match_main: Complex match.", 4, dmp.match_main( L"I am the very model of a modern major general.", L" that berry ", 5 ) ); + dmp.Match_Threshold = 0.5f; +} + +// PATCH TEST FUNCTIONS + +void diff_match_patch_test::testPatchObj() +{ + // Patch Object. + Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = { Diff( EQUAL, L"jump" ), Diff( DELETE, L"s" ), Diff( INSERT, L"ed" ), Diff( EQUAL, L" over " ), Diff( DELETE, L"the" ), Diff( INSERT, L"a" ), Diff( EQUAL, L"\nlaz" ) }; + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "Patch: toString.", strp, p.toString() ); +} + +void diff_match_patch_test::testPatchFromText() +{ + assertTrue( "patch_fromText: #0.", dmp.patch_fromText( L"" ).empty() ); + + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( L"@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( L"@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( L"@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); + + // 
Generates error. + bool exceptionTriggered = false; + try + { + dmp.patch_fromText( L"Bad\nPatch\n" ); + assertFalse( "patch_fromText: #5.", true ); + } + catch ( std::wstring ex ) + { + exceptionTriggered = true; + // Exception expected. + } + assertEquals( "patch_fromText: #5 - Exception triggered", true, exceptionTriggered ); +} + +void diff_match_patch_test::testPatchToText() +{ + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + auto patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Single", strp, dmp.patch_toText( patches ) ); + + strp = L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Dual", strp, dmp.patch_toText( patches ) ); +} + +void diff_match_patch_test::testPatchAddContext() +{ + dmp.Patch_Margin = 4; + auto p = dmp.patch_fromText( L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; + dmp.patch_addContext( p, L"The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); + + p = dmp.patch_fromText( L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; + dmp.patch_addContext( p, L"The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); + + p = dmp.patch_fromText( L"@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; + dmp.patch_addContext( p, L"The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); + + p = dmp.patch_fromText( L"@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; + dmp.patch_addContext( p, L"The quick brown fox jumps. The quick brown fox crashes." ); + assertEquals( "patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString() ); +} + +void diff_match_patch_test::testPatchMake() +{ + TPatchVector patches; + patches = dmp.patch_make( L"", L"" ); + assertEquals( "patch_make: nullptr case", L"", dmp.patch_toText( patches ) ); + + std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; + std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; + std::wstring expectedPatch = L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. + patches = dmp.patch_make( text2, text1 ); + assertEquals( "patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText( patches ) ); + + expectedPatch = L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText( patches ) ); + + auto diffs = dmp.diff_main( text1, text2, false ); + patches = dmp.patch_make( diffs ); + assertEquals( "patch_make: Diff input", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( text1, diffs ); + assertEquals( "patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( text1, text2, diffs ); + assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"`1234567890-=[]\\;',./", L"~!@#$%^&*()_+{}|:\"<>?" ); + assertEquals( "patch_toText: Character encoding.", L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); + + diffs = { Diff( DELETE, L"`1234567890-=[]\\;',./" ), Diff( INSERT, L"~!@#$%^&*()_+{}|:\"<>?" 
) }; + assertEquals( "patch_fromText: Character decoding.", diffs, dmp.patch_fromText( L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" )[ 0 ].diffs ); + + text1 = L""; + for ( int x = 0; x < 100; x++ ) + { + text1 += L"abcdef"; + } + text2 = text1 + L"123"; + expectedPatch = L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText( patches ) ); +} + +void diff_match_patch_test::testPatchSplitMax() +{ + // Confirm Match_MaxBits is 32. + TPatchVector patches; + patches = dmp.patch_make( L"abcdefghijklmnopqrstuvwxyz01234567890", L"XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #1.", L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", L"abcdefuvwxyz" ); + std::wstring oldToText = dmp.patch_toText( patches ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #2.", oldToText, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"1234567890123456789012345678901234567890123456789012345678901234567890", L"abc" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #3.", L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", L"abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1" ); + 
dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #4.", L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); +} + +void diff_match_patch_test::testPatchAddPadding() +{ + TPatchVector patches; + patches = dmp.patch_make( L"", L"test" ); + assertEquals( "patch_addPadding: Both edges full", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges full.", L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"XY", L"XtestY" ); + assertEquals( "patch_addPadding: Both edges partial.", L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges partial.", L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"XXXXYYYY", L"XXXXtestYYYY" ); + assertEquals( "patch_addPadding: Both edges none.", L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges none.", L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); +} + +void diff_match_patch_test::testPatchApply() +{ + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + TPatchVector patches; + patches = dmp.patch_make( L"", L"" ); + auto results = dmp.patch_apply( patches, L"Hello world." ); + auto &&boolArray = results.second; + + std::wstring resultStr = results.first + L"\t" + std::to_wstring( boolArray.size() ); + assertEquals( "patch_apply: nullptr case.", L"Hello world.\t0", resultStr ); + + patches = dmp.patch_make( L"The quick brown fox jumps over the lazy dog.", L"That quick brown fox jumped over a lazy dog." 
); + assertEquals( "patch_apply: Exact match.", L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_toText( patches ) ); + + results = dmp.patch_apply( patches, L"The quick brown fox jumps over the lazy dog." ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); + assertEquals( "patch_apply: Exact match.", L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); + + results = dmp.patch_apply( patches, L"The quick red rabbit jumps over the tired tiger." ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); + assertEquals( "patch_apply: Partial match.", L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); + + results = dmp.patch_apply( patches, L"I am the very model of a modern major general." ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); + assertEquals( "patch_apply: Failed match.", L"I am the very model of a modern major general.\tfalse\tfalse", resultStr ); + + patches = dmp.patch_make( L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy" ); + results = dmp.patch_apply( patches, L"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? 
L"true" : L"false" ); + assertEquals( "patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", resultStr ); + + patches = dmp.patch_make( L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy" ); + results = dmp.patch_apply( patches, L"x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); + assertEquals( "patch_apply: Big delete, large change 1.", L"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make( L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy" ); + results = dmp.patch_apply( patches, L"x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); + assertEquals( "patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", resultStr ); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make( L"abcdefghijklmnopqrstuvwxyz--------------------1234567890", L"abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890" ); + results = dmp.patch_apply( patches, L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? 
L"true" : L"false" ); + assertEquals( "patch_apply: Compensate for failed patch.", L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make( L"", L"test" ); + std::wstring patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, L"" ); + assertEquals( "patch_apply: No side effects.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"The quick brown fox jumps over the lazy dog.", L"Woof" ); + patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, L"The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( L"", L"test" ); + results = dmp.patch_apply( patches, L"" ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ); + assertEquals( "patch_apply: Edge exact match.", L"test\ttrue", resultStr ); + + patches = dmp.patch_make( L"XY", L"XtestY" ); + results = dmp.patch_apply( patches, L"XY" ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ); + assertEquals( "patch_apply: Near edge exact match.", L"XtestY\ttrue", resultStr ); + + patches = dmp.patch_make( L"y", L"y123" ); + results = dmp.patch_apply( patches, L"x" ); + boolArray = results.second; + resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? 
L"true" : L"false" ); + assertEquals( "patch_apply: Edge partial match.", L"x123\ttrue", resultStr ); +} + +void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) +{ + std::cout << "FAILED : " + strCase + "\n"; + std::wcerr << " Expected: " << expected << L"\n Actual: " << actual << "\n"; + numFailedTests++; + //throw strCase; +} + +void diff_match_patch_test::reportPassed( const std::string &strCase ) +{ + std::cout << "PASSED: " + strCase + "\n"; +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ) +{ + if ( n1 != n2 ) + { + reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ) +{ + if ( s1 != s2 ) + { + reportFailure( strCase, s1, s2 ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) +{ + if ( s1 != s2 ) + { + reportFailure( strCase, to_wstring( s1 ), to_wstring( s2 ) ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) +{ + if ( d1 != d2 ) + { + reportFailure( strCase, d1.toString(), d2.toString() ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ) +{ + if ( var1 != var2 ) + { + reportFailure( strCase, to_wstring( var1 ), to_wstring( var2 ) ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ) +{ + for ( auto &&ii : m1 ) + { + auto rhs = m2.find( ii.first ); + if ( rhs == m2.end() ) + { + reportFailure( strCase, L"(" + to_wstring( ii.first ) + L"," + std::to_wstring( 
ii.second ) + L")", L"" ); + } + } + + for ( auto &&ii : m2 ) + { + auto rhs = m1.find( ii.first ); + if ( rhs == m1.end() ) + { + reportFailure( strCase, L"(" + to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs ) +{ + if ( lhs != rhs ) + { + reportFailure( strCase, lhs ? L"true" : L"false", rhs ? L"true" : L"false" ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) +{ + if ( !value ) + { + reportFailure( strCase, L"true", L"false" ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertFalse( const std::string &strCase, bool value ) +{ + if ( value ) + { + reportFailure( strCase, L"false", L"true" ); + } + reportPassed( strCase ); +} + +// Construct the two texts which made up the diff originally. +TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) +{ + TStringVector text( { L"", L"" } ); + for ( auto &&myDiff : diffs ) + { + if ( myDiff.operation != INSERT ) + { + text[ 0 ] += myDiff.text; + } + if ( myDiff.operation != DELETE ) + { + text[ 1 ] += myDiff.text; + } + } + return text; +} + +void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) +{ + if ( !list.empty() ) + { + throw strCase; + } +} + +/* +Compile instructions for cmake on Windows: +mkdir build +cd build +cmake .. +make +diff_match_patch_test.exe + +Compile insructions for OS X: +qmake -spec macx-g++ +make +./diff_match_patch +*/ diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h new file mode 100644 index 00000000..4c8b9a5f --- /dev/null +++ b/cpp17/diff_match_patch_test.h @@ -0,0 +1,236 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. 
+ * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_TEST_H +#define DIFF_MATCH_PATCH_TEST_H + +#include + +template< typename T > +std::wstring to_wstring( const T & /*value*/, bool /*doubleQuoteEmpty*/ = false ) +{ + assert( false ); + return {}; +} + +template<> +std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) +{ + if ( doubleQuoteEmpty && string.empty() ) + return LR"("")"; + + std::wstring wstring( string.size(), L' ' ); // Overestimate number of code points. + wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), string.size() ) ); // Shrink to fit. 
+ return wstring; +} + +template<> +std::wstring to_wstring( const TVariant &variant, bool doubleQuoteEmpty ) +{ + std::wstring retVal; + if ( std::holds_alternative< std::wstring >( variant ) ) + retVal = std::get< std::wstring >( variant ); + + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + + return retVal; +} + +template<> +std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty ) +{ + auto retVal = diff.toString(); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; +} + +template<> +std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty ) +{ + auto retVal = patch.toString(); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; +} + +template<> +std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) +{ + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, value ); +} + +template<> +std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) +{ + return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); +} + +template<> +std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) +{ + if ( doubleQuoteEmpty && value.empty() ) + return LR"("")"; + + return value; +} + +template< typename T > +std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty = false ) +{ + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; +} + +template< typename T > +typename std::enable_if_t< std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) +{ + if ( doubleQuoteEmpty && ( values.size() == 0 ) ) + return LR"(\"\")"; + + std::wstring retVal; + for ( auto &&curr : values ) + { + retVal += to_wstring( curr, false ); + } + return retVal; 
+} + +template< typename T > +typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) +{ + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; +} + +class diff_match_patch_test +{ +public: + diff_match_patch_test(); + void run_all_tests(); + void runTest( std::function< void() > test ); + + // DIFF TEST FUNCTIONS + void testDiffCommonPrefix(); + void testDiffCommonSuffix(); + void testDiffCommonOverlap(); + void testDiffHalfmatch(); + void testDiffLinesToChars(); + void testDiffCharsToLines(); + void testDiffCleanupMerge(); + void testDiffCleanupSemanticLossless(); + void testDiffCleanupSemantic(); + void testDiffCleanupEfficiency(); + void testDiffPrettyHtml(); + void testDiffText(); + void testDiffDelta(); + void testDiffXIndex(); + void testDiffLevenshtein(); + void testDiffBisect(); + void testDiffMain(); + + // MATCH TEST FUNCTIONS + void testMatchAlphabet(); + void testMatchBitap(); + void testMatchMain(); + + // PATCH TEST FUNCTIONS + void testPatchObj(); + void testPatchFromText(); + void testPatchToText(); + void testPatchAddContext(); + void testPatchMake(); + void testPatchSplitMax(); + void testPatchAddPadding(); + void testPatchApply(); + +private: + std::size_t numPassedTests{ 0 }; + std::size_t numFailedTests{ 0 }; + diff_match_patch dmp; + + // Define equality. + template< typename T > + void assertEquals( const std::string &strCase, const T &lhs, const T &rhs ) + { + bool failed = ( lhs.size() != rhs.size() ); + if ( !failed ) + { + for ( auto ii = 0ULL; !failed && ( ii < lhs.size() ); ++ii ) + { + auto &&t1 = lhs[ ii ]; + auto &&t2 = rhs[ ii ]; + failed = t1 != t2; + } + } + else + { + // Build human readable description of both lists. 
+ auto lhsString = to_wstring( lhs, true ); + auto rhsString = to_wstring( rhs, true ); + reportFailure( strCase, lhsString, rhsString ); + return; + } + reportPassed( strCase ); + } + void assertEquals( const std::string &strCase, bool lhs, bool rhs ); + void assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ); + void assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ); + void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ); + void assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ); + void assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ); + void assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ); + + void assertTrue( const std::string &strCase, bool value ); + void assertFalse( const std::string &strCase, bool value ); + void assertEmpty( const std::string &strCase, const TStringVector &list ); + + void reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ); + void reportPassed( const std::string &strCase ); + + // Construct the two texts which made up the diff originally. 
+ TStringVector diff_rebuildtexts( const std::vector< Diff > &diffs ); +}; + +#endif // DIFF_MATCH_PATCH_TEST_H diff --git a/cpp17/include.cmake b/cpp17/include.cmake new file mode 100644 index 00000000..654bee29 --- /dev/null +++ b/cpp17/include.cmake @@ -0,0 +1,23 @@ +set(_PROJECT_NAME diff_match_patch_cpp17) +set(FOLDER_NAME Libs) + +set(project_SRCS + diff_match_patch.cpp +) + +set(qtproject_H +) + +set(project_H + diff_match_patch.h +) + +set(qtproject_UIS +) + + +set(qtproject_QRC +) + +set( project_pub_DEPS +) From e0f9300a67b24fa3d9d455e2d5577fc9db03a135 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Mon, 29 Jan 2024 15:43:10 -0800 Subject: [PATCH 02/15] Take std::string as input parameters --- cpp17/diff_match_patch.cpp | 16 +- cpp17/diff_match_patch.h | 50 ++- cpp17/diff_match_patch_test.cpp | 530 ++++++++++++++++---------------- cpp17/diff_match_patch_test.h | 283 +++++++++-------- 4 files changed, 472 insertions(+), 407 deletions(-) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index e99be5d5..0b799449 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -579,7 +579,7 @@ TDiffVector diff_match_patch::diff_bisectSplit( const std::wstring &text1, const return diffs; } -TVariantVector diff_match_patch::diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ) +diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ) { TStringVector lineArray; std::unordered_map< std::wstring, std::size_t > lineHash; @@ -734,7 +734,7 @@ std::size_t diff_match_patch::diff_commonOverlap( const std::wstring &text1, con } } -TStringVector diff_match_patch::diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ) +diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ) { if ( Diff_Timeout <= 0 ) { @@ -783,7 +783,7 @@ TStringVector diff_match_patch::diff_halfMatch( 
const std::wstring &text1, const } } -TStringVector diff_match_patch::diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ) +diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ) { // Start with a 1/4 length substring at position i as a seed. const std::wstring seed = safeMid( longtext, i, longtext.length() / 4 ); @@ -1643,7 +1643,7 @@ double diff_match_patch::match_bitapScore( int64_t e, int64_t x, int64_t loc, co return accuracy + ( proximity / static_cast< float >( Match_Distance ) ); } -TCharPosMap diff_match_patch::match_alphabet( const std::wstring &pattern ) +diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::wstring &pattern ) { TCharPosMap s; std::size_t i; @@ -2415,6 +2415,12 @@ wchar_t toHexUpper( wchar_t value ) return L"0123456789ABCDEF"[ value & 0xF ]; } +std::wstring to_wstring( const std::string &string ) +{ + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + return utf8_conv.from_bytes( string ); +} + std::wstring diff_match_patch::toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ) { std::wstring retVal; @@ -2511,7 +2517,7 @@ bool diff_match_patch::endsWith( const std::wstring &string, const std::wstring return string.compare( string.length() - suffix.length(), suffix.length(), suffix ) == 0; } -TStringVector diff_match_patch::splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) +diff_match_patch::TStringVector diff_match_patch::splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) { if ( separator.empty() ) { diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h index 56218606..8dc04568 100644 --- a/cpp17/diff_match_patch.h +++ b/cpp17/diff_match_patch.h @@ -47,8 +47,8 @@ #include "diff_match_patch.h" int main(int argc, char **argv) { - auto str1 = 
std::wstring("First string in diff"); - auto str2 = std::wstring("Second string in diff"); + auto str1 = std::string("First string in diff"); + auto str2 = std::string("Second string in diff"); diff_match_patch dmp; auto strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); @@ -61,11 +61,6 @@ */ -using TStringVector = std::vector< std::wstring >; -using TVariant = std::variant< std::wstring, TStringVector >; -using TVariantVector = std::vector< TVariant >; -using TCharPosMap = std::map< wchar_t, std::size_t >; - /**- * The data structure representing a diff is a Linked list of Diff objects: * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), @@ -79,6 +74,8 @@ enum Operation EQUAL }; +std::wstring to_wstring( const std::string &string ); + /** * Class representing one diff operation. */ @@ -97,6 +94,14 @@ class Diff */ Diff( Operation _operation, const std::wstring &_text ); Diff( Operation _operation, const wchar_t *_text ); + Diff( Operation _operation, const std::string &_text ) : + Diff( _operation, to_wstring( _text ) ) + { + } + Diff( Operation _operation, const char *_text ) : + Diff( _operation, std::string( _text ) ) + { + } Diff(); inline bool isNull() const; std::wstring toString() const; @@ -164,7 +169,7 @@ class diff_match_patch // Chunk size for context length. short Patch_Margin{ 4 }; - short Match_MaxBits{ 32 }; // unit tests are based on 32 bits + short Match_MaxBits{ 32 }; // unit tests are based on 32 bits private: // Define some regex patterns for matching boundaries. @@ -172,6 +177,11 @@ class diff_match_patch static std::wregex BLANKLINESTART; public: + using TStringVector = std::vector< std::wstring >; + using TVariant = std::variant< std::wstring, TStringVector >; + using TVariantVector = std::vector< TVariant >; + using TCharPosMap = std::map< wchar_t, std::size_t >; + diff_match_patch(); // DIFF FUNCTIONS @@ -186,6 +196,7 @@ class diff_match_patch * @return Linked List of Diff objects. 
*/ TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2 ); + TDiffVector diff_main( const std::string &text1, const std::string &text2 ) { return diff_main( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } /** * Find the differences between two texts. @@ -197,6 +208,7 @@ class diff_match_patch * @return Linked List of Diff objects. */ TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ); + TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines ) { return diff_main( ::to_wstring( text1 ), ::to_wstring( text2 ), checklines ); } /** * Find the differences between two texts. Simplifies the problem by @@ -212,6 +224,7 @@ class diff_match_patch */ private: TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); + TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) { return diff_main( ::to_wstring( text1 ), ::to_wstring( text2 ), checklines, deadline ); } /** * Find the differences between two texts. 
Assumes that the texts do not @@ -226,6 +239,7 @@ class diff_match_patch */ private: TDiffVector diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); + TDiffVector diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) { return diff_compute( ::to_wstring( text1 ), ::to_wstring( text2 ), checklines, deadline ); } /** * Do a quick line-level diff on both strings, then rediff the parts for @@ -238,6 +252,7 @@ class diff_match_patch */ private: TDiffVector diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ); + TDiffVector diff_lineMode( std::string text1, std::string text2, clock_t deadline ) { return diff_lineMode( ::to_wstring( text1 ), ::to_wstring( text2 ), deadline ); } /** * Find the 'middle snake' of a diff, split the problem in two @@ -249,6 +264,7 @@ class diff_match_patch */ protected: TDiffVector diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ); + TDiffVector diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ) { return diff_bisect( ::to_wstring( text1 ), ::to_wstring( text2 ), deadline ); } /** * Given the location of the 'middle snake', split the diff in two parts @@ -262,6 +278,7 @@ class diff_match_patch */ private: TDiffVector diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ); + TDiffVector diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ) { return diff_bisectSplit( ::to_wstring( text1 ), ::to_wstring( text2 ), x, y, deadline ); } /** * Split two texts into a list of strings. 
Reduce the texts to a string of @@ -274,6 +291,7 @@ class diff_match_patch */ protected: std::vector< TVariant > diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ); // return elems 0 and 1 are std::wstring, elem 2 is TStringVector + std::vector< TVariant > diff_linesToChars( const std::string &text1, const std::string &text2 ) { return diff_linesToChars( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } /** * Split a text into a list of strings. Reduce the texts to a string of @@ -303,6 +321,7 @@ class diff_match_patch */ public: std::size_t diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ); + std::size_t diff_commonPrefix( const std::string &text1, const std::string &text2 ) { return diff_commonPrefix( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } /** * Determine the common suffix of two strings. @@ -312,6 +331,7 @@ class diff_match_patch */ public: std::size_t diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ); + std::size_t diff_commonSuffix( const std::string &text1, const std::string &text2 ) { return diff_commonSuffix( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } /** * Determine if the suffix of one string is the prefix of another. 
@@ -322,6 +342,7 @@ class diff_match_patch */ protected: std::size_t diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ); + std::size_t diff_commonOverlap( const std::string &text1, const std::string &text2 ) { return diff_commonOverlap( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } /** * Do the two texts share a substring which is at least half the length of @@ -335,6 +356,7 @@ class diff_match_patch */ protected: TStringVector diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ); + TStringVector diff_halfMatch( const std::string &text1, const std::string &text2 ) { return diff_halfMatch( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } /** * Does a substring of shorttext exist within longtext such that the @@ -348,6 +370,7 @@ class diff_match_patch */ private: TStringVector diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ); + TStringVector diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ) { return diff_halfMatchI( ::to_wstring( longtext ), ::to_wstring( shorttext ), i ); } /** * Reduce the number of edits by eliminating semantically trivial equalities. @@ -375,6 +398,7 @@ class diff_match_patch */ private: int64_t diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ); + int64_t diff_cleanupSemanticScore( const std::string &one, const std::string &two ) { return diff_cleanupSemanticScore( ::to_wstring( one ), ::to_wstring( two ) ); } /** * Reduce the number of edits by eliminating operationally trivial equalities. 
@@ -456,6 +480,7 @@ class diff_match_patch */ public: TDiffVector diff_fromDelta( const std::wstring &text1, const std::wstring &delta ); + TDiffVector diff_fromDelta( const std::string &text1, const std::string &delta ) { return diff_fromDelta( ::to_wstring( text1 ), ::to_wstring( delta ) ); } // MATCH FUNCTIONS @@ -469,6 +494,7 @@ class diff_match_patch */ public: std::size_t match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); + std::size_t match_main( const std::string &text, const std::string &pattern, std::size_t loc ) { return match_main( ::to_wstring( text ), ::to_wstring( pattern ), loc ); } /** * Locate the best instance of 'pattern' in 'text' near 'loc' using the @@ -480,6 +506,7 @@ class diff_match_patch */ protected: std::size_t match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); + std::size_t match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ) { return match_bitap( ::to_wstring( text ), ::to_wstring( pattern ), loc ); } /** * Compute and return the score for a match with e errors and x location. @@ -499,6 +526,7 @@ class diff_match_patch */ protected: TCharPosMap match_alphabet( const std::wstring &pattern ); + TCharPosMap match_alphabet( const std::string &pattern ) { return match_alphabet( ::to_wstring( pattern ) ); } // PATCH FUNCTIONS @@ -510,6 +538,7 @@ class diff_match_patch */ protected: void patch_addContext( Patch &patch, const std::wstring &text ); + void patch_addContext( Patch &patch, const std::string &text ) { return patch_addContext( patch, ::to_wstring( text ) ); } /** * Compute a list of patches to turn text1 into text2. 
@@ -520,6 +549,7 @@ class diff_match_patch */ public: TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2 ); + TPatchVector patch_make( const std::string &text1, const std::string &text2 ) { return patch_make( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } /** * Compute a list of patches to turn text1 into text2. @@ -541,6 +571,7 @@ class diff_match_patch */ public: TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2, const TDiffVector &diffs ); + TPatchVector patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ) { return patch_make( ::to_wstring( text1 ), ::to_wstring( text2 ), diffs ); } /** * Compute a list of patches to turn text1 into text2. @@ -551,6 +582,7 @@ class diff_match_patch */ public: TPatchVector patch_make( const std::wstring &text1, const TDiffVector &diffs ); + TPatchVector patch_make( const std::string &text1, const TDiffVector &diffs ) { return patch_make( ::to_wstring( text1 ), diffs ); } /** * Given an array of patches, return another array that is identical. @@ -570,6 +602,7 @@ class diff_match_patch */ public: std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::wstring text ); + std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::string text ) { return patch_apply( patches, ::to_wstring( text ) ); } /** * Add some padding on text start and end so that edges can match something. @@ -606,6 +639,7 @@ class diff_match_patch */ public: TPatchVector patch_fromText( const std::wstring &textline ); + TPatchVector patch_fromText( const std::string &textline ) { return patch_fromText( ::to_wstring( textline ) ); } /** * A safer version of std::wstring.mid(pos). 
This one returns "" instead of diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index 6180c9c5..0654a005 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -106,33 +106,33 @@ void diff_match_patch_test::run_all_tests() void diff_match_patch_test::testDiffCommonPrefix() { // Detect any common prefix. - assertEquals( "diff_commonPrefix: nullptr case.", 0, dmp.diff_commonPrefix( L"abc", L"xyz" ) ); + assertEquals( "diff_commonPrefix: nullptr case.", 0, dmp.diff_commonPrefix( "abc", "xyz" ) ); - assertEquals( "diff_commonPrefix: Non-nullptr case.", 4, dmp.diff_commonPrefix( L"1234abcdef", L"1234xyz" ) ); + assertEquals( "diff_commonPrefix: Non-nullptr case.", 4, dmp.diff_commonPrefix( "1234abcdef", "1234xyz" ) ); - assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( L"1234", L"1234xyz" ) ); + assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( "1234", "1234xyz" ) ); } void diff_match_patch_test::testDiffCommonSuffix() { // Detect any common suffix. - assertEquals( "diff_commonSuffix: nullptr case.", 0, dmp.diff_commonSuffix( L"abc", L"xyz" ) ); + assertEquals( "diff_commonSuffix: nullptr case.", 0, dmp.diff_commonSuffix( "abc", "xyz" ) ); - assertEquals( "diff_commonSuffix: Non-nullptr case.", 4, dmp.diff_commonSuffix( L"abcdef1234", L"xyz1234" ) ); + assertEquals( "diff_commonSuffix: Non-nullptr case.", 4, dmp.diff_commonSuffix( "abcdef1234", "xyz1234" ) ); - assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( L"1234", L"xyz1234" ) ); + assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( "1234", "xyz1234" ) ); } void diff_match_patch_test::testDiffCommonOverlap() { // Detect any suffix/prefix overlap. 
- assertEquals( "diff_commonOverlap: nullptr case.", 0, dmp.diff_commonOverlap( L"", L"abcd" ) ); + assertEquals( "diff_commonOverlap: nullptr case.", 0, dmp.diff_commonOverlap( "", "abcd" ) ); - assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( L"abc", L"abcd" ) ); + assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( "abc", "abcd" ) ); - assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( L"123456", L"abcd" ) ); + assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( "123456", "abcd" ) ); - assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( L"123456xxx", L"xxxabcd" ) ); + assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( "123456xxx", "xxxabcd" ) ); // Some overly clever languages (C#) may treat ligatures as equal to their // component letters. E.g. U+FB01 == 'fi' @@ -143,26 +143,26 @@ void diff_match_patch_test::testDiffHalfmatch() { // Detect a halfmatch. 
dmp.Diff_Timeout = 1; - assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( L"1234567890", L"abcdef" ) ); + assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( "1234567890", "abcdef" ) ); - assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( L"12345", L"23" ) ); + assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( "12345", "23" ) ); - assertEquals( "diff_halfMatch: Single Match #1.", { L"12", L"90", L"a", L"z", L"345678" }, dmp.diff_halfMatch( L"1234567890", L"a345678z" ) ); + assertEquals( "diff_halfMatch: Single Match #1.", { L"12", L"90", L"a", L"z", L"345678" }, dmp.diff_halfMatch( "1234567890", "a345678z" ) ); - assertEquals( "diff_halfMatch: Single Match #2.", { L"a", L"z", L"12", L"90", L"345678" }, dmp.diff_halfMatch( L"a345678z", L"1234567890" ) ); + assertEquals( "diff_halfMatch: Single Match #2.", { L"a", L"z", L"12", L"90", L"345678" }, dmp.diff_halfMatch( "a345678z", "1234567890" ) ); - assertEquals( "diff_halfMatch: Single Match #3.", { L"abc", L"z", L"1234", L"0", L"56789" }, dmp.diff_halfMatch( L"abc56789z", L"1234567890" ) ); + assertEquals( "diff_halfMatch: Single Match #3.", { L"abc", L"z", L"1234", L"0", L"56789" }, dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); - assertEquals( "diff_halfMatch: Single Match #4.", { L"a", L"xyz", L"1", L"7890", L"23456" }, dmp.diff_halfMatch( L"a23456xyz", L"1234567890" ) ); + assertEquals( "diff_halfMatch: Single Match #4.", { L"a", L"xyz", L"1", L"7890", L"23456" }, dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); - assertEquals( "diff_halfMatch: Multiple Matches #1.", { L"12123", L"123121", L"a", L"z", L"1234123451234" }, dmp.diff_halfMatch( L"121231234123451234123121", L"a1234123451234z" ) ); + assertEquals( "diff_halfMatch: Multiple Matches #1.", { L"12123", L"123121", L"a", L"z", L"1234123451234" }, dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); - assertEquals( "diff_halfMatch: Multiple Matches #2.", { L"", L"-=-=-=-=-=", 
L"x", L"", L"x-=-=-=-=-=-=-=" }, dmp.diff_halfMatch( L"x-=-=-=-=-=-=-=-=-=-=-=-=", L"xx-=-=-=-=-=-=-=" ) ); + assertEquals( "diff_halfMatch: Multiple Matches #2.", { L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-=" }, dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); - assertEquals( "diff_halfMatch: Multiple Matches #3.", { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" }, dmp.diff_halfMatch( L"-=-=-=-=-=-=-=-=-=-=-=-=y", L"-=-=-=-=-=-=-=yy" ) ); + assertEquals( "diff_halfMatch: Multiple Matches #3.", { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" }, dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy - assertEquals( "diff_halfMatch: Non-optimal halfmatch.", { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" }, dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); + assertEquals( "diff_halfMatch: Non-optimal halfmatch.", { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" }, dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); dmp.Diff_Timeout = 0; assertEmpty( "diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); @@ -176,7 +176,7 @@ void diff_match_patch_test::testDiffLinesToChars() tmpVarList.emplace_back( to_wstring( { 1, 2, 1 } ) ); //(("\u0001\u0002\u0001")); tmpVarList.emplace_back( to_wstring( { 2, 1, 2 } ) ); // (("\u0002\u0001\u0002")); tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( L"alpha\nbeta\nalpha\n", L"beta\nalpha\nbeta\n" ) ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n" ) ); tmpVector.clear(); tmpVarList.clear(); @@ -187,7 +187,7 @@ void diff_match_patch_test::testDiffLinesToChars() tmpVarList.emplace_back( std::wstring() ); tmpVarList.emplace_back( to_wstring( { 1, 2, 3, 3 } ) ); // (("\u0001\u0002\u0003\u0003")); 
tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( L"", L"alpha\r\nbeta\r\n\r\n\r\n" ) ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "", "alpha\r\nbeta\r\n\r\n\r\n" ) ); tmpVector.clear(); tmpVarList.clear(); @@ -197,7 +197,7 @@ void diff_match_patch_test::testDiffLinesToChars() tmpVarList.emplace_back( to_wstring( 1 ) ); // (("\u0001")); tmpVarList.emplace_back( to_wstring( 2 ) ); // (("\u0002")); tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( L"a", L"b" ) ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "a", "b" ) ); // More than 256 to reveal any 8-bit limitations. int n = 300; @@ -217,15 +217,15 @@ void diff_match_patch_test::testDiffLinesToChars() tmpVarList.emplace_back( chars ); tmpVarList.emplace_back( std::wstring() ); tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, L"" ) ); + assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, {} ) ); } void diff_match_patch_test::testDiffCharsToLines() { // First check that Diff equality works. - assertTrue( "diff_charsToLines:", Diff( EQUAL, L"a" ) == Diff( EQUAL, L"a" ) ); + assertTrue( "diff_charsToLines:", Diff( EQUAL, "a" ) == Diff( EQUAL, "a" ) ); - assertEquals( "diff_charsToLines:", Diff( EQUAL, L"a" ), Diff( EQUAL, L"a" ) ); + assertEquals( "diff_charsToLines:", Diff( EQUAL, "a" ), Diff( EQUAL, "a" ) ); // Convert chars up to lines. 
TDiffVector diffs; @@ -236,7 +236,7 @@ void diff_match_patch_test::testDiffCharsToLines() tmpVector.emplace_back( L"alpha\n" ); tmpVector.emplace_back( L"beta\n" ); dmp.diff_charsToLines( diffs, tmpVector ); - assertEquals( "diff_charsToLines:", { Diff( EQUAL, L"alpha\nbeta\nalpha\n" ), Diff( INSERT, L"beta\nalpha\nbeta\n" ) }, diffs ); + assertEquals( "diff_charsToLines:", { Diff( EQUAL, "alpha\nbeta\nalpha\n" ), Diff( INSERT, "beta\nalpha\nbeta\n" ) }, diffs ); // More than 256 to reveal any 8-bit limitations. int n = 300; @@ -265,49 +265,49 @@ void diff_match_patch_test::testDiffCleanupMerge() dmp.diff_cleanupMerge( diffs ); assertEquals( "diff_cleanupMerge: nullptr case.", {}, diffs ); - diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"c" ) }; + diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: No change case.", { Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"c" ) }, diffs ); + assertEquals( "diff_cleanupMerge: No change case.", { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) }, diffs ); - diffs = { Diff( EQUAL, L"a" ), Diff( EQUAL, L"b" ), Diff( EQUAL, L"c" ) }; + diffs = { Diff( EQUAL, "a" ), Diff( EQUAL, "b" ), Diff( EQUAL, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge equalities.", { Diff( EQUAL, L"abc" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge equalities.", { Diff( EQUAL, "abc" ) }, diffs ); - diffs = { Diff( DELETE, L"a" ), Diff( DELETE, L"b" ), Diff( DELETE, L"c" ) }; + diffs = { Diff( DELETE, "a" ), Diff( DELETE, "b" ), Diff( DELETE, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge deletions.", { Diff( DELETE, L"abc" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge deletions.", { Diff( DELETE, "abc" ) }, diffs ); - diffs = { Diff( INSERT, L"a" ), Diff( INSERT, L"b" ), Diff( INSERT, L"c" ) }; + diffs = { Diff( INSERT, "a" ), 
Diff( INSERT, "b" ), Diff( INSERT, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge insertions.", { Diff( INSERT, L"abc" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge insertions.", { Diff( INSERT, "abc" ) }, diffs ); - diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"b" ), Diff( DELETE, L"c" ), Diff( INSERT, L"d" ), Diff( EQUAL, L"e" ), Diff( EQUAL, L"f" ) }; + diffs = { Diff( DELETE, "a" ), Diff( INSERT, "b" ), Diff( DELETE, "c" ), Diff( INSERT, "d" ), Diff( EQUAL, "e" ), Diff( EQUAL, "f" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge interweave.", { Diff( DELETE, L"ac" ), Diff( INSERT, L"bd" ), Diff( EQUAL, L"ef" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge interweave.", { Diff( DELETE, "ac" ), Diff( INSERT, "bd" ), Diff( EQUAL, "ef" ) }, diffs ); - diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"abc" ), Diff( DELETE, L"dc" ) }; + diffs = { Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", { Diff( EQUAL, L"a" ), Diff( DELETE, L"d" ), Diff( INSERT, L"b" ), Diff( EQUAL, L"c" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", { Diff( EQUAL, "a" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "c" ) }, diffs ); - diffs = { Diff( EQUAL, L"x" ), Diff( DELETE, L"a" ), Diff( INSERT, L"abc" ), Diff( DELETE, L"dc" ), Diff( EQUAL, L"y" ) }; + diffs = { Diff( EQUAL, "x" ), Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ), Diff( EQUAL, "y" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", { Diff( EQUAL, L"xa" ), Diff( DELETE, L"d" ), Diff( INSERT, L"b" ), Diff( EQUAL, L"cy" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", { Diff( EQUAL, "xa" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "cy" ) 
}, diffs ); - diffs = { Diff( EQUAL, L"a" ), Diff( INSERT, L"ba" ), Diff( EQUAL, L"c" ) }; + diffs = { Diff( EQUAL, "a" ), Diff( INSERT, "ba" ), Diff( EQUAL, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit left.", { Diff( INSERT, L"ab" ), Diff( EQUAL, L"ac" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left.", { Diff( INSERT, "ab" ), Diff( EQUAL, "ac" ) }, diffs ); - diffs = { Diff( EQUAL, L"c" ), Diff( INSERT, L"ab" ), Diff( EQUAL, L"a" ) }; + diffs = { Diff( EQUAL, "c" ), Diff( INSERT, "ab" ), Diff( EQUAL, "a" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit right.", { Diff( EQUAL, L"ca" ), Diff( INSERT, L"ba" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right.", { Diff( EQUAL, "ca" ), Diff( INSERT, "ba" ) }, diffs ); - diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( EQUAL, L"c" ), Diff( DELETE, L"ac" ), Diff( EQUAL, L"x" ) }; + diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( EQUAL, "c" ), Diff( DELETE, "ac" ), Diff( EQUAL, "x" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit left recursive.", { Diff( DELETE, L"abc" ), Diff( EQUAL, L"acx" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left recursive.", { Diff( DELETE, "abc" ), Diff( EQUAL, "acx" ) }, diffs ); - diffs = { Diff( EQUAL, L"x" ), Diff( DELETE, L"ca" ), Diff( EQUAL, L"c" ), Diff( DELETE, L"b" ), Diff( EQUAL, L"a" ) }; + diffs = { Diff( EQUAL, "x" ), Diff( DELETE, "ca" ), Diff( EQUAL, "c" ), Diff( DELETE, "b" ), Diff( EQUAL, "a" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit right recursive.", { Diff( EQUAL, L"xca" ), Diff( DELETE, L"cba" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right recursive.", { Diff( EQUAL, "xca" ), Diff( DELETE, "cba" ) }, diffs ); } void diff_match_patch_test::testDiffCleanupSemanticLossless() @@ -317,33 +317,33 @@ void 
diff_match_patch_test::testDiffCleanupSemanticLossless() dmp.diff_cleanupSemanticLossless( diffs ); assertEquals( "diff_cleanupSemantic: nullptr case.", {}, diffs ); - diffs = { Diff( EQUAL, L"AAA\r\n\r\nBBB" ), Diff( INSERT, L"\r\nDDD\r\n\r\nBBB" ), Diff( EQUAL, L"\r\nEEE" ) }; + diffs = { Diff( EQUAL, "AAA\r\n\r\nBBB" ), Diff( INSERT, "\r\nDDD\r\n\r\nBBB" ), Diff( EQUAL, "\r\nEEE" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemanticLossless: Blank lines.", { Diff( EQUAL, L"AAA\r\n\r\n" ), Diff( INSERT, L"BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, L"BBB\r\nEEE" ) }, diffs ); + assertEquals( "diff_cleanupSemanticLossless: Blank lines.", { Diff( EQUAL, "AAA\r\n\r\n" ), Diff( INSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, "BBB\r\nEEE" ) }, diffs ); - diffs = { Diff( EQUAL, L"AAA\r\nBBB" ), Diff( INSERT, L" DDD\r\nBBB" ), Diff( EQUAL, L" EEE" ) }; + diffs = { Diff( EQUAL, "AAA\r\nBBB" ), Diff( INSERT, " DDD\r\nBBB" ), Diff( EQUAL, " EEE" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", { Diff( EQUAL, L"AAA\r\n" ), Diff( INSERT, L"BBB DDD\r\n" ), Diff( EQUAL, L"BBB EEE" ) }, diffs ); + assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", { Diff( EQUAL, "AAA\r\n" ), Diff( INSERT, "BBB DDD\r\n" ), Diff( EQUAL, "BBB EEE" ) }, diffs ); - diffs = { Diff( EQUAL, L"The c" ), Diff( INSERT, L"ow and the c" ), Diff( EQUAL, L"at." ) }; + diffs = { Diff( EQUAL, "The c" ), Diff( INSERT, "ow and the c" ), Diff( EQUAL, "at." ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, L"The " ), Diff( INSERT, L"cow and the " ), Diff( EQUAL, L"cat." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, "The " ), Diff( INSERT, "cow and the " ), Diff( EQUAL, "cat." ) }, diffs ); - diffs = { Diff( EQUAL, L"The-c" ), Diff( INSERT, L"ow-and-the-c" ), Diff( EQUAL, L"at." 
) }; + diffs = { Diff( EQUAL, "The-c" ), Diff( INSERT, "ow-and-the-c" ), Diff( EQUAL, "at." ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", { Diff( EQUAL, L"The-" ), Diff( INSERT, L"cow-and-the-" ), Diff( EQUAL, L"cat." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", { Diff( EQUAL, "The-" ), Diff( INSERT, "cow-and-the-" ), Diff( EQUAL, "cat." ) }, diffs ); - diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"a" ), Diff( EQUAL, L"ax" ) }; + diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "a" ), Diff( EQUAL, "ax" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Hitting the start.", { Diff( DELETE, L"a" ), Diff( EQUAL, L"aax" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the start.", { Diff( DELETE, "a" ), Diff( EQUAL, "aax" ) }, diffs ); - diffs = { Diff( EQUAL, L"xa" ), Diff( DELETE, L"a" ), Diff( EQUAL, L"a" ) }; + diffs = { Diff( EQUAL, "xa" ), Diff( DELETE, "a" ), Diff( EQUAL, "a" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Hitting the end.", { Diff( EQUAL, L"xaa" ), Diff( DELETE, L"a" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the end.", { Diff( EQUAL, "xaa" ), Diff( DELETE, "a" ) }, diffs ); - diffs = { Diff( EQUAL, L"The xxx. The " ), Diff( INSERT, L"zzz. The " ), Diff( EQUAL, L"yyy." ) }; + diffs = { Diff( EQUAL, "The xxx. The " ), Diff( INSERT, "zzz. The " ), Diff( EQUAL, "yyy." ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Sentence boundaries.", { Diff( EQUAL, L"The xxx." ), Diff( INSERT, L" The zzz." ), Diff( EQUAL, L" The yyy." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Sentence boundaries.", { Diff( EQUAL, "The xxx." ), Diff( INSERT, " The zzz." ), Diff( EQUAL, " The yyy." 
) }, diffs ); } void diff_match_patch_test::testDiffCleanupSemantic() @@ -353,45 +353,45 @@ void diff_match_patch_test::testDiffCleanupSemantic() dmp.diff_cleanupSemantic( diffs ); assertEquals( "diff_cleanupSemantic: nullptr case.", {}, diffs ); - diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"cd" ), Diff( EQUAL, L"12" ), Diff( DELETE, L"e" ) }; + diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No elimination #1.", { Diff( DELETE, L"ab" ), Diff( INSERT, L"cd" ), Diff( EQUAL, L"12" ), Diff( DELETE, L"e" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #1.", { Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) }, diffs ); - diffs = { Diff( DELETE, L"abc" ), Diff( INSERT, L"ABC" ), Diff( EQUAL, L"1234" ), Diff( DELETE, L"wxyz" ) }; + diffs = { Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No elimination #2.", { Diff( DELETE, L"abc" ), Diff( INSERT, L"ABC" ), Diff( EQUAL, L"1234" ), Diff( DELETE, L"wxyz" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #2.", { Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) }, diffs ); - diffs = { Diff( DELETE, L"a" ), Diff( EQUAL, L"b" ), Diff( DELETE, L"c" ) }; + diffs = { Diff( DELETE, "a" ), Diff( EQUAL, "b" ), Diff( DELETE, "c" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Simple elimination.", { Diff( DELETE, L"abc" ), Diff( INSERT, L"b" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Simple elimination.", { Diff( DELETE, "abc" ), Diff( INSERT, "b" ) }, diffs ); - diffs = { Diff( DELETE, L"ab" ), Diff( EQUAL, L"cd" ), Diff( DELETE, L"e" ), Diff( EQUAL, L"f" ), Diff( INSERT, L"g" ) }; + diffs = { Diff( DELETE, "ab" ), Diff( EQUAL, 
"cd" ), Diff( DELETE, "e" ), Diff( EQUAL, "f" ), Diff( INSERT, "g" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Backpass elimination.", { Diff( DELETE, L"abcdef" ), Diff( INSERT, L"cdfg" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Backpass elimination.", { Diff( DELETE, "abcdef" ), Diff( INSERT, "cdfg" ) }, diffs ); - diffs = { Diff( INSERT, L"1" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"2" ), Diff( EQUAL, L"_" ), Diff( INSERT, L"1" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"b" ), Diff( INSERT, L"2" ) }; + diffs = { Diff( INSERT, "1" ), Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "2" ), Diff( EQUAL, "_" ), Diff( INSERT, "1" ), Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "2" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Multiple elimination.", { Diff( DELETE, L"AB_AB" ), Diff( INSERT, L"1A2_1A2" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Multiple elimination.", { Diff( DELETE, "AB_AB" ), Diff( INSERT, "1A2_1A2" ) }, diffs ); - diffs = { Diff( EQUAL, L"The c" ), Diff( DELETE, L"ow and the c" ), Diff( EQUAL, L"at." ) }; + diffs = { Diff( EQUAL, "The c" ), Diff( DELETE, "ow and the c" ), Diff( EQUAL, "at." ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, L"The " ), Diff( DELETE, L"cow and the " ), Diff( EQUAL, L"cat." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, "The " ), Diff( DELETE, "cow and the " ), Diff( EQUAL, "cat." 
) }, diffs ); - diffs = { Diff( DELETE, L"abcxx" ), Diff( INSERT, L"xxdef" ) }; + diffs = { Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No overlap elimination.", { Diff( DELETE, L"abcxx" ), Diff( INSERT, L"xxdef" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: No overlap elimination.", { Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) }, diffs ); - diffs = { Diff( DELETE, L"abcxxx" ), Diff( INSERT, L"xxxdef" ) }; + diffs = { Diff( DELETE, "abcxxx" ), Diff( INSERT, "xxxdef" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Overlap elimination.", { Diff( DELETE, L"abc" ), Diff( EQUAL, L"xxx" ), Diff( INSERT, L"def" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Overlap elimination.", { Diff( DELETE, "abc" ), Diff( EQUAL, "xxx" ), Diff( INSERT, "def" ) }, diffs ); - diffs = { Diff( DELETE, L"xxxabc" ), Diff( INSERT, L"defxxx" ) }; + diffs = { Diff( DELETE, "xxxabc" ), Diff( INSERT, "defxxx" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", { Diff( INSERT, L"def" ), Diff( EQUAL, L"xxx" ), Diff( DELETE, L"abc" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", { Diff( INSERT, "def" ), Diff( EQUAL, "xxx" ), Diff( DELETE, "abc" ) }, diffs ); - diffs = { Diff( DELETE, L"abcd1212" ), Diff( INSERT, L"1212efghi" ), Diff( EQUAL, L"----" ), Diff( DELETE, L"A3" ), Diff( INSERT, L"3BC" ) }; + diffs = { Diff( DELETE, "abcd1212" ), Diff( INSERT, "1212efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A3" ), Diff( INSERT, "3BC" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", { Diff( DELETE, L"abcd" ), Diff( EQUAL, L"1212" ), Diff( INSERT, L"efghi" ), Diff( EQUAL, L"----" ), Diff( DELETE, L"a" ), Diff( EQUAL, L"3" ), Diff( INSERT, L"BC" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", 
{ Diff( DELETE, "abcd" ), Diff( EQUAL, "1212" ), Diff( INSERT, "efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "a" ), Diff( EQUAL, "3" ), Diff( INSERT, "BC" ) }, diffs ); } void diff_match_patch_test::testDiffCleanupEfficiency() @@ -402,54 +402,54 @@ void diff_match_patch_test::testDiffCleanupEfficiency() dmp.diff_cleanupEfficiency( diffs ); assertEquals( "diff_cleanupEfficiency: nullptr case.", {}, diffs ); - diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"wxyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: No elimination.", { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"wxyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: No elimination.", { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }, diffs ); - diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"xyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", { Diff( DELETE, L"abxyzcd" ), Diff( INSERT, L"12xyz34" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xyz34" ) }, diffs ); - diffs = { Diff( INSERT, L"12" ), Diff( EQUAL, L"x" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + diffs = { Diff( INSERT, "12" ), Diff( EQUAL, "x" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", { Diff( DELETE, L"xcd" ), Diff( INSERT, L"12x34" ) }, 
diffs ); + assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", { Diff( DELETE, "xcd" ), Diff( INSERT, "12x34" ) }, diffs ); - diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"xy" ), Diff( INSERT, L"34" ), Diff( EQUAL, L"z" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"56" ) }; + diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xy" ), Diff( INSERT, "34" ), Diff( EQUAL, "z" ), Diff( DELETE, "cd" ), Diff( INSERT, "56" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Backpass elimination.", { Diff( DELETE, L"abxyzcd" ), Diff( INSERT, L"12xy34z56" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: Backpass elimination.", { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xy34z56" ) }, diffs ); dmp.Diff_EditCost = 5; - diffs = { Diff( DELETE, L"ab" ), Diff( INSERT, L"12" ), Diff( EQUAL, L"wxyz" ), Diff( DELETE, L"cd" ), Diff( INSERT, L"34" ) }; + diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: High cost elimination.", { Diff( DELETE, L"abwxyzcd" ), Diff( INSERT, L"12wxyz34" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: High cost elimination.", { Diff( DELETE, "abwxyzcd" ), Diff( INSERT, "12wxyz34" ) }, diffs ); dmp.Diff_EditCost = 4; } void diff_match_patch_test::testDiffPrettyHtml() { // Pretty print. - auto diffs = TDiffVector( { Diff( EQUAL, L"a\n" ), Diff( DELETE, L"b" ), Diff( INSERT, L"c&d" ) } ); - assertEquals( "diff_prettyHtml:", L"
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); + auto diffs = TDiffVector( { Diff( EQUAL, "a\n" ), Diff( DELETE, "b" ), Diff( INSERT, "c&d" ) } ); + assertEquals( "diff_prettyHtml:", "
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); } void diff_match_patch_test::testDiffText() { // Compute the source and destination texts. - auto diffs = TDiffVector( { Diff( EQUAL, L"jump" ), Diff( DELETE, L"s" ), Diff( INSERT, L"ed" ), Diff( EQUAL, L" over " ), Diff( DELETE, L"the" ), Diff( INSERT, L"a" ), Diff( EQUAL, L" lazy" ) } ); - assertEquals( "diff_text1:", L"jumps over the lazy", dmp.diff_text1( diffs ) ); - assertEquals( "diff_text2:", L"jumped over a lazy", dmp.diff_text2( diffs ) ); + auto diffs = TDiffVector( { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ) } ); + assertEquals( "diff_text1:", "jumps over the lazy", dmp.diff_text1( diffs ) ); + assertEquals( "diff_text2:", "jumped over a lazy", dmp.diff_text2( diffs ) ); } void diff_match_patch_test::testDiffDelta() { // Convert a diff into delta string. - auto diffs = TDiffVector( { Diff( EQUAL, L"jump" ), Diff( DELETE, L"s" ), Diff( INSERT, L"ed" ), Diff( EQUAL, L" over " ), Diff( DELETE, L"the" ), Diff( INSERT, L"a" ), Diff( EQUAL, L" lazy" ), Diff( INSERT, L"old dog" ) } ); + auto diffs = TDiffVector( { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ), Diff( INSERT, "old dog" ) } ); std::wstring text1 = dmp.diff_text1( diffs ); - assertEquals( "diff_text1: Base text.", L"jumps over the lazy", text1 ); + assertEquals( "diff_text1: Base text.", "jumps over the lazy", text1 ); std::wstring delta = dmp.diff_toDelta( diffs ); std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; - assertEquals( "diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); + assertEquals( "diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); // Convert delta string into a diff. 
assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); @@ -486,7 +486,7 @@ void diff_match_patch_test::testDiffDelta() exceptionTriggered = false; try { - dmp.diff_fromDelta( L"", L"+%c3%xy" ); + dmp.diff_fromDelta( "", "+%c3%xy" ); assertFalse( "diff_fromDelta: Invalid character.", true ); } catch ( std::wstring ex ) @@ -504,41 +504,41 @@ void diff_match_patch_test::testDiffDelta() assertEquals( "diff_text1: Unicode text", golden, text1 ); delta = dmp.diff_toDelta( diffs ); - assertEquals( "diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta ); + assertEquals( "diff_toDelta: Unicode", "=7\t-7\t+%DA%82 %02 %5C %7C", delta ); assertEquals( "diff_fromDelta: Unicode", diffs, dmp.diff_fromDelta( text1, delta ) ); // Verify pool of unchanged characters. - diffs = { Diff( INSERT, L"A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # " ) }; + diffs = { Diff( INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # " ) }; std::wstring text2 = dmp.diff_text2( diffs ); - assertEquals( "diff_text2: Unchanged characters.", L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); + assertEquals( "diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); delta = dmp.diff_toDelta( diffs ); - assertEquals( "diff_toDelta: Unchanged characters.", L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); + assertEquals( "diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); // Convert delta string into a diff. - assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( L"", delta ) ); + assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( {}, delta ) ); } void diff_match_patch_test::testDiffXIndex() { // Translate a location in text1 to text2. 
- auto diffs = TDiffVector( { Diff( DELETE, L"a" ), Diff( INSERT, L"1234" ), Diff( EQUAL, L"xyz" ) } ); + auto diffs = TDiffVector( { Diff( DELETE, "a" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) } ); assertEquals( "diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex( diffs, 2 ) ); - diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"1234" ), Diff( EQUAL, L"xyz" ) }; + diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "1234" ), Diff( EQUAL, "xyz" ) }; assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); } void diff_match_patch_test::testDiffLevenshtein() { - auto diffs = TDiffVector( { Diff( DELETE, L"abc" ), Diff( INSERT, L"1234" ), Diff( EQUAL, L"xyz" ) } ); + auto diffs = TDiffVector( { Diff( DELETE, "abc" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) } ); assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); - diffs = { Diff( EQUAL, L"xyz" ), Diff( DELETE, L"abc" ), Diff( INSERT, L"1234" ) }; + diffs = { Diff( EQUAL, "xyz" ), Diff( DELETE, "abc" ), Diff( INSERT, "1234" ) }; assertEquals( "diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein( diffs ) ); - diffs = { Diff( DELETE, L"abc" ), Diff( EQUAL, L"xyz" ), Diff( INSERT, L"1234" ) }; + diffs = { Diff( DELETE, "abc" ), Diff( EQUAL, "xyz" ), Diff( INSERT, "1234" ) }; assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); } @@ -550,12 +550,12 @@ void diff_match_patch_test::testDiffBisect() // Since the resulting diff hasn't been normalized, it would be ok if // the insertion and deletion pairs are swapped. // If the order changes, tweak this test as required. 
- auto diffs = TDiffVector( { Diff( DELETE, L"c" ), Diff( INSERT, L"m" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"t" ), Diff( INSERT, L"p" ) } ); + auto diffs = TDiffVector( { Diff( DELETE, "c" ), Diff( INSERT, "m" ), Diff( EQUAL, "a" ), Diff( DELETE, "t" ), Diff( INSERT, "p" ) } ); auto results = dmp.diff_bisect( a, b, std::numeric_limits< clock_t >::max() ); assertEquals( "diff_bisect: Normal.", diffs, results ); // Timeout. - diffs = { Diff( DELETE, L"cat" ), Diff( INSERT, L"map" ) }; + diffs = { Diff( DELETE, "cat" ), Diff( INSERT, "map" ) }; assertEquals( "diff_bisect: Timeout.", diffs, dmp.diff_bisect( a, b, 0 ) ); } @@ -563,52 +563,52 @@ void diff_match_patch_test::testDiffMain() { // Perform a trivial diff. auto diffs = TDiffVector(); - assertEquals( "diff_main: nullptr case.", diffs, dmp.diff_main( L"", L"", false ) ); + assertEquals( "diff_main: nullptr case.", diffs, dmp.diff_main( "", "", false ) ); - diffs = { Diff( DELETE, L"abc" ) }; - assertEquals( "diff_main: RHS side nullptr case.", diffs, dmp.diff_main( L"abc", L"", false ) ); + diffs = { Diff( DELETE, "abc" ) }; + assertEquals( "diff_main: RHS side nullptr case.", diffs, dmp.diff_main( "abc", "", false ) ); - diffs = { Diff( INSERT, L"abc" ) }; - assertEquals( "diff_main: LHS side nullptr case.", diffs, dmp.diff_main( L"", L"abc", false ) ); + diffs = { Diff( INSERT, "abc" ) }; + assertEquals( "diff_main: LHS side nullptr case.", diffs, dmp.diff_main( "", "abc", false ) ); - diffs = { Diff( EQUAL, L"abc" ) }; - assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( L"abc", L"abc", false ) ); + diffs = { Diff( EQUAL, "abc" ) }; + assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( "abc", "abc", false ) ); - diffs = { Diff( EQUAL, L"ab" ), Diff( INSERT, L"123" ), Diff( EQUAL, L"c" ) }; - assertEquals( "diff_main: Simple insertion.", diffs, dmp.diff_main( L"abc", L"ab123c", false ) ); + diffs = { Diff( EQUAL, "ab" ), Diff( INSERT, "123" ), Diff( EQUAL, "c" ) }; + assertEquals( "diff_main: 
Simple insertion.", diffs, dmp.diff_main( "abc", "ab123c", false ) ); - diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"123" ), Diff( EQUAL, L"bc" ) }; - assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( L"a123bc", L"abc", false ) ); + diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "bc" ) }; + assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( "a123bc", "abc", false ) ); - diffs = { Diff( EQUAL, L"a" ), Diff( INSERT, L"123" ), Diff( EQUAL, L"b" ), Diff( INSERT, L"456" ), Diff( EQUAL, L"c" ) }; - assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( L"abc", L"a123b456c", false ) ); + diffs = { Diff( EQUAL, "a" ), Diff( INSERT, "123" ), Diff( EQUAL, "b" ), Diff( INSERT, "456" ), Diff( EQUAL, "c" ) }; + assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( "abc", "a123b456c", false ) ); - diffs = { Diff( EQUAL, L"a" ), Diff( DELETE, L"123" ), Diff( EQUAL, L"b" ), Diff( DELETE, L"456" ), Diff( EQUAL, L"c" ) }; - assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( L"a123b456c", L"abc", false ) ); + diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "b" ), Diff( DELETE, "456" ), Diff( EQUAL, "c" ) }; + assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( "a123b456c", "abc", false ) ); // Perform a real diff. // Switch off the timeout. dmp.Diff_Timeout = 0; - diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"b" ) }; - assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( L"a", L"b", false ) ); + diffs = { Diff( DELETE, "a" ), Diff( INSERT, "b" ) }; + assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( "a", "b", false ) ); - diffs = { Diff( DELETE, L"Apple" ), Diff( INSERT, L"Banana" ), Diff( EQUAL, L"s are a" ), Diff( INSERT, L"lso" ), Diff( EQUAL, L" fruit." 
) }; - assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( L"Apples are a fruit.", L"Bananas are also fruit.", false ) ); + diffs = { Diff( DELETE, "Apple" ), Diff( INSERT, "Banana" ), Diff( EQUAL, "s are a" ), Diff( INSERT, "lso" ), Diff( EQUAL, " fruit." ) }; + assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( "Apples are a fruit.", "Bananas are also fruit.", false ) ); - diffs = { Diff( DELETE, L"a" ), Diff( INSERT, L"\u0680" ), Diff( EQUAL, L"x" ), Diff( DELETE, L"\t" ), Diff( INSERT, to_wstring( kZero ) ) }; + diffs = { Diff( DELETE, "a" ), Diff( INSERT, L"\u0680" ), Diff( EQUAL, "x" ), Diff( DELETE, "\t" ), Diff( INSERT, to_wstring( kZero ) ) }; assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( L"ax\t", std::wstring( L"\u0680x" ) + kZero, false ) ); - diffs = { Diff( DELETE, L"1" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"y" ), Diff( EQUAL, L"b" ), Diff( DELETE, L"2" ), Diff( INSERT, L"xab" ) }; - assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( L"1ayb2", L"abxab", false ) ); + diffs = { Diff( DELETE, "1" ), Diff( EQUAL, "a" ), Diff( DELETE, "y" ), Diff( EQUAL, "b" ), Diff( DELETE, "2" ), Diff( INSERT, "xab" ) }; + assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( "1ayb2", "abxab", false ) ); - diffs = { Diff( INSERT, L"xaxcx" ), Diff( EQUAL, L"abc" ), Diff( DELETE, L"y" ) }; - assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( L"abcy", L"xaxcxabc", false ) ); + diffs = { Diff( INSERT, "xaxcx" ), Diff( EQUAL, "abc" ), Diff( DELETE, "y" ) }; + assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( "abcy", "xaxcxabc", false ) ); - diffs = { Diff( DELETE, L"ABCD" ), Diff( EQUAL, L"a" ), Diff( DELETE, L"=" ), Diff( INSERT, L"-" ), Diff( EQUAL, L"bcd" ), Diff( DELETE, L"=" ), Diff( INSERT, L"-" ), Diff( EQUAL, L"efghijklmnopqrs" ), Diff( DELETE, L"EFGHIJKLMNOefg" ) }; - assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( L"ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", 
L"a-bcd-efghijklmnopqrs", false ) ); + diffs = { Diff( DELETE, "ABCD" ), Diff( EQUAL, "a" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "bcd" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "efghijklmnopqrs" ), Diff( DELETE, "EFGHIJKLMNOefg" ) }; + assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false ) ); - diffs = { Diff( INSERT, L" " ), Diff( EQUAL, L"a" ), Diff( INSERT, L"nd" ), Diff( EQUAL, L" [[Pennsylvania]]" ), Diff( DELETE, L" and [[New" ) }; - assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( L"a [[Pennsylvania]] and [[New", L" and [[Pennsylvania]]", false ) ); + diffs = { Diff( INSERT, " " ), Diff( EQUAL, "a" ), Diff( INSERT, "nd" ), Diff( EQUAL, " [[Pennsylvania]]" ), Diff( DELETE, " and [[New" ) }; + assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( "a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false ) ); dmp.Diff_Timeout = 0.1f; // 100ms // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. @@ -659,13 +659,13 @@ void diff_match_patch_test::testMatchAlphabet() bitmask[ 'a' ] = 4; bitmask[ 'b' ] = 2; bitmask[ 'c' ] = 1; - assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( L"abc" ) ); + assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( "abc" ) ); bitmask = TCharPosMap(); bitmask[ 'a' ] = 37; bitmask[ 'b' ] = 18; bitmask[ 'c' ] = 8; - assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( L"abcaba" ) ); + assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( "abcaba" ) ); } void diff_match_patch_test::testMatchBitap() @@ -673,60 +673,60 @@ void diff_match_patch_test::testMatchBitap() // Bitap algorithm. 
dmp.Match_Distance = 100; dmp.Match_Threshold = 0.5f; - assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( L"abcdefghijk", L"fgh", 5 ) ); + assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 5 ) ); - assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( L"abcdefghijk", L"fgh", 0 ) ); + assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 0 ) ); - assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( L"abcdefghijk", L"efxhi", 0 ) ); + assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( "abcdefghijk", "efxhi", 0 ) ); - assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( L"abcdefghijk", L"cdefxyhijk", 5 ) ); + assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( "abcdefghijk", "cdefxyhijk", 5 ) ); - assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( L"abcdefghijk", L"bxy", 1 ) ); + assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( "abcdefghijk", "bxy", 1 ) ); - assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( L"123456789xx0", L"3456789x0", 2 ) ); + assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( "123456789xx0", "3456789x0", 2 ) ); - assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( L"abcdef", L"xxabc", 4 ) ); + assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( "abcdef", "xxabc", 4 ) ); - assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( L"abcdef", L"defyy", 4 ) ); + assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( "abcdef", "defyy", 4 ) ); - assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( L"abcdef", L"xabcdefy", 0 ) ); + assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( "abcdef", "xabcdefy", 0 ) ); dmp.Match_Threshold = 0.4f; - assertEquals( "match_bitap: Threshold #1.", 4, dmp.match_bitap( L"abcdefghijk", L"efxyhi", 1 ) ); + assertEquals( 
"match_bitap: Threshold #1.", 4, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); dmp.Match_Threshold = 0.3f; - assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( L"abcdefghijk", L"efxyhi", 1 ) ); + assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); dmp.Match_Threshold = 0.0f; - assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( L"abcdefghijk", L"bcdef", 1 ) ); + assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( "abcdefghijk", "bcdef", 1 ) ); dmp.Match_Threshold = 0.5f; - assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( L"abcdexyzabcde", L"abccde", 3 ) ); + assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( "abcdexyzabcde", "abccde", 3 ) ); - assertEquals( "match_bitap: Multiple select #2.", 8, dmp.match_bitap( L"abcdexyzabcde", L"abccde", 5 ) ); + assertEquals( "match_bitap: Multiple select #2.", 8, dmp.match_bitap( "abcdexyzabcde", "abccde", 5 ) ); dmp.Match_Distance = 10; // Strict location. - assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( L"abcdefghijklmnopqrstuvwxyz", L"abcdefg", 24 ) ); + assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); - assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( L"abcdefghijklmnopqrstuvwxyz", L"abcdxxefg", 1 ) ); + assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1 ) ); dmp.Match_Distance = 1000; // Loose location. - assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( L"abcdefghijklmnopqrstuvwxyz", L"abcdefg", 24 ) ); + assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); } void diff_match_patch_test::testMatchMain() { // Full match. 
- assertEquals( "match_main: Equality.", 0, dmp.match_main( L"abcdef", L"abcdef", 1000 ) ); + assertEquals( "match_main: Equality.", 0, dmp.match_main( "abcdef", "abcdef", 1000 ) ); - assertEquals( "match_main: nullptr text.", -1, dmp.match_main( L"", L"abcdef", 1 ) ); + assertEquals( "match_main: nullptr text.", -1, dmp.match_main( "", "abcdef", 1 ) ); - assertEquals( "match_main: nullptr pattern.", 3, dmp.match_main( L"abcdef", L"", 3 ) ); + assertEquals( "match_main: nullptr pattern.", 3, dmp.match_main( "abcdef", "", 3 ) ); - assertEquals( "match_main: Exact match.", 3, dmp.match_main( L"abcdef", L"de", 3 ) ); + assertEquals( "match_main: Exact match.", 3, dmp.match_main( "abcdef", "de", 3 ) ); dmp.Match_Threshold = 0.7f; - assertEquals( "match_main: Complex match.", 4, dmp.match_main( L"I am the very model of a modern major general.", L" that berry ", 5 ) ); + assertEquals( "match_main: Complex match.", 4, dmp.match_main( "I am the very model of a modern major general.", " that berry ", 5 ) ); dmp.Match_Threshold = 0.5f; } @@ -740,29 +740,29 @@ void diff_match_patch_test::testPatchObj() p.start2 = 21; p.length1 = 18; p.length2 = 17; - p.diffs = { Diff( EQUAL, L"jump" ), Diff( DELETE, L"s" ), Diff( INSERT, L"ed" ), Diff( EQUAL, L" over " ), Diff( DELETE, L"the" ), Diff( INSERT, L"a" ), Diff( EQUAL, L"\nlaz" ) }; + p.diffs = { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, "\nlaz" ) }; std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals( "Patch: toString.", strp, p.toString() ); + assertEquals( "patch: toString.", strp, p.toString() ); } void diff_match_patch_test::testPatchFromText() { - assertTrue( "patch_fromText: #0.", dmp.patch_fromText( L"" ).empty() ); + assertTrue( "patch_fromText: #0.", dmp.patch_fromText( "" ).empty() ); std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n 
%0Alaz\n"; assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp )[ 0 ].toString() ); - assertEquals( "patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( L"@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); + assertEquals( "patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); - assertEquals( "patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( L"@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); + assertEquals( "patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); - assertEquals( "patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( L"@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); + assertEquals( "patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); // Generates error. bool exceptionTriggered = false; try { - dmp.patch_fromText( L"Bad\nPatch\n" ); + dmp.patch_fromText( "Bad\nPatch\n" ); assertFalse( "patch_fromText: #5.", true ); } catch ( std::wstring ex ) @@ -781,34 +781,34 @@ void diff_match_patch_test::testPatchToText() strp = L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; patches = dmp.patch_fromText( strp ); - assertEquals( "patch_toText: Dual", strp, dmp.patch_toText( patches ) ); + assertEquals( "patch_toText: Dual", strp, dmp.patch_toText( patches ) ); } void diff_match_patch_test::testPatchAddContext() { dmp.Patch_Margin = 4; - auto p = dmp.patch_fromText( L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; - dmp.patch_addContext( p, L"The quick brown fox jumps over the lazy dog." ); - assertEquals( "patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); + auto p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps over the lazy dog."
); + assertEquals( "patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); - p = dmp.patch_fromText( L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; - dmp.patch_addContext( p, L"The quick brown fox jumps." ); - assertEquals( "patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); + p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); - p = dmp.patch_fromText( L"@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; - dmp.patch_addContext( p, L"The quick brown fox jumps." ); - assertEquals( "patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); - p = dmp.patch_fromText( L"@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; - dmp.patch_addContext( p, L"The quick brown fox jumps. The quick brown fox crashes." ); - assertEquals( "patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString() ); + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps. The quick brown fox crashes." ); + assertEquals( "patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString() ); } void diff_match_patch_test::testPatchMake() { TPatchVector patches; - patches = dmp.patch_make( L"", L"" ); - assertEquals( "patch_make: nullptr case", L"", dmp.patch_toText( patches ) ); + patches = dmp.patch_make( "", "" ); + assertEquals( "patch_make: nullptr case", "", dmp.patch_toText( patches ) ); std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; @@ -831,13 +831,13 @@ void diff_match_patch_test::testPatchMake() patches = dmp.patch_make( text1, text2, diffs ); assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"`1234567890-=[]\\;',./", L"~!@#$%^&*()_+{}|:\"<>?" ); - assertEquals( "patch_toText: Character encoding.", L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); + patches = dmp.patch_make( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?" ); + assertEquals( "patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); - diffs = { Diff( DELETE, L"`1234567890-=[]\\;',./" ), Diff( INSERT, L"~!@#$%^&*()_+{}|:\"<>?" ) }; - assertEquals( "patch_fromText: Character decoding.", diffs, dmp.patch_fromText( L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" )[ 0 ].diffs ); + diffs = { Diff( DELETE, "`1234567890-=[]\\;',./" ), Diff( INSERT, "~!@#$%^&*()_+{}|:\"<>?" ) }; + assertEquals( "patch_fromText: Character decoding.", diffs, dmp.patch_fromText( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" )[ 0 ].diffs ); - text1 = L""; + text1 = {}; for ( int x = 0; x < 100; x++ ) { text1 += L"abcdef"; @@ -852,41 +852,41 @@ void diff_match_patch_test::testPatchSplitMax() { // Confirm Match_MaxBits is 32. 
TPatchVector patches; - patches = dmp.patch_make( L"abcdefghijklmnopqrstuvwxyz01234567890", L"XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #1.", L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", L"abcdefuvwxyz" ); + patches = dmp.patch_make( "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz" ); std::wstring oldToText = dmp.patch_toText( patches ); dmp.patch_splitMax( patches ); assertEquals( "patch_splitMax: #2.", oldToText, dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"1234567890123456789012345678901234567890123456789012345678901234567890", L"abc" ); + patches = dmp.patch_make( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc" ); dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #3.", L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 
@@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", L"abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1" ); + patches = dmp.patch_make( "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1" ); dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #4.", L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); } void diff_match_patch_test::testPatchAddPadding() { TPatchVector patches; - patches = dmp.patch_make( L"", L"test" ); - assertEquals( "patch_addPadding: Both edges full", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); + patches = dmp.patch_make( "", "test" ); + assertEquals( "patch_addPadding: Both edges ful", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges full.", L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"XY", L"XtestY" ); - assertEquals( "patch_addPadding: Both edges partial.", L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); + patches = dmp.patch_make( "XY", "XtestY" ); + assertEquals( "patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); dmp.patch_addPadding( patches ); - assertEquals( 
"patch_addPadding: Both edges partial.", L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"XXXXYYYY", L"XXXXtestYYYY" ); - assertEquals( "patch_addPadding: Both edges none.", L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + patches = dmp.patch_make( "XXXXYYYY", "XXXXtestYYYY" ); + assertEquals( "patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges none.", L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); } void diff_match_patch_test::testPatchApply() @@ -895,94 +895,95 @@ void diff_match_patch_test::testPatchApply() dmp.Match_Threshold = 0.5f; dmp.Patch_DeleteThreshold = 0.5f; TPatchVector patches; - patches = dmp.patch_make( L"", L"" ); - auto results = dmp.patch_apply( patches, L"Hello world." ); + patches = dmp.patch_make( "", "" ); + auto results = dmp.patch_apply( patches, "Hello world." ); auto &&boolArray = results.second; std::wstring resultStr = results.first + L"\t" + std::to_wstring( boolArray.size() ); assertEquals( "patch_apply: nullptr case.", L"Hello world.\t0", resultStr ); - patches = dmp.patch_make( L"The quick brown fox jumps over the lazy dog.", L"That quick brown fox jumped over a lazy dog." ); - assertEquals( "patch_apply: Exact match.", L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_toText( patches ) ); + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog." 
); + assertEquals( "patch_apply: Exact match.", "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_toText( patches ) ); - results = dmp.patch_apply( patches, L"The quick brown fox jumps over the lazy dog." ); + results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); - assertEquals( "patch_apply: Exact match.", L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); + resultStr = results.first + to_wstring( boolArray ); + + assertEquals( "patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); - results = dmp.patch_apply( patches, L"The quick red rabbit jumps over the tired tiger." ); + results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); - assertEquals( "patch_apply: Partial match.", L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); + resultStr = results.first + to_wstring( boolArray ); + assertEquals( "patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); - results = dmp.patch_apply( patches, L"I am the very model of a modern major general." ); + results = dmp.patch_apply( patches, "I am the very model of a modern major general." ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? 
L"true" : L"false" ); - assertEquals( "patch_apply: Failed match.", L"I am the very model of a modern major general.\tfalse\tfalse", resultStr ); + resultStr = results.first + to_wstring( boolArray ); + assertEquals( "patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr ); - patches = dmp.patch_make( L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy" ); - results = dmp.patch_apply( patches, L"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? L"true" : L"false" ); - assertEquals( "patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", resultStr ); + resultStr = results.first + to_wstring( boolArray ); + assertEquals( "patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr ); - patches = dmp.patch_make( L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy" ); - results = dmp.patch_apply( patches, L"x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? 
L"true" : L"false" ); - assertEquals( "patch_apply: Big delete, large change 1.", L"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); + resultStr = results.first + to_wstring( boolArray ); + assertEquals( "patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); dmp.Patch_DeleteThreshold = 0.6f; - patches = dmp.patch_make( L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy" ); - results = dmp.patch_apply( patches, L"x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? 
L"true" : L"false" ); - assertEquals( "patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", resultStr ); + resultStr = results.first + to_wstring( boolArray ); + assertEquals( "patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr ); dmp.Patch_DeleteThreshold = 0.5f; dmp.Match_Threshold = 0.0f; dmp.Match_Distance = 0; - patches = dmp.patch_make( L"abcdefghijklmnopqrstuvwxyz--------------------1234567890", L"abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890" ); - results = dmp.patch_apply( patches, L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890" ); + results = dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ) + L"\t" + ( boolArray[ 1 ] ? 
L"true" : L"false" ); - assertEquals( "patch_apply: Compensate for failed patch.", L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); + resultStr = results.first + to_wstring( boolArray ); + assertEquals( "patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); dmp.Match_Threshold = 0.5f; dmp.Match_Distance = 1000; - patches = dmp.patch_make( L"", L"test" ); + patches = dmp.patch_make( "", "test" ); std::wstring patchStr = dmp.patch_toText( patches ); - dmp.patch_apply( patches, L"" ); + dmp.patch_apply( patches, "" ); assertEquals( "patch_apply: No side effects.", patchStr, dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"The quick brown fox jumps over the lazy dog.", L"Woof" ); + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "Woof" ); patchStr = dmp.patch_toText( patches ); - dmp.patch_apply( patches, L"The quick brown fox jumps over the lazy dog." ); + dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); assertEquals( "patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText( patches ) ); - patches = dmp.patch_make( L"", L"test" ); - results = dmp.patch_apply( patches, L"" ); + patches = dmp.patch_make( "", "test" ); + results = dmp.patch_apply( patches, "" ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? 
L"true" : L"false" ); - assertEquals( "patch_apply: Edge exact match.", L"test\ttrue", resultStr ); + resultStr = results.first + L"\t" + to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Edge exact match.", "test\ttrue", resultStr ); - patches = dmp.patch_make( L"XY", L"XtestY" ); - results = dmp.patch_apply( patches, L"XY" ); + patches = dmp.patch_make( "XY", "XtestY" ); + results = dmp.patch_apply( patches, "XY" ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? L"true" : L"false" ); - assertEquals( "patch_apply: Near edge exact match.", L"XtestY\ttrue", resultStr ); + resultStr = results.first + L"\t" + to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr ); - patches = dmp.patch_make( L"y", L"y123" ); - results = dmp.patch_apply( patches, L"x" ); + patches = dmp.patch_make( "y", "y123" ); + results = dmp.patch_apply( patches, "x" ); boolArray = results.second; - resultStr = results.first + L"\t" + ( boolArray[ 0 ] ? 
L"true" : L"false" ); - assertEquals( "patch_apply: Edge partial match.", L"x123\ttrue", resultStr ); + resultStr = results.first + L"\t" + to_wstring( boolArray[ 0 ] ); + assertEquals( "patch_apply: Edge partial match.", "x123\ttrue", resultStr ); } void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) { std::cout << "FAILED : " + strCase + "\n"; - std::wcerr << " Expected: " << expected << L"\n Actual: " << actual << "\n"; + std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n"; numFailedTests++; //throw strCase; } @@ -1010,15 +1011,6 @@ void diff_match_patch_test::assertEquals( const std::string &strCase, const std: reportPassed( strCase ); } -void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) -{ - if ( s1 != s2 ) - { - reportFailure( strCase, to_wstring( s1 ), to_wstring( s2 ) ); - } - reportPassed( strCase ); -} - void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) { if ( d1 != d2 ) @@ -1064,7 +1056,7 @@ void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, { if ( lhs != rhs ) { - reportFailure( strCase, lhs ? L"true" : L"false", rhs ? 
L"true" : L"false" ); + reportFailure( strCase, to_wstring( lhs, false ), to_wstring( rhs, false ) ); } reportPassed( strCase ); } @@ -1073,7 +1065,7 @@ void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) { if ( !value ) { - reportFailure( strCase, L"true", L"false" ); + reportFailure( strCase, to_wstring( true, false ), to_wstring( false, false ) ); } reportPassed( strCase ); } @@ -1082,15 +1074,15 @@ void diff_match_patch_test::assertFalse( const std::string &strCase, bool value { if ( value ) { - reportFailure( strCase, L"false", L"true" ); + reportFailure( strCase, to_wstring( false, false ), to_wstring( true, false ) ); } reportPassed( strCase ); } // Construct the two texts which made up the diff originally. -TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) +diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) { - TStringVector text( { L"", L"" } ); + TStringVector text( 2, std::wstring() ); for ( auto &&myDiff : diffs ) { if ( myDiff.operation != INSERT ) diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index 4c8b9a5f..eb22dc92 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -21,132 +21,14 @@ #include -template< typename T > -std::wstring to_wstring( const T & /*value*/, bool /*doubleQuoteEmpty*/ = false ) -{ - assert( false ); - return {}; -} - -template<> -std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) -{ - if ( doubleQuoteEmpty && string.empty() ) - return LR"("")"; - - std::wstring wstring( string.size(), L' ' ); // Overestimate number of code points. - wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), string.size() ) ); // Shrink to fit. 
- return wstring; -} - -template<> -std::wstring to_wstring( const TVariant &variant, bool doubleQuoteEmpty ) -{ - std::wstring retVal; - if ( std::holds_alternative< std::wstring >( variant ) ) - retVal = std::get< std::wstring >( variant ); - - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - - return retVal; -} - -template<> -std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty ) -{ - auto retVal = diff.toString(); - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - return retVal; -} - -template<> -std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty ) -{ - auto retVal = patch.toString(); - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - return retVal; -} - -template<> -std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) -{ - if ( doubleQuoteEmpty && ( value == 0 ) ) - return LR"("")"; - - return std::wstring( 1, value ); -} - -template<> -std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) -{ - return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); -} - -template<> -std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) -{ - if ( doubleQuoteEmpty && value.empty() ) - return LR"("")"; - - return value; -} - -template< typename T > -std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty = false ) -{ - std::wstring retVal = L"("; - bool first = true; - for ( auto &&curr : values ) - { - if ( !first ) - { - retVal += L", "; - } - retVal += to_wstring( curr, doubleQuoteEmpty ); - first = false; - } - retVal += L")"; - return retVal; -} - -template< typename T > -typename std::enable_if_t< std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) -{ - if ( doubleQuoteEmpty && ( values.size() == 0 ) ) - return LR"(\"\")"; - - std::wstring retVal; - for ( auto &&curr : values ) - { - retVal += to_wstring( curr, false ); - } - return retVal; 
-} - -template< typename T > -typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) -{ - std::wstring retVal = L"("; - bool first = true; - for ( auto &&curr : values ) - { - if ( !first ) - { - retVal += L", "; - } - retVal += to_wstring( curr, doubleQuoteEmpty ); - first = false; - } - retVal += L")"; - return retVal; -} - class diff_match_patch_test { public: + using TStringVector = diff_match_patch::TStringVector; + using TCharPosMap = diff_match_patch::TCharPosMap; + using TVariant = diff_match_patch::TVariant; + using TVariantVector = diff_match_patch::TVariantVector; + diff_match_patch_test(); void run_all_tests(); void runTest( std::function< void() > test ); @@ -190,6 +72,155 @@ class diff_match_patch_test std::size_t numFailedTests{ 0 }; diff_match_patch dmp; + template< typename T > + std::wstring to_wstring( const T & /*value*/, bool /*doubleQuoteEmpty*/ = false ) + { + assert( false ); + return {}; + } + + template<> + std::wstring to_wstring( const bool &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? L"true" : L"false" ); + return retVal; + } + + template<> + std::wstring to_wstring( const std::vector< bool >::reference &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? L"true" : L"false" ); + return retVal; + } + + template<> + std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && string.empty() ) + return LR"("")"; + + std::wstring wstring( string.size(), L' ' ); // Overestimate number of code points. + wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), string.size() ) ); // Shrink to fit. 
+ return wstring; + } + + template<> + std::wstring to_wstring( const TVariant &variant, bool doubleQuoteEmpty ) + { + std::wstring retVal; + if ( std::holds_alternative< std::wstring >( variant ) ) + retVal = std::get< std::wstring >( variant ); + + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + + return retVal; + } + + template<> + std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty ) + { + auto retVal = diff.toString(); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; + } + + template<> + std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty ) + { + auto retVal = patch.toString(); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; + } + + template<> + std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, value ); + } + + template<> + std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) + { + return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); + } + + template<> + std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && value.empty() ) + return LR"("")"; + + return value; + } + + template< typename T > + std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty = false ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; + } + + template<> + std::wstring to_wstring( const std::vector< bool > &boolArray, bool doubleQuoteOnEmpty ) + { + std::wstring retVal; + for ( auto &&curr : boolArray ) + { + retVal += L"\t" + to_wstring( curr, doubleQuoteOnEmpty ); + } + return retVal; + } + + + template< typename T > + typename std::enable_if_t< std::is_integral_v< T >, 
std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) + { + if ( doubleQuoteEmpty && ( values.size() == 0 ) ) + return LR"(\"\")"; + + std::wstring retVal; + for ( auto &&curr : values ) + { + retVal += to_wstring( curr, false ); + } + return retVal; + } + + template< typename T > + typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; + } + // Define equality. template< typename T > void assertEquals( const std::string &strCase, const T &lhs, const T &rhs ) @@ -217,7 +248,9 @@ class diff_match_patch_test void assertEquals( const std::string &strCase, bool lhs, bool rhs ); void assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ); void assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ); - void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ); + void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) { return assertEquals( strCase, ::to_wstring( s1 ), ::to_wstring( s2 ) ); } + void assertEquals( const std::string &strCase, const std::wstring &s1, const std::string &s2 ) { return assertEquals( strCase, s1, ::to_wstring( s2 ) ); } + void assertEquals( const std::string &strCase, const std::string &s1, const std::wstring &s2 ) { return assertEquals( strCase, ::to_wstring( s1 ), s2 ); } void assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ); void assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ); void assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ); @@ 
-230,7 +263,7 @@ class diff_match_patch_test void reportPassed( const std::string &strCase ); // Construct the two texts which made up the diff originally. - TStringVector diff_rebuildtexts( const std::vector< Diff > &diffs ); + TStringVector diff_rebuildtexts( const TDiffVector &diffs ); }; #endif // DIFF_MATCH_PATCH_TEST_H From 28bcdf2ebb7d9d1068125739d14fdd3acb3bca9b Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Mon, 29 Jan 2024 17:08:04 -0800 Subject: [PATCH 03/15] Unit tests pass, removing unnecessary changes, remove utilities into utils file --- cpp17/CMakeLists.txt | 52 +- cpp17/diff_match_patch.cpp | 536 ++++++------------- cpp17/diff_match_patch.h | 107 ++-- cpp17/diff_match_patch.pro | 19 - cpp17/diff_match_patch_test.cpp | 169 +----- cpp17/diff_match_patch_test.h | 160 +----- cpp17/diff_match_patch_test_assertEquals.cpp | 163 ++++++ cpp17/diff_match_patch_utils.cpp | 198 +++++++ cpp17/diff_match_patch_utils.h | 288 ++++++++++ cpp17/include.cmake | 23 - 10 files changed, 898 insertions(+), 817 deletions(-) delete mode 100644 cpp17/diff_match_patch.pro create mode 100644 cpp17/diff_match_patch_test_assertEquals.cpp create mode 100644 cpp17/diff_match_patch_utils.cpp create mode 100644 cpp17/diff_match_patch_utils.h delete mode 100644 cpp17/include.cmake diff --git a/cpp17/CMakeLists.txt b/cpp17/CMakeLists.txt index e58cbd06..a4280440 100644 --- a/cpp17/CMakeLists.txt +++ b/cpp17/CMakeLists.txt @@ -1,44 +1,24 @@ cmake_minimum_required(VERSION 3.22) -find_package(IncludeProjectSettings REQUIRED) -include( ${CMAKE_CURRENT_LIST_DIR}/include.cmake ) +SET( _PROJECT_NAME diff_match_patch_cpp17 ) project( ${_PROJECT_NAME} ) -IncludeProjectSettings(QT ${USE_QT}) -add_library(${_PROJECT_NAME} STATIC - ${_PROJECT_DEPENDENCIES} +add_library(${_PROJECT_NAME} + STATIC + diff_match_patch.cpp + diff_match_patch.h + diff_match_patch_utils.cpp + diff_match_patch_utils.h ) -set_target_properties( ${_PROJECT_NAME} PROPERTIES FOLDER ${FOLDER_NAME} ) +
target_include_directories( ${_PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR} ) -target_link_libraries( ${_PROJECT_NAME} - PUBLIC - ${project_pub_DEPS} - PRIVATE - ${project_pri_DEPS} -) +target_link_libraries( ${_PROJECT_NAME} ) -set( testProjectName "" ) -SET( TEST_SOURCE_FILES - diff_match_patch_test.cpp - diff_match_patch_test.h - ) - -SAB_UNIT_TEST(diff_match_patch_cpp17 - diff_match_patch_test.cpp - "gtest;gmock;diff_match_patch_cpp17" - testProjectName - ${TEST_SOURCE_FILES} - ) - +SET( TEST_NAME "${_PROJECT_NAME}_test" ) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED YES) -set_target_properties( ${testProjectName} PROPERTIES - VS_DEBUGGER_WORKING_DIRECTORY "$" - VS_DEBUGGER_COMMAND "$" - VS_DEBUGGER_ENVIRONMENT "PATH=${DEBUG_PATH}" -) -if( WIN32 ) - set_target_properties( ${testProjectName} PROPERTIES - CXX_STANDARD 17 - ) -target_include_directories( ${testProjectName} PUBLIC ${CMAKE_SOURCE_DIR}) -endif() +project( ${TEST_NAME} ) +add_executable( ${TEST_NAME} diff_match_patch_test.cpp diff_match_patch_test.h diff_match_patch_test_assertEquals.cpp) + +target_include_directories( ${TEST_NAME} PUBLIC ${CMAKE_SOURCE_DIR}) +target_link_libraries( diff_match_patch_cpp17_test ${_PROJECT_NAME} ) +add_test( ${TEST_NAME} ${TEST_NAME} ) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index 0b799449..1dc092fa 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -17,6 +17,7 @@ */ #include "diff_match_patch.h" +#include "diff_match_patch_utils.h" #include #include @@ -54,6 +55,16 @@ Diff::Diff( Operation _operation, const wchar_t *_text ) : { } +Diff::Diff( Operation _operation, const std::string &_text ) : + Diff( _operation, NUtils::to_wstring( _text ) ) +{ +} + +Diff::Diff( Operation _operation, const char *_text ) : + Diff( _operation, std::string( _text ) ) +{ +} + std::wstring Diff::strOperation( Operation op ) { switch ( op ) @@ -116,7 +127,7 @@ Patch::Patch( std::wstring &text ) { throw std::wstring( L"Invalid 
patch string: " + text ); } - start1 = diff_match_patch::toInt( matches[ 1 ].str() ); + start1 = NUtils::toInt( matches[ 1 ].str() ); if ( !matches[ 2 ].length() ) { start1--; @@ -129,10 +140,10 @@ Patch::Patch( std::wstring &text ) else { start1--; - length1 = diff_match_patch::toInt( matches[ 2 ].str() ); + length1 = NUtils::toInt( matches[ 2 ].str() ); } - start2 = diff_match_patch::toInt( matches[ 3 ].str() ); + start2 = NUtils::toInt( matches[ 3 ].str() ); if ( !matches[ 4 ].length() ) { start2--; @@ -145,7 +156,7 @@ Patch::Patch( std::wstring &text ) else { start2--; - length2 = diff_match_patch::toInt( matches[ 4 ].str() ); + length2 = NUtils::toInt( matches[ 4 ].str() ); } text.erase( text.begin() ); } @@ -183,7 +194,7 @@ std::wstring Patch::toString() const text += L" "; break; } - text += std::wstring( diff_match_patch::toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) ) + std::wstring( L"\n" ); + text += NUtils::toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) + std::wstring( L"\n" ); } return text; @@ -301,6 +312,21 @@ TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::w return diffs; } +TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2 ) +{ + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +} + +TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines ) +{ + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines ); +} + +TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) +{ + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); +} + TDiffVector diff_match_patch::diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) { TDiffVector diffs; @@ -371,6 +397,11 @@ TDiffVector 
diff_match_patch::diff_compute( const std::wstring &text1, const std return diff_bisect( text1, text2, deadline ); } +TDiffVector diff_match_patch::diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) +{ + return diff_compute( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); +} + TDiffVector diff_match_patch::diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ) { // Scan the text on a line-by-line basis first. @@ -433,6 +464,11 @@ TDiffVector diff_match_patch::diff_lineMode( std::wstring text1, std::wstring te return diffs; } +TDiffVector diff_match_patch::diff_lineMode( std::string text1, std::string text2, clock_t deadline ) +{ + return diff_lineMode( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); +} + // using int64_t rather thant size_t due to the backward walking nature of the algorithm TDiffVector diff_match_patch::diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ) { @@ -564,6 +600,11 @@ TDiffVector diff_match_patch::diff_bisect( const std::wstring &text1, const std: return diffs; } +TDiffVector diff_match_patch::diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ) +{ + return diff_bisect( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); +} + TDiffVector diff_match_patch::diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ) { auto text1a = text1.substr( 0, x ); @@ -579,6 +620,11 @@ TDiffVector diff_match_patch::diff_bisectSplit( const std::wstring &text1, const return diffs; } +TDiffVector diff_match_patch::diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ) +{ + return diff_bisectSplit( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), x, y, deadline ); +} + diff_match_patch::TVariantVector 
diff_match_patch::diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ) { TStringVector lineArray; @@ -600,6 +646,11 @@ diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( const std: return listRet; } +std::vector< diff_match_patch::diff_match_patch::TVariant > diff_match_patch::diff_linesToChars( const std::string &text1, const std::string &text2 ) +{ + return diff_linesToChars( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +} + std::wstring diff_match_patch::diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ) { std::size_t lineStart = 0; @@ -665,6 +716,11 @@ std::size_t diff_match_patch::diff_commonPrefix( const std::wstring &text1, cons return n; } +std::size_t diff_match_patch::diff_commonPrefix( const std::string &text1, const std::string &text2 ) +{ + return diff_commonPrefix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +} + std::size_t diff_match_patch::diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ) { // Performance analysis: http://neil.fraser.name/news/2007/10/09/ @@ -681,6 +737,11 @@ std::size_t diff_match_patch::diff_commonSuffix( const std::wstring &text1, cons return n; } +std::size_t diff_match_patch::diff_commonSuffix( const std::string &text1, const std::string &text2 ) +{ + return diff_commonSuffix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +} + std::size_t diff_match_patch::diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ) { // Cache the text lengths to prevent multiple calls. 
@@ -734,6 +795,11 @@ std::size_t diff_match_patch::diff_commonOverlap( const std::wstring &text1, con } } +std::size_t diff_match_patch::diff_commonOverlap( const std::string &text1, const std::string &text2 ) +{ + return diff_commonOverlap( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +} + diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ) { if ( Diff_Timeout <= 0 ) @@ -783,6 +849,11 @@ diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::wst } } +diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::string &text1, const std::string &text2 ) +{ + return diff_halfMatch( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +} + diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ) { // Start with a 1/4 length substring at position i as a seed. @@ -815,6 +886,11 @@ diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::ws } } +diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ) +{ + return diff_halfMatchI( NUtils::to_wstring( longtext ), NUtils::to_wstring( shorttext ), i ); +} + void diff_match_patch::diff_cleanupSemantic( TDiffVector &diffs ) { if ( diffs.empty() ) @@ -1059,6 +1135,11 @@ int64_t diff_match_patch::diff_cleanupSemanticScore( const std::wstring &one, co return 0; } +int64_t diff_match_patch::diff_cleanupSemanticScore( const std::string &one, const std::string &two ) +{ + return diff_cleanupSemanticScore( NUtils::to_wstring( one ), NUtils::to_wstring( two ) ); +} + // Define some regex patterns for matching boundaries. 
std::wregex diff_match_patch::BLANKLINEEND = std::wregex( LR"(\n\r?\n$)" ); std::wregex diff_match_patch::BLANKLINESTART = std::wregex( LR"(^\r?\n\r?\n)" ); @@ -1209,15 +1290,15 @@ void diff_match_patch::diff_cleanupMerge( TDiffVector &diffs ) } // Delete the offending records and add the merged ones. pointer -= count_delete + count_insert; - Splice( diffs, pointer, count_delete + count_insert ); + NUtils::Splice( diffs, pointer, count_delete + count_insert ); if ( !text_delete.empty() ) { - Splice( diffs, pointer, 0, { Diff( DELETE, text_delete ) } ); + NUtils::Splice( diffs, pointer, 0, { Diff( DELETE, text_delete ) } ); pointer++; } if ( !text_insert.empty() ) { - Splice( diffs, pointer, 0, { Diff( INSERT, text_insert ) } ); + NUtils::Splice( diffs, pointer, 0, { Diff( INSERT, text_insert ) } ); pointer++; } pointer++; @@ -1255,12 +1336,12 @@ void diff_match_patch::diff_cleanupMerge( TDiffVector &diffs ) if ( diffs[ pointer - 1 ].operation == EQUAL && diffs[ pointer + 1 ].operation == EQUAL ) { // This is a single edit surrounded by equalities. - if ( endsWith( diffs[ pointer ].text, diffs[ pointer - 1 ].text ) ) + if ( NUtils::endsWith( diffs[ pointer ].text, diffs[ pointer - 1 ].text ) ) { // Shift the edit over the previous equality. diffs[ pointer ].text = diffs[ pointer - 1 ].text + diffs[ pointer ].text.substr( 0, diffs[ pointer ].text.length() - diffs[ pointer - 1 ].text.length() ); diffs[ pointer + 1 ].text = diffs[ pointer - 1 ].text + diffs[ pointer + 1 ].text; - Splice( diffs, pointer - 1, 1 ); + NUtils::Splice( diffs, pointer - 1, 1 ); changes = true; } else if ( diffs[ pointer ].text.find( diffs[ pointer + 1 ].text ) == 0 ) @@ -1268,7 +1349,7 @@ void diff_match_patch::diff_cleanupMerge( TDiffVector &diffs ) // Shift the edit over the next equality. 
diffs[ pointer - 1 ].text += diffs[ pointer + 1 ].text; diffs[ pointer ].text = diffs[ pointer ].text.substr( diffs[ pointer + 1 ].text.length() ) + diffs[ pointer + 1 ].text; - Splice( diffs, pointer + 1, 1 ); + NUtils::Splice( diffs, pointer + 1, 1 ); changes = true; } } @@ -1324,10 +1405,10 @@ std::wstring diff_match_patch::diff_prettyHtml( const TDiffVector &diffs ) for ( auto &&aDiff : diffs ) { text = aDiff.text; - replace( text, L"&", L"&" ); - replace( text, L"<", L"<" ); - replace( text, L">", L">" ); - replace( text, L"\n", L"¶
" ); + NUtils::replace( text, L"&", L"&" ); + NUtils::replace( text, L"<", L"<" ); + NUtils::replace( text, L">", L">" ); + NUtils::replace( text, L"\n", L"¶
" ); switch ( aDiff.operation ) { case INSERT: @@ -1405,7 +1486,7 @@ std::wstring diff_match_patch::diff_toDelta( const TDiffVector &diffs ) switch ( aDiff.operation ) { case INSERT: - text += L"+" + toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) + L"\t"; + text += L"+" + NUtils::toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) + L"\t"; break; case DELETE: text += L"-" + std::to_wstring( aDiff.text.length() ) + L"\t"; @@ -1427,7 +1508,7 @@ TDiffVector diff_match_patch::diff_fromDelta( const std::wstring &text1, const s { TDiffVector diffs; std::size_t pointer = 0; // Cursor in text1 - TStringVector tokens = splitString( delta, L"\t", false ); + auto tokens = NUtils::splitString( delta, L"\t", false ); for ( auto &&token : tokens ) { if ( token.empty() ) @@ -1441,15 +1522,15 @@ TDiffVector diff_match_patch::diff_fromDelta( const std::wstring &text1, const s switch ( token[ 0 ] ) { case '+': - replace( param, L"+", L"%2b" ); - param = fromPercentEncoding( param ); + NUtils::replace( param, L"+", L"%2b" ); + param = NUtils::fromPercentEncoding( param ); diffs.emplace_back( INSERT, param ); break; case '-': // Fall through. 
case '=': { - auto n = toInt( param ); + auto n = NUtils::toInt( param ); if ( n < 0 ) { throw std::wstring( L"Negative number in diff_fromDelta: " + param ); @@ -1483,6 +1564,11 @@ TDiffVector diff_match_patch::diff_fromDelta( const std::wstring &text1, const s return diffs; } +TDiffVector diff_match_patch::diff_fromDelta( const std::string &text1, const std::string &delta ) +{ + return diff_fromDelta( NUtils::to_wstring( text1 ), NUtils::to_wstring( delta ) ); +} + // MATCH FUNCTIONS std::size_t diff_match_patch::match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) @@ -1512,6 +1598,11 @@ std::size_t diff_match_patch::match_main( const std::wstring &text, const std::w } } +std::size_t diff_match_patch::match_main( const std::string &text, const std::string &pattern, std::size_t loc ) +{ + return match_main( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); +} + std::size_t diff_match_patch::match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) { if ( !( Match_MaxBits == 0 || pattern.length() <= Match_MaxBits ) ) @@ -1631,6 +1722,11 @@ std::size_t diff_match_patch::match_bitap( const std::wstring &text, const std:: return best_loc; } +std::size_t diff_match_patch::match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ) +{ + return match_bitap( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); +} + double diff_match_patch::match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ) { const float accuracy = static_cast< float >( e ) / pattern.length(); @@ -1664,6 +1760,11 @@ diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::wstri return s; } +diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::string &pattern ) +{ + return match_alphabet( NUtils::to_wstring( pattern ) ); +} + // PATCH FUNCTIONS void diff_match_patch::patch_addContext( Patch &patch, const std::wstring &text ) 
@@ -1706,6 +1807,11 @@ void diff_match_patch::patch_addContext( Patch &patch, const std::wstring &text patch.length2 += prefix.length() + suffix.length(); } +void diff_match_patch::patch_addContext( Patch &patch, const std::string &text ) +{ + return patch_addContext( patch, NUtils::to_wstring( text ) ); +} + TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring &text2 ) { // Check for null inputs not needed since null can't be passed via std::wstring @@ -1820,6 +1926,21 @@ TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const TDif return patches; } +TPatchVector diff_match_patch::patch_make( const std::string &text1, const TDiffVector &diffs ) +{ + return patch_make( NUtils::to_wstring( text1 ), diffs ); +} + +TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ) +{ + return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), diffs ); +} + +TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2 ) +{ + return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +} + TPatchVector diff_match_patch::patch_deepCopy( const TPatchVector &patches ) { TPatchVector patchesCopy; @@ -1955,17 +2076,23 @@ std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TP text = safeMid( text, nullPadding.length(), text.length() - 2 * nullPadding.length() ); return { text, results }; } + +std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::string text ) +{ + return patch_apply( patches, NUtils::to_wstring( text ) ); +} + std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) { auto paddingLength = Patch_Margin; std::wstring nullPadding; for ( char x = 1; x <= paddingLength; x++ ) { - nullPadding += to_wstring( x ); + nullPadding += NUtils::to_wstring( x ); } // Bump all the patches forward. 
- for( auto && aPatch : patches ) + for ( auto &&aPatch : patches ) { aPatch.start1 += paddingLength; aPatch.start2 += paddingLength; @@ -2018,72 +2145,6 @@ std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) return nullPadding; } -#ifdef NO -std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) -{ - short paddingLength = Patch_Margin; - std::wstring nullPadding; - for ( short x = 1; x <= paddingLength; x++ ) - { - nullPadding += static_cast< wchar_t >( x ); - } - - // Bump all the patches forward. - auto pointer = patches.begin(); - while ( pointer != patches.end() ) - { - auto &&aPatch = *pointer; - aPatch.start1 += paddingLength; - aPatch.start2 += paddingLength; - } - - // Add some padding on start of first diff. - auto &&firstPatch = patches.front(); - TDiffVector &firstPatchDiffs = firstPatch.diffs; - if ( firstPatchDiffs.empty() || firstPatchDiffs.front().operation != EQUAL ) - { - // Add nullPadding equality. - firstPatchDiffs.emplace( firstPatchDiffs.begin(), EQUAL, nullPadding ); - firstPatch.start1 -= paddingLength; // Should be 0. - firstPatch.start2 -= paddingLength; // Should be 0. - firstPatch.length1 += paddingLength; - firstPatch.length2 += paddingLength; - } - else if ( paddingLength > firstPatchDiffs.front().text.length() ) - { - // Grow first equality. - Diff &firstDiff = firstPatchDiffs.front(); - auto extraLength = paddingLength - firstDiff.text.length(); - firstDiff.text = safeMid( nullPadding, firstDiff.text.length(), paddingLength - firstDiff.text.length() ) + firstDiff.text; - firstPatch.start1 -= extraLength; - firstPatch.start2 -= extraLength; - firstPatch.length1 += extraLength; - firstPatch.length2 += extraLength; - } - - // Add some padding on end of last diff. - Patch &lastPatch = patches.front(); - TDiffVector &lastPatchDiffs = lastPatch.diffs; - if ( lastPatchDiffs.empty() || lastPatchDiffs.back().operation != EQUAL ) - { - // Add nullPadding equality. 
- lastPatchDiffs.emplace_back( EQUAL, nullPadding ); - lastPatch.length1 += paddingLength; - lastPatch.length2 += paddingLength; - } - else if ( paddingLength > lastPatchDiffs.back().text.length() ) - { - // Grow last equality. - Diff &lastDiff = lastPatchDiffs.back(); - auto extraLength = paddingLength - lastDiff.text.length(); - lastDiff.text += nullPadding.substr( 0, extraLength ); - lastPatch.length1 += extraLength; - lastPatch.length2 += extraLength; - } - - return nullPadding; -} -#endif void diff_match_patch::patch_splitMax( TPatchVector &patches ) { auto patch_size = Match_MaxBits; @@ -2095,7 +2156,7 @@ void diff_match_patch::patch_splitMax( TPatchVector &patches ) } Patch bigpatch = patches[ x ]; // Remove the big old patch. - Splice( patches, x--, 1 ); + NUtils::Splice( patches, x--, 1 ); auto start1 = bigpatch.start1; auto start2 = bigpatch.start2; std::wstring precontext; @@ -2189,136 +2250,12 @@ void diff_match_patch::patch_splitMax( TPatchVector &patches ) } if ( !empty ) { - Splice( patches, ++x, 0ULL, patch ); + NUtils::Splice( patches, ++x, 0ULL, patch ); } } } } -#ifdef NO -void diff_match_patch::patch_splitMax( TPatchVector &patches ) -{ - short patch_size = Match_MaxBits; - std::wstring precontext, postcontext; - Patch patch; - std::size_t start1, start2; - bool empty; - Operation diff_type; - std::wstring diff_text; - auto pointer = patches.begin(); - Patch bigpatch; - - if ( pointer != patches.end() ) - { - bigpatch = *pointer; - } - - while ( !bigpatch.isNull() ) - { - if ( bigpatch.length1 <= patch_size ) - { - bigpatch = ( ( ++pointer ) != patches.end() ) ? *pointer : Patch(); - continue; - } - // Remove the big old patch. - pointer = patches.erase( pointer ); - start1 = bigpatch.start1; - start2 = bigpatch.start2; - precontext.clear(); - while ( !bigpatch.diffs.empty() ) - { - // Create one of several smaller patches. 
- patch = Patch(); - empty = true; - patch.start1 = start1 - precontext.length(); - patch.start2 = start2 - precontext.length(); - if ( !precontext.empty() ) - { - patch.length1 = patch.length2 = precontext.length(); - patch.diffs.emplace_back( EQUAL, precontext ); - } - while ( !bigpatch.diffs.empty() && patch.length1 < patch_size - Patch_Margin ) - { - diff_type = bigpatch.diffs.front().operation; - diff_text = bigpatch.diffs.front().text; - if ( diff_type == INSERT ) - { - // Insertions are harmless. - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - patch.diffs.emplace_back( bigpatch.diffs.front() ); - bigpatch.diffs.erase( bigpatch.diffs.begin() ); - empty = false; - } - else if ( diff_type == DELETE && patch.diffs.size() == 1 && patch.diffs.front().operation == EQUAL && diff_text.length() > 2 * patch_size ) - { - // This is a large deletion. Let it pass in one chunk. - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - empty = false; - patch.diffs.emplace_back( diff_type, diff_text ); - bigpatch.diffs.erase( bigpatch.diffs.begin() ); - } - else - { - // Deletion or equality. Only take as much as we can stomach. - diff_text = diff_text.substr( 0, std::min( diff_text.length(), ( patch_size > ( patch.length1 + Patch_Margin ) ) ? ( patch_size - patch.length1 - Patch_Margin ) : ( -1 * 1ULL ) ) ); - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - if ( diff_type == EQUAL ) - { - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - } - else - { - empty = false; - } - patch.diffs.emplace_back( diff_type, diff_text ); - if ( diff_text == bigpatch.diffs.front().text ) - { - bigpatch.diffs.erase( bigpatch.diffs.begin() ); - } - else - { - bigpatch.diffs.front().text = safeMid( bigpatch.diffs.front().text, diff_text.length() ); - } - } - } - // Compute the head context for the next patch. 
- precontext = diff_text2( patch.diffs ); - precontext = safeMid( precontext, precontext.length() - Patch_Margin ); - // Append the end context for this patch. - if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) - { - postcontext = diff_text1( bigpatch.diffs ).substr( 0, Patch_Margin ); - } - else - { - postcontext = diff_text1( bigpatch.diffs ); - } - if ( !postcontext.empty() ) - { - patch.length1 += postcontext.length(); - patch.length2 += postcontext.length(); - if ( !patch.diffs.empty() && patch.diffs.back().operation == EQUAL ) - { - patch.diffs.back().text += postcontext; - } - else - { - patch.diffs.emplace_back( EQUAL, postcontext ); - } - } - if ( !empty ) - { - patches.emplace( pointer, patch ); - } - } - bigpatch = ( ( ++pointer ) != patches.end() ) ? *pointer : Patch(); - } -} -#endif std::wstring diff_match_patch::patch_toText( const TPatchVector &patches ) { std::wstring text; @@ -2336,7 +2273,7 @@ TPatchVector diff_match_patch::patch_fromText( const std::wstring &textline ) { return patches; } - auto text = splitString( textline, L"\n", true ); + auto text = NUtils::splitString( textline, L"\n", true ); int textPointer = 0; std::wstring line; while ( textPointer < text.size() ) @@ -2356,8 +2293,8 @@ TPatchVector diff_match_patch::patch_fromText( const std::wstring &textline ) auto sign = text[ textPointer ][ 0 ]; line = text[ textPointer ].substr( 1 ); - replace( line, L"+", L"%2b" ); - line = fromPercentEncoding( line ); + NUtils::replace( line, L"+", L"%2b" ); + line = NUtils::fromPercentEncoding( line ); if ( sign == '-' ) { // Deletion. 
@@ -2390,6 +2327,11 @@ TPatchVector diff_match_patch::patch_fromText( const std::wstring &textline ) return patches; } +TPatchVector diff_match_patch::patch_fromText( const std::string &textline ) +{ + return patch_fromText( NUtils::to_wstring( textline ) ); +} + std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos ) { return safeMid( str, pos, std::string::npos ); @@ -2400,164 +2342,30 @@ std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos return ( pos == str.length() ) ? std::wstring() : str.substr( pos, len ); } -void diff_match_patch::replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ) -{ - std::size_t pos = inString.find( from ); - while ( pos != std::wstring::npos ) - { - inString.replace( pos, from.length(), to ); - pos = inString.find( from, pos + 1 ); - } -} - -wchar_t toHexUpper( wchar_t value ) -{ - return L"0123456789ABCDEF"[ value & 0xF ]; -} - -std::wstring to_wstring( const std::string &string ) -{ - std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; - return utf8_conv.from_bytes( string ); -} - -std::wstring diff_match_patch::toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ) +std::wstring NUtils::to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty ) { std::wstring retVal; + if ( std::holds_alternative< std::wstring >( variant ) ) + retVal = std::get< std::wstring >( variant ); - if ( ( ( c >= 0x61 && c <= 0x7A ) // ALPHA - || ( c >= 0x41 && c <= 0x5A ) // ALPHA - || ( c >= 0x30 && c <= 0x39 ) // DIGIT - || c == 0x2D // - - || c == 0x2E // . 
- || c == 0x5F // _ - || c == 0x7E // ~ - || ( exclude.find( c ) != std::string::npos ) ) - && ( include.find( c ) == std::string::npos ) ) - { - retVal = std::wstring( 1, c ); - } - else - { - retVal = L'%'; - retVal += toHexUpper( ( c & 0xf0 ) >> 4 ); - retVal += toHexUpper( c & 0xf ); - } - return retVal; -} - -std::wstring diff_match_patch::toPercentEncoding( const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, const std::wstring &include /*= std::wstring() */ ) -{ - if ( input.empty() ) - return {}; - std::wstring retVal; - retVal.reserve( input.length() * 3 ); - - static_assert( sizeof( wchar_t ) <= 4, "wchar_t is greater that 32 bit" ); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; - auto sz = sizeof( wchar_t ); - std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; - for ( auto &&c : input ) - { - auto currStr = std::wstring( 1, c ); - auto asBytes = utf8_conv.to_bytes( currStr ); - for ( auto &&ii : asBytes ) - { - if ( ii ) - retVal += diff_match_patch::toPercentEncoding( ii, exclude, include ); - } - } return retVal; } -wchar_t diff_match_patch::getValue( wchar_t ch ) +std::wstring NUtils::to_wstring( const Patch &patch, bool doubleQuoteEmpty ) { - if ( ch >= '0' && ch <= '9' ) - ch -= '0'; - else if ( ch >= 'a' && ch <= 'f' ) - ch = ch - 'a' + 10; - else if ( ch >= 'A' && ch <= 'F' ) - ch = ch - 'A' + 10; - else - throw std::wstring( L"Invalid Character %" ) + ch; - - return ch; -} - -std::wstring diff_match_patch::fromPercentEncoding( const std::wstring &input ) -{ - if ( input.empty() ) - return {}; - std::wstring retVal; - retVal.reserve( input.length() ); - for ( auto ii = 0ULL; ii < input.length(); ++ii ) - { - auto c = input[ ii ]; - if ( c == L'%' && ( ii + 2 ) < input.length() ) - { - auto a = input[ ++ii ]; - auto b = input[ ++ii ]; - a = getValue( a ); - b = getValue( b ); - retVal += wchar_t( ( a << 4 ) | b ); - } - else - { - retVal += c; - } - } + auto retVal = patch.toString(); + if ( 
doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; return retVal; } -bool diff_match_patch::endsWith( const std::wstring &string, const std::wstring &suffix ) -{ - if ( suffix.length() > string.length() ) - return false; - - return string.compare( string.length() - suffix.length(), suffix.length(), suffix ) == 0; -} - -diff_match_patch::TStringVector diff_match_patch::splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) -{ - if ( separator.empty() ) - { - if ( !skipEmptyParts || !string.empty() ) - return { string }; - return {}; - } - - TStringVector strings; - auto prevPos = 0ULL; - auto startPos = string.find_first_of( separator ); - while ( startPos != std::string::npos ) - { - auto start = prevPos ? prevPos + 1 : prevPos; - auto len = prevPos ? ( startPos - prevPos - 1 ) : startPos; - auto curr = string.substr( start, len ); - prevPos = startPos; - if ( !skipEmptyParts || !curr.empty() ) - strings.emplace_back( curr ); - startPos = string.find_first_of( separator, prevPos + 1 ); - } - auto remainder = string.substr( prevPos ? prevPos + 1 : prevPos ); - if ( !skipEmptyParts || !remainder.empty() ) - strings.emplace_back( remainder ); - - return strings; -} - -int64_t diff_match_patch::toInt( const std::wstring &string ) +std::wstring NUtils::to_wstring( const Diff &diff, bool doubleQuoteEmpty ) { - int64_t retVal = 0; - try - { - std::size_t lastPos{}; - retVal = std::stoul( string, &lastPos ); - if ( lastPos != string.length() ) - return 0; - } - catch ( ... ) - { - } + auto retVal = diff.toString(); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; return retVal; } diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h index 8dc04568..5ed51b98 100644 --- a/cpp17/diff_match_patch.h +++ b/cpp17/diff_match_patch.h @@ -27,7 +27,7 @@ #include #include #include -#include + /* * Functions for diff, match and patch. * Computes the difference between two texts to create a patch. 
@@ -74,8 +74,6 @@ enum Operation EQUAL }; -std::wstring to_wstring( const std::string &string ); - /** * Class representing one diff operation. */ @@ -94,14 +92,8 @@ class Diff */ Diff( Operation _operation, const std::wstring &_text ); Diff( Operation _operation, const wchar_t *_text ); - Diff( Operation _operation, const std::string &_text ) : - Diff( _operation, to_wstring( _text ) ) - { - } - Diff( Operation _operation, const char *_text ) : - Diff( _operation, std::string( _text ) ) - { - } + Diff( Operation _operation, const std::string &_text ); + Diff( Operation _operation, const char *_text ); Diff(); inline bool isNull() const; std::wstring toString() const; @@ -196,7 +188,7 @@ class diff_match_patch * @return Linked List of Diff objects. */ TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2 ); - TDiffVector diff_main( const std::string &text1, const std::string &text2 ) { return diff_main( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } + TDiffVector diff_main( const std::string &text1, const std::string &text2 ); /** * Find the differences between two texts. @@ -208,7 +200,7 @@ class diff_match_patch * @return Linked List of Diff objects. */ TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ); - TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines ) { return diff_main( ::to_wstring( text1 ), ::to_wstring( text2 ), checklines ); } + TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines ); /** * Find the differences between two texts. 
Simplifies the problem by @@ -224,7 +216,7 @@ class diff_match_patch */ private: TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); - TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) { return diff_main( ::to_wstring( text1 ), ::to_wstring( text2 ), checklines, deadline ); } + TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ); /** * Find the differences between two texts. Assumes that the texts do not @@ -239,7 +231,7 @@ class diff_match_patch */ private: TDiffVector diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); - TDiffVector diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) { return diff_compute( ::to_wstring( text1 ), ::to_wstring( text2 ), checklines, deadline ); } + TDiffVector diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ); /** * Do a quick line-level diff on both strings, then rediff the parts for @@ -252,7 +244,7 @@ class diff_match_patch */ private: TDiffVector diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ); - TDiffVector diff_lineMode( std::string text1, std::string text2, clock_t deadline ) { return diff_lineMode( ::to_wstring( text1 ), ::to_wstring( text2 ), deadline ); } + TDiffVector diff_lineMode( std::string text1, std::string text2, clock_t deadline ); /** * Find the 'middle snake' of a diff, split the problem in two @@ -264,7 +256,7 @@ class diff_match_patch */ protected: TDiffVector diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ); - TDiffVector diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ) { return diff_bisect( ::to_wstring( text1 ), ::to_wstring( text2 ), deadline ); } + TDiffVector diff_bisect( const 
std::string &text1, const std::string &text2, clock_t deadline ); /** * Given the location of the 'middle snake', split the diff in two parts @@ -278,7 +270,7 @@ class diff_match_patch */ private: TDiffVector diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ); - TDiffVector diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ) { return diff_bisectSplit( ::to_wstring( text1 ), ::to_wstring( text2 ), x, y, deadline ); } + TDiffVector diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ); /** * Split two texts into a list of strings. Reduce the texts to a string of @@ -291,7 +283,7 @@ class diff_match_patch */ protected: std::vector< TVariant > diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ); // return elems 0 and 1 are std::wstring, elem 2 is TStringVector - std::vector< TVariant > diff_linesToChars( const std::string &text1, const std::string &text2 ) { return diff_linesToChars( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } + std::vector< TVariant > diff_linesToChars( const std::string &text1, const std::string &text2 ); /** * Split a text into a list of strings. Reduce the texts to a string of @@ -321,7 +313,7 @@ class diff_match_patch */ public: std::size_t diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ); - std::size_t diff_commonPrefix( const std::string &text1, const std::string &text2 ) { return diff_commonPrefix( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } + std::size_t diff_commonPrefix( const std::string &text1, const std::string &text2 ); /** * Determine the common suffix of two strings. 
@@ -331,7 +323,7 @@ class diff_match_patch */ public: std::size_t diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ); - std::size_t diff_commonSuffix( const std::string &text1, const std::string &text2 ) { return diff_commonSuffix( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } + std::size_t diff_commonSuffix( const std::string &text1, const std::string &text2 ); /** * Determine if the suffix of one string is the prefix of another. @@ -342,7 +334,7 @@ class diff_match_patch */ protected: std::size_t diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ); - std::size_t diff_commonOverlap( const std::string &text1, const std::string &text2 ) { return diff_commonOverlap( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } + std::size_t diff_commonOverlap( const std::string &text1, const std::string &text2 ); /** * Do the two texts share a substring which is at least half the length of @@ -356,7 +348,7 @@ class diff_match_patch */ protected: TStringVector diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ); - TStringVector diff_halfMatch( const std::string &text1, const std::string &text2 ) { return diff_halfMatch( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } + TStringVector diff_halfMatch( const std::string &text1, const std::string &text2 ); /** * Does a substring of shorttext exist within longtext such that the @@ -370,7 +362,7 @@ class diff_match_patch */ private: TStringVector diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ); - TStringVector diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ) { return diff_halfMatchI( ::to_wstring( longtext ), ::to_wstring( shorttext ), i ); } + TStringVector diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ); /** * Reduce the number of edits by eliminating semantically trivial equalities. 
@@ -398,7 +390,7 @@ class diff_match_patch */ private: int64_t diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ); - int64_t diff_cleanupSemanticScore( const std::string &one, const std::string &two ) { return diff_cleanupSemanticScore( ::to_wstring( one ), ::to_wstring( two ) ); } + int64_t diff_cleanupSemanticScore( const std::string &one, const std::string &two ); /** * Reduce the number of edits by eliminating operationally trivial equalities. @@ -480,7 +472,7 @@ class diff_match_patch */ public: TDiffVector diff_fromDelta( const std::wstring &text1, const std::wstring &delta ); - TDiffVector diff_fromDelta( const std::string &text1, const std::string &delta ) { return diff_fromDelta( ::to_wstring( text1 ), ::to_wstring( delta ) ); } + TDiffVector diff_fromDelta( const std::string &text1, const std::string &delta ); // MATCH FUNCTIONS @@ -494,7 +486,7 @@ class diff_match_patch */ public: std::size_t match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); - std::size_t match_main( const std::string &text, const std::string &pattern, std::size_t loc ) { return match_main( ::to_wstring( text ), ::to_wstring( pattern ), loc ); } + std::size_t match_main( const std::string &text, const std::string &pattern, std::size_t loc ); /** * Locate the best instance of 'pattern' in 'text' near 'loc' using the @@ -506,7 +498,7 @@ class diff_match_patch */ protected: std::size_t match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); - std::size_t match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ) { return match_bitap( ::to_wstring( text ), ::to_wstring( pattern ), loc ); } + std::size_t match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ); /** * Compute and return the score for a match with e errors and x location. 
@@ -526,7 +518,7 @@ class diff_match_patch */ protected: TCharPosMap match_alphabet( const std::wstring &pattern ); - TCharPosMap match_alphabet( const std::string &pattern ) { return match_alphabet( ::to_wstring( pattern ) ); } + TCharPosMap match_alphabet( const std::string &pattern ); // PATCH FUNCTIONS @@ -538,7 +530,7 @@ class diff_match_patch */ protected: void patch_addContext( Patch &patch, const std::wstring &text ); - void patch_addContext( Patch &patch, const std::string &text ) { return patch_addContext( patch, ::to_wstring( text ) ); } + void patch_addContext( Patch &patch, const std::string &text ); /** * Compute a list of patches to turn text1 into text2. @@ -549,7 +541,7 @@ class diff_match_patch */ public: TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2 ); - TPatchVector patch_make( const std::string &text1, const std::string &text2 ) { return patch_make( ::to_wstring( text1 ), ::to_wstring( text2 ) ); } + TPatchVector patch_make( const std::string &text1, const std::string &text2 ); /** * Compute a list of patches to turn text1 into text2. @@ -571,7 +563,7 @@ class diff_match_patch */ public: TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2, const TDiffVector &diffs ); - TPatchVector patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ) { return patch_make( ::to_wstring( text1 ), ::to_wstring( text2 ), diffs ); } + TPatchVector patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ); /** * Compute a list of patches to turn text1 into text2. 
@@ -582,7 +574,7 @@ class diff_match_patch */ public: TPatchVector patch_make( const std::wstring &text1, const TDiffVector &diffs ); - TPatchVector patch_make( const std::string &text1, const TDiffVector &diffs ) { return patch_make( ::to_wstring( text1 ), diffs ); } + TPatchVector patch_make( const std::string &text1, const TDiffVector &diffs ); /** * Given an array of patches, return another array that is identical. @@ -602,7 +594,7 @@ class diff_match_patch */ public: std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::wstring text ); - std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::string text ) { return patch_apply( patches, ::to_wstring( text ) ); } + std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::string text ); /** * Add some padding on text start and end so that edges can match something. @@ -639,7 +631,7 @@ class diff_match_patch */ public: TPatchVector patch_fromText( const std::wstring &textline ); - TPatchVector patch_fromText( const std::string &textline ) { return patch_fromText( ::to_wstring( textline ) ); } + TPatchVector patch_fromText( const std::string &textline ); /** * A safer version of std::wstring.mid(pos). 
This one returns "" instead of @@ -661,45 +653,12 @@ class diff_match_patch */ private: static std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); - - // utilities to replace built in Qt functionality -public: - static void replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ); - - static std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ); - static std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); - - static wchar_t getValue( wchar_t ch ); - static std::wstring fromPercentEncoding( const std::wstring &input ); - - static bool endsWith( const std::wstring &string, const std::wstring &suffix ); - static TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ); - - static int64_t toInt( const std::wstring &string ); - - static std::wstring to_wstring( const char &value, bool doubleQuoteEmpty = false ) - { - if ( doubleQuoteEmpty && ( value == 0 ) ) - return LR"("")"; - - return std::wstring( 1, static_cast< wchar_t >( value ) ); - } - - template< typename T > - static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const std::vector< T > &objects = {} ) - { - auto deletedRange = std::vector< T >( { input.begin() + start, input.begin() + start + count } ); - input.erase( input.begin() + start, input.begin() + start + count ); - input.insert( input.begin() + start, objects.begin(), objects.end() ); - - return deletedRange; - } - - template< typename T > - static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const T &object ) - { - return Splice( input, start, count, std::vector< T >( { object } ) ); - } }; +namespace NUtils +{ + std::wstring to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty = false ); 
+ std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty = false ); + std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty = false ); +} #endif // DIFF_MATCH_PATCH_H diff --git a/cpp17/diff_match_patch.pro b/cpp17/diff_match_patch.pro deleted file mode 100644 index 80527977..00000000 --- a/cpp17/diff_match_patch.pro +++ /dev/null @@ -1,19 +0,0 @@ -#QT += sql xml network -TEMPLATE = app -CONFIG += qt debug_and_release - -mac { - CONFIG -= app_bundle -} - -# don't embed the manifest for now (doesn't work :( ) -#CONFIG -= embed_manifest_exe - -FORMS = - -HEADERS = diff_match_patch.h diff_match_patch_test.h - -SOURCES = diff_match_patch.cpp diff_match_patch_test.cpp - -RESOURCES = - diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index 0654a005..19385b3f 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -16,8 +16,8 @@ * limitations under the License. */ -// Code known to compile and run with Qt 4.3 through Qt 4.7. #include "diff_match_patch.h" +#include "diff_match_patch_utils.h" #include "diff_match_patch_test.h" #include @@ -173,8 +173,8 @@ void diff_match_patch_test::testDiffLinesToChars() // Convert lines down to characters. 
TStringVector tmpVector = TStringVector( { L"", L"alpha\n", L"beta\n" } ); TVariantVector tmpVarList; - tmpVarList.emplace_back( to_wstring( { 1, 2, 1 } ) ); //(("\u0001\u0002\u0001")); - tmpVarList.emplace_back( to_wstring( { 2, 1, 2 } ) ); // (("\u0002\u0001\u0002")); + tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 1 } ) ); //(("\u0001\u0002\u0001")); + tmpVarList.emplace_back( NUtils::to_wstring( { 2, 1, 2 } ) ); // (("\u0002\u0001\u0002")); tmpVarList.emplace_back( tmpVector ); assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n" ) ); @@ -185,7 +185,7 @@ void diff_match_patch_test::testDiffLinesToChars() tmpVector.emplace_back( L"beta\r\n" ); tmpVector.emplace_back( L"\r\n" ); tmpVarList.emplace_back( std::wstring() ); - tmpVarList.emplace_back( to_wstring( { 1, 2, 3, 3 } ) ); // (("\u0001\u0002\u0003\u0003")); + tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 3, 3 } ) ); // (("\u0001\u0002\u0003\u0003")); tmpVarList.emplace_back( tmpVector ); assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "", "alpha\r\nbeta\r\n\r\n\r\n" ) ); @@ -194,8 +194,8 @@ void diff_match_patch_test::testDiffLinesToChars() tmpVector.emplace_back( L"" ); tmpVector.emplace_back( L"a" ); tmpVector.emplace_back( L"b" ); - tmpVarList.emplace_back( to_wstring( 1 ) ); // (("\u0001")); - tmpVarList.emplace_back( to_wstring( 2 ) ); // (("\u0002")); + tmpVarList.emplace_back( NUtils::to_wstring( 1 ) ); // (("\u0001")); + tmpVarList.emplace_back( NUtils::to_wstring( 2 ) ); // (("\u0002")); tmpVarList.emplace_back( tmpVector ); assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "a", "b" ) ); @@ -209,7 +209,7 @@ void diff_match_patch_test::testDiffLinesToChars() { tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); lines += std::to_wstring( x ) + L"\n"; - chars += to_wstring( x ); + chars += NUtils::to_wstring( x ); } assertEquals( "diff_linesToChars: More than 256 (setup).", n, 
tmpVector.size() ); assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); @@ -229,8 +229,8 @@ void diff_match_patch_test::testDiffCharsToLines() // Convert chars up to lines. TDiffVector diffs; - diffs.emplace_back( EQUAL, to_wstring( { 1, 2, 1 } ) ); // ("\u0001\u0002\u0001"); - diffs.emplace_back( INSERT, to_wstring( { 2, 1, 2 } ) ); // ("\u0002\u0001\u0002"); + diffs.emplace_back( EQUAL, NUtils::to_wstring( { 1, 2, 1 } ) ); // ("\u0001\u0002\u0001"); + diffs.emplace_back( INSERT, NUtils::to_wstring( { 2, 1, 2 } ) ); // ("\u0002\u0001\u0002"); TStringVector tmpVector; tmpVector.emplace_back( L"" ); tmpVector.emplace_back( L"alpha\n" ); @@ -248,7 +248,7 @@ void diff_match_patch_test::testDiffCharsToLines() { tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); lines += std::to_wstring( x ) + L"\n"; - chars += to_wstring( x ); + chars += NUtils::to_wstring( x ); } assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); @@ -595,7 +595,7 @@ void diff_match_patch_test::testDiffMain() diffs = { Diff( DELETE, "Apple" ), Diff( INSERT, "Banana" ), Diff( EQUAL, "s are a" ), Diff( INSERT, "lso" ), Diff( EQUAL, " fruit." 
) }; assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( "Apples are a fruit.", "Bananas are also fruit.", false ) ); - diffs = { Diff( DELETE, "a" ), Diff( INSERT, L"\u0680" ), Diff( EQUAL, "x" ), Diff( DELETE, "\t" ), Diff( INSERT, to_wstring( kZero ) ) }; + diffs = { Diff( DELETE, "a" ), Diff( INSERT, L"\u0680" ), Diff( EQUAL, "x" ), Diff( DELETE, "\t" ), Diff( INSERT, NUtils::to_wstring( kZero ) ) }; assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( L"ax\t", std::wstring( L"\u0680x" ) + kZero, false ) ); diffs = { Diff( DELETE, "1" ), Diff( EQUAL, "a" ), Diff( DELETE, "y" ), Diff( EQUAL, "b" ), Diff( DELETE, "2" ), Diff( INSERT, "xab" ) }; @@ -907,37 +907,37 @@ void diff_match_patch_test::testPatchApply() results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); boolArray = results.second; - resultStr = results.first + to_wstring( boolArray ); - + resultStr = results.first + NUtils::to_wstring( boolArray ); + assertEquals( "patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." ); boolArray = results.second; - resultStr = results.first + to_wstring( boolArray ); + resultStr = results.first + NUtils::to_wstring( boolArray ); assertEquals( "patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); results = dmp.patch_apply( patches, "I am the very model of a modern major general." 
); boolArray = results.second; - resultStr = results.first + to_wstring( boolArray ); + resultStr = results.first + NUtils::to_wstring( boolArray ); assertEquals( "patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr ); patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); results = dmp.patch_apply( patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); boolArray = results.second; - resultStr = results.first + to_wstring( boolArray ); + resultStr = results.first + NUtils::to_wstring( boolArray ); assertEquals( "patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr ); patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); boolArray = results.second; - resultStr = results.first + to_wstring( boolArray ); + resultStr = results.first + NUtils::to_wstring( boolArray ); assertEquals( "patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); dmp.Patch_DeleteThreshold = 0.6f; patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); boolArray = results.second; - resultStr = results.first + to_wstring( boolArray ); + resultStr = results.first + NUtils::to_wstring( boolArray ); assertEquals( "patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr ); dmp.Patch_DeleteThreshold = 0.5f; @@ -946,7 +946,7 @@ void diff_match_patch_test::testPatchApply() patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz--------------------1234567890", 
"abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890" ); results = dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); boolArray = results.second; - resultStr = results.first + to_wstring( boolArray ); + resultStr = results.first + NUtils::to_wstring( boolArray ); assertEquals( "patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); dmp.Match_Threshold = 0.5f; dmp.Match_Distance = 1000; @@ -964,147 +964,22 @@ void diff_match_patch_test::testPatchApply() patches = dmp.patch_make( "", "test" ); results = dmp.patch_apply( patches, "" ); boolArray = results.second; - resultStr = results.first + L"\t" + to_wstring( boolArray[ 0 ], false ); + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); assertEquals( "patch_apply: Edge exact match.", "test\ttrue", resultStr ); patches = dmp.patch_make( "XY", "XtestY" ); results = dmp.patch_apply( patches, "XY" ); boolArray = results.second; - resultStr = results.first + L"\t" + to_wstring( boolArray[ 0 ], false ); + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); assertEquals( "patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr ); patches = dmp.patch_make( "y", "y123" ); results = dmp.patch_apply( patches, "x" ); boolArray = results.second; - resultStr = results.first + L"\t" + to_wstring( boolArray[ 0 ] ); + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ] ); assertEquals( "patch_apply: Edge partial match.", "x123\ttrue", resultStr ); } -void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) -{ - std::cout << "FAILED : " + strCase + "\n"; - std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n"; - numFailedTests++; - //throw strCase; -} - -void diff_match_patch_test::reportPassed( const std::string 
&strCase ) -{ - std::cout << "PASSED: " + strCase + "\n"; -} - -void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ) -{ - if ( n1 != n2 ) - { - reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) ); - } - reportPassed( strCase ); -} - -void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ) -{ - if ( s1 != s2 ) - { - reportFailure( strCase, s1, s2 ); - } - reportPassed( strCase ); -} - -void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) -{ - if ( d1 != d2 ) - { - reportFailure( strCase, d1.toString(), d2.toString() ); - } - reportPassed( strCase ); -} - -void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ) -{ - if ( var1 != var2 ) - { - reportFailure( strCase, to_wstring( var1 ), to_wstring( var2 ) ); - } - reportPassed( strCase ); -} - -void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ) -{ - for ( auto &&ii : m1 ) - { - auto rhs = m2.find( ii.first ); - if ( rhs == m2.end() ) - { - reportFailure( strCase, L"(" + to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); - } - } - - for ( auto &&ii : m2 ) - { - auto rhs = m1.find( ii.first ); - if ( rhs == m1.end() ) - { - reportFailure( strCase, L"(" + to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); - } - } - - reportPassed( strCase ); -} - -void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs ) -{ - if ( lhs != rhs ) - { - reportFailure( strCase, to_wstring( lhs, false ), to_wstring( rhs, false ) ); - } - reportPassed( strCase ); -} - -void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) -{ - if ( !value ) - { - reportFailure( strCase, to_wstring( true, false ), to_wstring( false, false ) ); - } - 
reportPassed( strCase ); -} - -void diff_match_patch_test::assertFalse( const std::string &strCase, bool value ) -{ - if ( value ) - { - reportFailure( strCase, to_wstring( false, false ), to_wstring( true, false ) ); - } - reportPassed( strCase ); -} - -// Construct the two texts which made up the diff originally. -diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) -{ - TStringVector text( 2, std::wstring() ); - for ( auto &&myDiff : diffs ) - { - if ( myDiff.operation != INSERT ) - { - text[ 0 ] += myDiff.text; - } - if ( myDiff.operation != DELETE ) - { - text[ 1 ] += myDiff.text; - } - } - return text; -} - -void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) -{ - if ( !list.empty() ) - { - throw strCase; - } -} - /* Compile instructions for cmake on Windows: mkdir build diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index eb22dc92..e7285fb8 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -72,155 +72,6 @@ class diff_match_patch_test std::size_t numFailedTests{ 0 }; diff_match_patch dmp; - template< typename T > - std::wstring to_wstring( const T & /*value*/, bool /*doubleQuoteEmpty*/ = false ) - { - assert( false ); - return {}; - } - - template<> - std::wstring to_wstring( const bool &value, bool /*doubleQuoteOnEmpty*/ ) - { - std::wstring retVal = std::wstring( value ? L"true" : L"false" ); - return retVal; - } - - template<> - std::wstring to_wstring( const std::vector< bool >::reference &value, bool /*doubleQuoteOnEmpty*/ ) - { - std::wstring retVal = std::wstring( value ? L"true" : L"false" ); - return retVal; - } - - template<> - std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) - { - if ( doubleQuoteEmpty && string.empty() ) - return LR"("")"; - - std::wstring wstring( string.size(), L' ' ); // Overestimate number of code points. 
- wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), string.size() ) ); // Shrink to fit. - return wstring; - } - - template<> - std::wstring to_wstring( const TVariant &variant, bool doubleQuoteEmpty ) - { - std::wstring retVal; - if ( std::holds_alternative< std::wstring >( variant ) ) - retVal = std::get< std::wstring >( variant ); - - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - - return retVal; - } - - template<> - std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty ) - { - auto retVal = diff.toString(); - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - return retVal; - } - - template<> - std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty ) - { - auto retVal = patch.toString(); - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - return retVal; - } - - template<> - std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) - { - if ( doubleQuoteEmpty && ( value == 0 ) ) - return LR"("")"; - - return std::wstring( 1, value ); - } - - template<> - std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) - { - return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); - } - - template<> - std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) - { - if ( doubleQuoteEmpty && value.empty() ) - return LR"("")"; - - return value; - } - - template< typename T > - std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty = false ) - { - std::wstring retVal = L"("; - bool first = true; - for ( auto &&curr : values ) - { - if ( !first ) - { - retVal += L", "; - } - retVal += to_wstring( curr, doubleQuoteEmpty ); - first = false; - } - retVal += L")"; - return retVal; - } - - template<> - std::wstring to_wstring( const std::vector< bool > &boolArray, bool doubleQuoteOnEmpty ) - { - std::wstring retVal; - for ( auto &&curr : boolArray ) - { - retVal += L"\t" + to_wstring( curr, doubleQuoteOnEmpty ); - } - return 
retVal; - } - - - template< typename T > - typename std::enable_if_t< std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) - { - if ( doubleQuoteEmpty && ( values.size() == 0 ) ) - return LR"(\"\")"; - - std::wstring retVal; - for ( auto &&curr : values ) - { - retVal += to_wstring( curr, false ); - } - return retVal; - } - - template< typename T > - typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) - { - std::wstring retVal = L"("; - bool first = true; - for ( auto &&curr : values ) - { - if ( !first ) - { - retVal += L", "; - } - retVal += to_wstring( curr, doubleQuoteEmpty ); - first = false; - } - retVal += L")"; - return retVal; - } - // Define equality. template< typename T > void assertEquals( const std::string &strCase, const T &lhs, const T &rhs ) @@ -238,19 +89,20 @@ class diff_match_patch_test else { // Build human readable description of both lists. 
- auto lhsString = to_wstring( lhs, true ); - auto rhsString = to_wstring( rhs, true ); + auto lhsString = NUtils::to_wstring( lhs, true ); + auto rhsString = NUtils::to_wstring( rhs, true ); reportFailure( strCase, lhsString, rhsString ); return; } reportPassed( strCase ); } + void assertEquals( const std::string &strCase, bool lhs, bool rhs ); void assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ); void assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ); - void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) { return assertEquals( strCase, ::to_wstring( s1 ), ::to_wstring( s2 ) ); } - void assertEquals( const std::string &strCase, const std::wstring &s1, const std::string &s2 ) { return assertEquals( strCase, s1, ::to_wstring( s2 ) ); } - void assertEquals( const std::string &strCase, const std::string &s1, const std::wstring &s2 ) { return assertEquals( strCase, ::to_wstring( s1 ), s2 ); } + void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ); + void assertEquals( const std::string &strCase, const std::wstring &s1, const std::string &s2 ); + void assertEquals( const std::string &strCase, const std::string &s1, const std::wstring &s2 ); void assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ); void assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ); void assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ); diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp new file mode 100644 index 00000000..34cd7fc1 --- /dev/null +++ b/cpp17/diff_match_patch_test_assertEquals.cpp @@ -0,0 +1,163 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. 
+ * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "diff_match_patch.h" +#include "diff_match_patch_utils.h" +#include "diff_match_patch_test.h" + +#include + +void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) +{ + std::cout << "FAILED : " + strCase + "\n"; + std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n"; + numFailedTests++; + //throw strCase; +} + +void diff_match_patch_test::reportPassed( const std::string &strCase ) +{ + std::cout << "PASSED: " + strCase + "\n"; +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ) +{ + if ( n1 != n2 ) + { + reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ) +{ + if ( s1 != s2 ) + { + reportFailure( strCase, s1, s2 ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) +{ + return assertEquals( strCase, NUtils::to_wstring( s1 ), NUtils::to_wstring( s2 ) ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::wstring &s2 ) +{ + return assertEquals( strCase, 
NUtils::to_wstring( s1 ), s2 ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::string &s2 ) +{ + return assertEquals( strCase, s1, NUtils::to_wstring( s2 ) ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) +{ + if ( d1 != d2 ) + { + reportFailure( strCase, d1.toString(), d2.toString() ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ) +{ + if ( var1 != var2 ) + { + reportFailure( strCase, NUtils::to_wstring( var1 ), NUtils::to_wstring( var2 ) ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ) +{ + for ( auto &&ii : m1 ) + { + auto rhs = m2.find( ii.first ); + if ( rhs == m2.end() ) + { + reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + for ( auto &&ii : m2 ) + { + auto rhs = m1.find( ii.first ); + if ( rhs == m1.end() ) + { + reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + reportPassed( strCase ); +} + +void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs ) +{ + if ( lhs != rhs ) + { + reportFailure( strCase, NUtils::to_wstring( lhs, false ), NUtils::to_wstring( rhs, false ) ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) +{ + if ( !value ) + { + reportFailure( strCase, NUtils::to_wstring( true, false ), NUtils::to_wstring( false, false ) ); + } + reportPassed( strCase ); +} + +void diff_match_patch_test::assertFalse( const std::string &strCase, bool value ) +{ + if ( value ) + { + reportFailure( strCase, NUtils::to_wstring( false, false ), NUtils::to_wstring( true, false ) 
); + } + reportPassed( strCase ); +} + +// Construct the two texts which made up the diff originally. +diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) +{ + TStringVector text( 2, std::wstring() ); + for ( auto &&myDiff : diffs ) + { + if ( myDiff.operation != INSERT ) + { + text[ 0 ] += myDiff.text; + } + if ( myDiff.operation != DELETE ) + { + text[ 1 ] += myDiff.text; + } + } + return text; +} + +void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) +{ + if ( !list.empty() ) + { + throw strCase; + } +} diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp new file mode 100644 index 00000000..5243ed08 --- /dev/null +++ b/cpp17/diff_match_patch_utils.cpp @@ -0,0 +1,198 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "diff_match_patch_utils.h" + +//#include +//#include +//#include +//#include +//#include +//#include +//#include +#include + +namespace NUtils +{ + std::wstring safeMid( const std::wstring &str, std::size_t pos ) + { + return safeMid( str, pos, std::string::npos ); + } + + std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) + { + return ( pos == str.length() ) ? 
std::wstring() : str.substr( pos, len ); + } + + void replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ) + { + std::size_t pos = inString.find( from ); + while ( pos != std::wstring::npos ) + { + inString.replace( pos, from.length(), to ); + pos = inString.find( from, pos + 1 ); + } + } + + wchar_t toHexUpper( wchar_t value ) + { + return L"0123456789ABCDEF"[ value & 0xF ]; + } + + std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ) + { + std::wstring retVal; + + if ( ( ( c >= 0x61 && c <= 0x7A ) // ALPHA + || ( c >= 0x41 && c <= 0x5A ) // ALPHA + || ( c >= 0x30 && c <= 0x39 ) // DIGIT + || c == 0x2D // - + || c == 0x2E // . + || c == 0x5F // _ + || c == 0x7E // ~ + || ( exclude.find( c ) != std::string::npos ) ) + && ( include.find( c ) == std::string::npos ) ) + { + retVal = std::wstring( 1, c ); + } + else + { + retVal = L'%'; + retVal += toHexUpper( ( c & 0xf0 ) >> 4 ); + retVal += toHexUpper( c & 0xf ); + } + return retVal; + } + + std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, const std::wstring &include /*= std::wstring() */ ) + { + if ( input.empty() ) + return {}; + std::wstring retVal; + retVal.reserve( input.length() * 3 ); + + static_assert( sizeof( wchar_t ) <= 4, "wchar_t is greater that 32 bit" ); + + auto sz = sizeof( wchar_t ); + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + for ( auto &&c : input ) + { + auto currStr = std::wstring( 1, c ); + auto asBytes = utf8_conv.to_bytes( currStr ); + for ( auto &&ii : asBytes ) + { + if ( ii ) + retVal += toPercentEncoding( ii, exclude, include ); + } + } + return retVal; + } + + wchar_t getValue( wchar_t ch ) + { + if ( ch >= '0' && ch <= '9' ) + ch -= '0'; + else if ( ch >= 'a' && ch <= 'f' ) + ch = ch - 'a' + 10; + else if ( ch >= 'A' && ch <= 'F' ) + ch = ch - 'A' + 10; + else + throw std::wstring( L"Invalid Character %" ) + ch; + + return 
ch; + } + + std::wstring fromPercentEncoding( const std::wstring &input ) + { + if ( input.empty() ) + return {}; + std::wstring retVal; + retVal.reserve( input.length() ); + for ( auto ii = 0ULL; ii < input.length(); ++ii ) + { + auto c = input[ ii ]; + if ( c == L'%' && ( ii + 2 ) < input.length() ) + { + auto a = input[ ++ii ]; + auto b = input[ ++ii ]; + a = getValue( a ); + b = getValue( b ); + retVal += wchar_t( ( a << 4 ) | b ); + } + else + { + retVal += c; + } + } + return retVal; + } + + bool endsWith( const std::wstring &string, const std::wstring &suffix ) + { + if ( suffix.length() > string.length() ) + return false; + + return string.compare( string.length() - suffix.length(), suffix.length(), suffix ) == 0; + } + + TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) + { + if ( separator.empty() ) + { + if ( !skipEmptyParts || !string.empty() ) + return { string }; + return {}; + } + + TStringVector strings; + auto prevPos = 0ULL; + auto startPos = string.find_first_of( separator ); + while ( startPos != std::string::npos ) + { + auto start = prevPos ? prevPos + 1 : prevPos; + auto len = prevPos ? ( startPos - prevPos - 1 ) : startPos; + auto curr = string.substr( start, len ); + prevPos = startPos; + if ( !skipEmptyParts || !curr.empty() ) + strings.emplace_back( curr ); + startPos = string.find_first_of( separator, prevPos + 1 ); + } + auto remainder = string.substr( prevPos ? prevPos + 1 : prevPos ); + if ( !skipEmptyParts || !remainder.empty() ) + strings.emplace_back( remainder ); + + return strings; + } + + int64_t toInt( const std::wstring &string ) + { + int64_t retVal = 0; + try + { + std::size_t lastPos{}; + retVal = std::stoul( string, &lastPos ); + if ( lastPos != string.length() ) + return 0; + } + catch ( ... 
) + { + } + return retVal; + } + +} \ No newline at end of file diff --git a/cpp17/diff_match_patch_utils.h b/cpp17/diff_match_patch_utils.h new file mode 100644 index 00000000..30c51c06 --- /dev/null +++ b/cpp17/diff_match_patch_utils.h @@ -0,0 +1,288 @@ +/* +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_UTILS_H +#define DIFF_MATCH_PATCH_UTILS_H +// +#include +#include +#include +namespace NUtils +{ + using TStringVector = std::vector< std::wstring >; + + /* + * Utility functions to replace Qt built in methods + */ + + /** + * A safer version of std::wstring.mid(pos). This one returns "" instead of + * null when the position equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ + std::wstring safeMid( const std::wstring &str, std::size_t pos ); + + /** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead of + * null when the position equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring.
+ */ + std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); + + /** + * replaces QString::replace + * @param haystack String to replace all needles with to + * @param needle Substring to search for in the haystack + * @param to replacement string + * @return void. + */ + void replace( std::wstring &haystack, const std::wstring &needle, const std::wstring &to ); + + /** + * replaces returns the html percent encoded character equivalent + * @param c the input Character to return the encoded string of + * @param exclude The list of chars that are NOT to be encoded + * @param include The list of chars that are to be encoded + * @return the encoded string + */ + std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); + + /** + * return the html percent encoded string equivalent + * @param input the input String to return the encoded string of + * @param exclude The list of chars that are NOT to be encoded + * @param include The list of chars that are to be encoded + * @return the encoded string + */ + std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); + + /** + * returns the string equivalent removing any percent encoding and replacing it with the correct character + * @param input the input String to return the encoded string of + * @return the decoded string + */ + std::wstring fromPercentEncoding( const std::wstring &input ); + + /** + * replaces returns integer value of the character, '0'-'9' = 0-9, 'A'-'F' = 10-15, 'a'-'f' = 10-15 + * @param input the value to return the integer value of + * @return the integer value of the character + */ + wchar_t getIntValue( wchar_t ch ); + + /** + * return the integer value of the string + * @param string the String to be converted to an integer + * @return the integer version, on an invalid input returns 0 + */ + 
int64_t toInt( const std::wstring &string ); + + /** + * return true if the string has the suffix + * @param string the String to check to see if it ends with suffix + * @param suffix the String to see if the input string ends with + * @return True if the string ends with suffix + */ + bool endsWith( const std::wstring &string, const std::wstring &suffix ); + + /** + * return a TStringVector of the string split by separator + * @param string the String to be split + * @param separator the String to search in the input string to split on + * @param if true, empty values will be removed + * @return the split string + */ + TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ); + + /** + * splices the objects vector into the input vector + * @param input The input vector to splice out from + * @param start The position of the first item to remove from the input vector + * @param count How many values to remove from the input vector + * @param objects optional objects to insert where the previous objects were removed + * @return the character as a single character string + */ + template< typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const std::vector< T > &objects = {} ) + { + auto deletedRange = std::vector< T >( { input.begin() + start, input.begin() + start + count } ); + input.erase( input.begin() + start, input.begin() + start + count ); + input.insert( input.begin() + start, objects.begin(), objects.end() ); + + return deletedRange; + } + + /** + * splices the objects vector into the input vector + * @param input The input vector to splice out from + * @param start The position of the first item to remove from the input vector + * @param count How many values to remove from the input vector + * @param object individual object to insert where the previous objects were removed + * @return the character as a single character string + */ + template< 
typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const T &object ) + { + return Splice( input, start, count, std::vector< T >( { object } ) ); + } + + template< typename T > + std::wstring to_wstring( const T & /*value*/, bool /*doubleQuoteEmpty*/ ) + { + assert( false ); + return {}; + } + + /** + * return the single character wide string for the given character + * @param value the char to be converted to an wstring + * @param doubleQuoteEmpty, if the return value would be empty, return "" + * @return the character as a single character string + */ + inline std::wstring to_wstring( const char &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, static_cast< wchar_t >( value ) ); + } + + template<> + inline std::wstring to_wstring( const bool &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? L"true" : L"false" ); + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::vector< bool >::reference &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? L"true" : L"false" ); + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && string.empty() ) + return LR"("")"; + + std::wstring wstring( string.size(), L' ' ); // Overestimate number of code points. + wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), string.size() ) ); // Shrink to fit. 
+ return wstring; + } + + template<> + inline std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, value ); + } + + template<> + inline std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) + { + return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); + } + + template<> + inline std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && value.empty() ) + return LR"("")"; + + return value; + } + + template< typename T > + inline std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::vector< bool > &boolArray, bool doubleQuoteOnEmpty ) + { + std::wstring retVal; + for ( auto &&curr : boolArray ) + { + retVal += L"\t" + to_wstring( curr, doubleQuoteOnEmpty ); + } + return retVal; + } + + template< typename T > + inline typename std::enable_if_t< std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) + { + if ( doubleQuoteEmpty && ( values.size() == 0 ) ) + return LR"(\"\")"; + + std::wstring retVal; + for ( auto &&curr : values ) + { + retVal += to_wstring( curr, false ); + } + return retVal; + } + + template< typename T > + inline typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + 
retVal += L")"; + return retVal; + } + + template< typename T > + std::wstring to_wstring( const T &value ) + { + return to_wstring( value, false ); + } +}; + +#endif diff --git a/cpp17/include.cmake b/cpp17/include.cmake deleted file mode 100644 index 654bee29..00000000 --- a/cpp17/include.cmake +++ /dev/null @@ -1,23 +0,0 @@ -set(_PROJECT_NAME diff_match_patch_cpp17) -set(FOLDER_NAME Libs) - -set(project_SRCS - diff_match_patch.cpp -) - -set(qtproject_H -) - -set(project_H - diff_match_patch.h -) - -set(qtproject_UIS -) - - -set(qtproject_QRC -) - -set( project_pub_DEPS -) From c5516f503bbf8385fd8f62b4e656cd714f2eef34 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 12:51:17 -0800 Subject: [PATCH 04/15] Added support for googletest --- cpp17/CMakeLists.txt | 12 +- cpp17/diff_match_patch.h | 14 +- cpp17/diff_match_patch_test.cpp | 326 ++++++++----------- cpp17/diff_match_patch_test.h | 62 +++- cpp17/diff_match_patch_test_assertEquals.cpp | 29 +- 5 files changed, 227 insertions(+), 216 deletions(-) diff --git a/cpp17/CMakeLists.txt b/cpp17/CMakeLists.txt index a4280440..dbc68325 100644 --- a/cpp17/CMakeLists.txt +++ b/cpp17/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.22) + SET( _PROJECT_NAME diff_match_patch_cpp17 ) project( ${_PROJECT_NAME} ) add_library(${_PROJECT_NAME} @@ -18,7 +19,14 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED YES) project( ${TEST_NAME} ) add_executable( ${TEST_NAME} diff_match_patch_test.cpp diff_match_patch_test.h diff_match_patch_test_assertEquals.cpp) - + target_include_directories( ${TEST_NAME} PUBLIC ${CMAKE_SOURCE_DIR}) -target_link_libraries( diff_match_patch_cpp17_test ${_PROJECT_NAME} ) +if( USE_GTEST ) + SET( GTEST_LIBS gtest gmock ) + target_include_directories( ${_PROJECT_NAME} PUBLIC ${GOOGLETEST_ROOT_DIR}/googletest/include ) + target_compile_definitions( ${_PROJECT_NAME} PUBLIC USE_GTEST ) + target_compile_definitions( ${TEST_NAME} PUBLIC USE_GTEST ) +endif() + 
+target_link_libraries( diff_match_patch_cpp17_test ${_PROJECT_NAME} ${GTEST_LIBS}) add_test( ${TEST_NAME} ${TEST_NAME} ) diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h index 5ed51b98..08f4582b 100644 --- a/cpp17/diff_match_patch.h +++ b/cpp17/diff_match_patch.h @@ -27,6 +27,9 @@ #include #include #include +#ifdef USE_GTEST + #include "gtest/gtest.h" +#endif /* * Functions for diff, match and patch. @@ -138,7 +141,16 @@ using TPatchVector = std::vector< Patch >; class diff_match_patch { friend class diff_match_patch_test; - +#ifdef USE_GTEST + FRIEND_TEST( diff_match_patch_test, testDiffCommonOverlap ); + FRIEND_TEST( diff_match_patch_test, testDiffHalfmatch ); + FRIEND_TEST( diff_match_patch_test, testDiffLinesToChars ); + FRIEND_TEST( diff_match_patch_test, testDiffCharsToLines ); + FRIEND_TEST( diff_match_patch_test, testDiffBisect ); + FRIEND_TEST( diff_match_patch_test, testMatchAlphabet ); + FRIEND_TEST( diff_match_patch_test, testMatchBitap ); + FRIEND_TEST( diff_match_patch_test, testPatchAddContext ); +#endif public: // Defaults. // Set these on your diff_match_patch instance to override the defaults. 
diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index 19385b3f..eb22914c 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -20,16 +20,25 @@ #include "diff_match_patch_utils.h" #include "diff_match_patch_test.h" +#ifdef USE_GTEST + #include "gtest/gtest.h" +#endif + #include #include -int main( int /*argc*/, char ** /*argv*/ ) +int main( int argc, char **argv ) { +#ifdef USE_GTEST + ::testing::InitGoogleTest( &argc, argv ); + int retVal = RUN_ALL_TESTS(); +#else diff_match_patch_test dmp_test; std::cerr << "Starting diff_match_patch unit tests.\n"; - dmp_test.run_all_tests(); + int retVal = dmp_test.run_all_tests(); std::cerr << "Done.\n"; - return 0; +#endif + return retVal; } static wchar_t kZero{ 0 }; @@ -40,25 +49,26 @@ diff_match_patch_test::diff_match_patch_test() { } -void diff_match_patch_test::runTest( std::function< void() > test ) +#ifndef USE_GTEST +bool diff_match_patch_test::runTest( std::function< void() > test ) { + bool retVal = false; try { test(); numPassedTests++; + retVal = true; } - //catch ( const char *msg ) - //{ - // std::cerr << "Test failed: " << msg << "\n"; - //} catch ( std::string msg ) { std::cerr << "Test failed: " << msg << "\n"; numFailedTests++; + retVal = false; } + return retVal; } -void diff_match_patch_test::run_all_tests() +int diff_match_patch_test::run_all_tests() { auto startTime = std::chrono::high_resolution_clock::now(); @@ -99,11 +109,13 @@ void diff_match_patch_test::run_all_tests() auto endTime = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast< std::chrono::milliseconds >( endTime - startTime ).count(); std::wcout << "Total time: " << elapsed << " ms\n"; + return ( numFailedTests == 0 ) ? 0 : 1; } +#endif // DIFF TEST FUNCTIONS -void diff_match_patch_test::testDiffCommonPrefix() +TEST_F( diff_match_patch_test, testDiffCommonPrefix ) { // Detect any common prefix. 
assertEquals( "diff_commonPrefix: nullptr case.", 0, dmp.diff_commonPrefix( "abc", "xyz" ) ); @@ -113,7 +125,7 @@ void diff_match_patch_test::testDiffCommonPrefix() assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( "1234", "1234xyz" ) ); } -void diff_match_patch_test::testDiffCommonSuffix() +TEST_F( diff_match_patch_test, testDiffCommonSuffix ) { // Detect any common suffix. assertEquals( "diff_commonSuffix: nullptr case.", 0, dmp.diff_commonSuffix( "abc", "xyz" ) ); @@ -123,7 +135,7 @@ void diff_match_patch_test::testDiffCommonSuffix() assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( "1234", "xyz1234" ) ); } -void diff_match_patch_test::testDiffCommonOverlap() +TEST_F( diff_match_patch_test, testDiffCommonOverlap ) { // Detect any suffix/prefix overlap. assertEquals( "diff_commonOverlap: nullptr case.", 0, dmp.diff_commonOverlap( "", "abcd" ) ); @@ -139,7 +151,7 @@ void diff_match_patch_test::testDiffCommonOverlap() assertEquals( "diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap( L"fi", std::wstring( L"\ufb01i" ) ) ); } -void diff_match_patch_test::testDiffHalfmatch() +TEST_F( diff_match_patch_test, testDiffHalfmatch ) { // Detect a halfmatch. 
dmp.Diff_Timeout = 1; @@ -147,28 +159,28 @@ void diff_match_patch_test::testDiffHalfmatch() assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( "12345", "23" ) ); - assertEquals( "diff_halfMatch: Single Match #1.", { L"12", L"90", L"a", L"z", L"345678" }, dmp.diff_halfMatch( "1234567890", "a345678z" ) ); + assertEquals( "diff_halfMatch: Single Match #1.", TStringVector( { L"12", L"90", L"a", L"z", L"345678" } ), dmp.diff_halfMatch( "1234567890", "a345678z" ) ); - assertEquals( "diff_halfMatch: Single Match #2.", { L"a", L"z", L"12", L"90", L"345678" }, dmp.diff_halfMatch( "a345678z", "1234567890" ) ); + assertEquals( "diff_halfMatch: Single Match #2.", TStringVector( { L"a", L"z", L"12", L"90", L"345678" } ), dmp.diff_halfMatch( "a345678z", "1234567890" ) ); - assertEquals( "diff_halfMatch: Single Match #3.", { L"abc", L"z", L"1234", L"0", L"56789" }, dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); + assertEquals( "diff_halfMatch: Single Match #3.", TStringVector( { L"abc", L"z", L"1234", L"0", L"56789" } ), dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); - assertEquals( "diff_halfMatch: Single Match #4.", { L"a", L"xyz", L"1", L"7890", L"23456" }, dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); + assertEquals( "diff_halfMatch: Single Match #4.", TStringVector( { L"a", L"xyz", L"1", L"7890", L"23456" } ), dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); - assertEquals( "diff_halfMatch: Multiple Matches #1.", { L"12123", L"123121", L"a", L"z", L"1234123451234" }, dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); + assertEquals( "diff_halfMatch: Multiple Matches #1.", TStringVector( { L"12123", L"123121", L"a", L"z", L"1234123451234" } ), dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); - assertEquals( "diff_halfMatch: Multiple Matches #2.", { L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-=" }, dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); + assertEquals( "diff_halfMatch: 
Multiple Matches #2.", TStringVector( { L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-=" } ), dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); - assertEquals( "diff_halfMatch: Multiple Matches #3.", { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" }, dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); + assertEquals( "diff_halfMatch: Multiple Matches #3.", TStringVector( { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" } ), dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy - assertEquals( "diff_halfMatch: Non-optimal halfmatch.", { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" }, dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); + assertEquals( "diff_halfMatch: Non-optimal halfmatch.", TStringVector( { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" } ), dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); dmp.Diff_Timeout = 0; assertEmpty( "diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); } -void diff_match_patch_test::testDiffLinesToChars() +TEST_F( diff_match_patch_test, testDiffLinesToChars ) { // Convert lines down to characters. TStringVector tmpVector = TStringVector( { L"", L"alpha\n", L"beta\n" } ); @@ -220,7 +232,7 @@ void diff_match_patch_test::testDiffLinesToChars() assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, {} ) ); } -void diff_match_patch_test::testDiffCharsToLines() +TEST_F( diff_match_patch_test, testDiffCharsToLines ) { // First check that Diff equality works. 
assertTrue( "diff_charsToLines:", Diff( EQUAL, "a" ) == Diff( EQUAL, "a" ) ); @@ -236,7 +248,7 @@ void diff_match_patch_test::testDiffCharsToLines() tmpVector.emplace_back( L"alpha\n" ); tmpVector.emplace_back( L"beta\n" ); dmp.diff_charsToLines( diffs, tmpVector ); - assertEquals( "diff_charsToLines:", { Diff( EQUAL, "alpha\nbeta\nalpha\n" ), Diff( INSERT, "beta\nalpha\nbeta\n" ) }, diffs ); + assertEquals( "diff_charsToLines:", TDiffVector( { Diff( EQUAL, "alpha\nbeta\nalpha\n" ), Diff( INSERT, "beta\nalpha\nbeta\n" ) } ), diffs ); // More than 256 to reveal any 8-bit limitations. int n = 300; @@ -255,246 +267,213 @@ void diff_match_patch_test::testDiffCharsToLines() tmpVector.emplace( tmpVector.begin(), L"" ); diffs = { Diff( DELETE, chars ) }; dmp.diff_charsToLines( diffs, tmpVector ); - assertEquals( "diff_charsToLines: More than 256.", { Diff( DELETE, lines ) }, diffs ); + assertEquals( "diff_charsToLines: More than 256.", TDiffVector( { Diff( DELETE, lines ) } ), diffs ); } -void diff_match_patch_test::testDiffCleanupMerge() +TEST_F( diff_match_patch_test, testDiffCleanupMerge ) { // Cleanup a messy diff. 
TDiffVector diffs; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: nullptr case.", {}, diffs ); + assertEquals( "diff_cleanupMerge: nullptr case.", TDiffVector(), diffs ); diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: No change case.", { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) }, diffs ); + assertEquals( "diff_cleanupMerge: No change case.", TDiffVector( { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) } ), diffs ); diffs = { Diff( EQUAL, "a" ), Diff( EQUAL, "b" ), Diff( EQUAL, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge equalities.", { Diff( EQUAL, "abc" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge equalities.", TDiffVector( { Diff( EQUAL, "abc" ) } ), diffs ); diffs = { Diff( DELETE, "a" ), Diff( DELETE, "b" ), Diff( DELETE, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge deletions.", { Diff( DELETE, "abc" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge deletions.", TDiffVector( { Diff( DELETE, "abc" ) } ), diffs ); diffs = { Diff( INSERT, "a" ), Diff( INSERT, "b" ), Diff( INSERT, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge insertions.", { Diff( INSERT, "abc" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge insertions.", TDiffVector( { Diff( INSERT, "abc" ) } ), diffs ); diffs = { Diff( DELETE, "a" ), Diff( INSERT, "b" ), Diff( DELETE, "c" ), Diff( INSERT, "d" ), Diff( EQUAL, "e" ), Diff( EQUAL, "f" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge interweave.", { Diff( DELETE, "ac" ), Diff( INSERT, "bd" ), Diff( EQUAL, "ef" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Merge interweave.", TDiffVector( { Diff( DELETE, "ac" ), Diff( INSERT, "bd" ), Diff( EQUAL, "ef" ) } ), diffs ); diffs = { Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( 
DELETE, "dc" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", { Diff( EQUAL, "a" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "c" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", TDiffVector( { Diff( EQUAL, "a" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "c" ) } ), diffs ); diffs = { Diff( EQUAL, "x" ), Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ), Diff( EQUAL, "y" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", { Diff( EQUAL, "xa" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "cy" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", TDiffVector( { Diff( EQUAL, "xa" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "cy" ) } ), diffs ); diffs = { Diff( EQUAL, "a" ), Diff( INSERT, "ba" ), Diff( EQUAL, "c" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit left.", { Diff( INSERT, "ab" ), Diff( EQUAL, "ac" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left.", TDiffVector( { Diff( INSERT, "ab" ), Diff( EQUAL, "ac" ) } ), diffs ); diffs = { Diff( EQUAL, "c" ), Diff( INSERT, "ab" ), Diff( EQUAL, "a" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit right.", { Diff( EQUAL, "ca" ), Diff( INSERT, "ba" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right.", TDiffVector( { Diff( EQUAL, "ca" ), Diff( INSERT, "ba" ) } ), diffs ); diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( EQUAL, "c" ), Diff( DELETE, "ac" ), Diff( EQUAL, "x" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit left recursive.", { Diff( DELETE, "abc" ), Diff( EQUAL, "acx" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left recursive.", TDiffVector( { Diff( DELETE, "abc" ), Diff( EQUAL, "acx" ) } 
), diffs ); diffs = { Diff( EQUAL, "x" ), Diff( DELETE, "ca" ), Diff( EQUAL, "c" ), Diff( DELETE, "b" ), Diff( EQUAL, "a" ) }; dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit right recursive.", { Diff( EQUAL, "xca" ), Diff( DELETE, "cba" ) }, diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right recursive.", TDiffVector( { Diff( EQUAL, "xca" ), Diff( DELETE, "cba" ) } ), diffs ); } -void diff_match_patch_test::testDiffCleanupSemanticLossless() +TEST_F( diff_match_patch_test, testDiffCleanupSemanticLossless ) { // Slide diffs to match logical boundaries. auto diffs = TDiffVector(); dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: nullptr case.", {}, diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); diffs = { Diff( EQUAL, "AAA\r\n\r\nBBB" ), Diff( INSERT, "\r\nDDD\r\n\r\nBBB" ), Diff( EQUAL, "\r\nEEE" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemanticLossless: Blank lines.", { Diff( EQUAL, "AAA\r\n\r\n" ), Diff( INSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, "BBB\r\nEEE" ) }, diffs ); + assertEquals( "diff_cleanupSemanticLossless: Blank lines.", TDiffVector( { Diff( EQUAL, "AAA\r\n\r\n" ), Diff( INSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, "BBB\r\nEEE" ) } ), diffs ); diffs = { Diff( EQUAL, "AAA\r\nBBB" ), Diff( INSERT, " DDD\r\nBBB" ), Diff( EQUAL, " EEE" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", { Diff( EQUAL, "AAA\r\n" ), Diff( INSERT, "BBB DDD\r\n" ), Diff( EQUAL, "BBB EEE" ) }, diffs ); + assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", TDiffVector( { Diff( EQUAL, "AAA\r\n" ), Diff( INSERT, "BBB DDD\r\n" ), Diff( EQUAL, "BBB EEE" ) } ), diffs ); diffs = { Diff( EQUAL, "The c" ), Diff( INSERT, "ow and the c" ), Diff( EQUAL, "at." 
) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, "The " ), Diff( INSERT, "cow and the " ), Diff( EQUAL, "cat." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EQUAL, "The " ), Diff( INSERT, "cow and the " ), Diff( EQUAL, "cat." ) } ), diffs ); diffs = { Diff( EQUAL, "The-c" ), Diff( INSERT, "ow-and-the-c" ), Diff( EQUAL, "at." ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", { Diff( EQUAL, "The-" ), Diff( INSERT, "cow-and-the-" ), Diff( EQUAL, "cat." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", TDiffVector( { Diff( EQUAL, "The-" ), Diff( INSERT, "cow-and-the-" ), Diff( EQUAL, "cat." ) } ), diffs ); diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "a" ), Diff( EQUAL, "ax" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Hitting the start.", { Diff( DELETE, "a" ), Diff( EQUAL, "aax" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the start.", TDiffVector( { Diff( DELETE, "a" ), Diff( EQUAL, "aax" ) } ), diffs ); diffs = { Diff( EQUAL, "xa" ), Diff( DELETE, "a" ), Diff( EQUAL, "a" ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Hitting the end.", { Diff( EQUAL, "xaa" ), Diff( DELETE, "a" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the end.", TDiffVector( { Diff( EQUAL, "xaa" ), Diff( DELETE, "a" ) } ), diffs ); diffs = { Diff( EQUAL, "The xxx. The " ), Diff( INSERT, "zzz. The " ), Diff( EQUAL, "yyy." ) }; dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Sentence boundaries.", { Diff( EQUAL, "The xxx." ), Diff( INSERT, " The zzz." ), Diff( EQUAL, " The yyy." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Sentence boundaries.", TDiffVector( { Diff( EQUAL, "The xxx." ), Diff( INSERT, " The zzz." 
), Diff( EQUAL, " The yyy." ) } ), diffs ); } -void diff_match_patch_test::testDiffCleanupSemantic() +TEST_F( diff_match_patch_test, testDiffCleanupSemantic ) { // Cleanup semantically trivial equalities. auto diffs = TDiffVector(); dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: nullptr case.", {}, diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No elimination #1.", { Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #1.", TDiffVector( { Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) } ), diffs ); diffs = { Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No elimination #2.", { Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #2.", TDiffVector( { Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) } ), diffs ); diffs = { Diff( DELETE, "a" ), Diff( EQUAL, "b" ), Diff( DELETE, "c" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Simple elimination.", { Diff( DELETE, "abc" ), Diff( INSERT, "b" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Simple elimination.", TDiffVector( { Diff( DELETE, "abc" ), Diff( INSERT, "b" ) } ), diffs ); diffs = { Diff( DELETE, "ab" ), Diff( EQUAL, "cd" ), Diff( DELETE, "e" ), Diff( EQUAL, "f" ), Diff( INSERT, "g" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Backpass elimination.", { Diff( DELETE, "abcdef" ), Diff( INSERT, "cdfg" ) }, 
diffs ); + assertEquals( "diff_cleanupSemantic: Backpass elimination.", TDiffVector( { Diff( DELETE, "abcdef" ), Diff( INSERT, "cdfg" ) } ), diffs ); - diffs = { Diff( INSERT, "1" ), Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "2" ), Diff( EQUAL, "_" ), Diff( INSERT, "1" ), Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "2" ) }; + diffs = { Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ), Diff( EQUAL, "_" ), Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Multiple elimination.", { Diff( DELETE, "AB_AB" ), Diff( INSERT, "1A2_1A2" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Multiple elimination.", TDiffVector( { Diff( DELETE, "AB_AB" ), Diff( INSERT, "1A2_1A2" ) } ), diffs ); diffs = { Diff( EQUAL, "The c" ), Diff( DELETE, "ow and the c" ), Diff( EQUAL, "at." ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Word boundaries.", { Diff( EQUAL, "The " ), Diff( DELETE, "cow and the " ), Diff( EQUAL, "cat." ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EQUAL, "The " ), Diff( DELETE, "cow and the " ), Diff( EQUAL, "cat." 
) } ), diffs ); diffs = { Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No overlap elimination.", { Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: No overlap elimination.", TDiffVector( { Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) } ), diffs ); diffs = { Diff( DELETE, "abcxxx" ), Diff( INSERT, "xxxdef" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Overlap elimination.", { Diff( DELETE, "abc" ), Diff( EQUAL, "xxx" ), Diff( INSERT, "def" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Overlap elimination.", TDiffVector( { Diff( DELETE, "abc" ), Diff( EQUAL, "xxx" ), Diff( INSERT, "def" ) } ), diffs ); diffs = { Diff( DELETE, "xxxabc" ), Diff( INSERT, "defxxx" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", { Diff( INSERT, "def" ), Diff( EQUAL, "xxx" ), Diff( DELETE, "abc" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", TDiffVector( { Diff( INSERT, "def" ), Diff( EQUAL, "xxx" ), Diff( DELETE, "abc" ) } ), diffs ); diffs = { Diff( DELETE, "abcd1212" ), Diff( INSERT, "1212efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A3" ), Diff( INSERT, "3BC" ) }; dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", { Diff( DELETE, "abcd" ), Diff( EQUAL, "1212" ), Diff( INSERT, "efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "a" ), Diff( EQUAL, "3" ), Diff( INSERT, "BC" ) }, diffs ); + assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", TDiffVector( { Diff( DELETE, "abcd" ), Diff( EQUAL, "1212" ), Diff( INSERT, "efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A" ), Diff( EQUAL, "3" ), Diff( INSERT, "BC" ) } ), diffs ); } -void diff_match_patch_test::testDiffCleanupEfficiency() +TEST_F( diff_match_patch_test, testDiffCleanupEfficiency ) { // Cleanup 
operationally trivial equalities. dmp.Diff_EditCost = 4; auto diffs = TDiffVector(); dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: nullptr case.", {}, diffs ); + assertEquals( "diff_cleanupEfficiency: nullptr case.", TDiffVector(), diffs ); diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: No elimination.", { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: No elimination.", TDiffVector( { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) } ), diffs ); diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xyz34" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", TDiffVector( { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xyz34" ) } ), diffs ); diffs = { Diff( INSERT, "12" ), Diff( EQUAL, "x" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", { Diff( DELETE, "xcd" ), Diff( INSERT, "12x34" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", TDiffVector( { Diff( DELETE, "xcd" ), Diff( INSERT, "12x34" ) } ), diffs ); diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xy" ), Diff( INSERT, "34" ), Diff( EQUAL, "z" ), Diff( DELETE, "cd" ), Diff( INSERT, "56" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Backpass elimination.", { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xy34z56" ) }, diffs ); + 
assertEquals( "diff_cleanupEfficiency: Backpass elimination.", TDiffVector( { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xy34z56" ) } ), diffs ); dmp.Diff_EditCost = 5; diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: High cost elimination.", { Diff( DELETE, "abwxyzcd" ), Diff( INSERT, "12wxyz34" ) }, diffs ); + assertEquals( "diff_cleanupEfficiency: High cost elimination.", TDiffVector( { Diff( DELETE, "abwxyzcd" ), Diff( INSERT, "12wxyz34" ) } ), diffs ); dmp.Diff_EditCost = 4; } -void diff_match_patch_test::testDiffPrettyHtml() +TEST_F( diff_match_patch_test, testDiffPrettyHtml ) { // Pretty print. auto diffs = TDiffVector( { Diff( EQUAL, "a\n" ), Diff( DELETE, "b" ), Diff( INSERT, "c&d" ) } ); - assertEquals( "diff_prettyHtml:", "
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); + assertEquals( "diff_prettyHtml:", L"
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); } -void diff_match_patch_test::testDiffText() +TEST_F( diff_match_patch_test, testDiffText ) { // Compute the source and destination texts. - auto diffs = TDiffVector( { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ) } ); - assertEquals( "diff_text1:", "jumps over the lazy", dmp.diff_text1( diffs ) ); - assertEquals( "diff_text2:", "jumped over a lazy", dmp.diff_text2( diffs ) ); + auto diffs = { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ) }; + assertEquals( "diff_text1:", L"jumps over the lazy", dmp.diff_text1( diffs ) ); + assertEquals( "diff_text2:", L"jumped over a lazy", dmp.diff_text2( diffs ) ); } -void diff_match_patch_test::testDiffDelta() +TEST_F( diff_match_patch_test, testDiffDelta ) { // Convert a diff into delta string. auto diffs = TDiffVector( { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ), Diff( INSERT, "old dog" ) } ); std::wstring text1 = dmp.diff_text1( diffs ); - assertEquals( "diff_text1: Base text.", "jumps over the lazy", text1 ); + assertEquals( "diff_text1: Base text.", L"jumps over the lazy", text1 ); std::wstring delta = dmp.diff_toDelta( diffs ); std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; - assertEquals( "diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); + assertEquals( "diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); // Convert delta string into a diff. assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); // Generates error (19 < 20). 
- bool exceptionTriggered = false; - try - { - dmp.diff_fromDelta( text1 + L"x", delta ); - assertFalse( "diff_fromDelta: Too long.", true ); - } - catch ( std::wstring ex ) - { - exceptionTriggered = true; - // Exception expected. - } - assertEquals( "diff_fromDelta: Too long - Exception triggered", true, exceptionTriggered ); + assertThrow( "diff_fromDelta: Too long.", dmp.diff_fromDelta( text1 + L"x", delta ), std::wstring ); + // Generates error (19 > 18). + assertThrow( "diff_fromDelta: Too short.", dmp.diff_fromDelta( text1.substr( 1 ), delta ), std::wstring ); - exceptionTriggered = false; - try - { - dmp.diff_fromDelta( text1.substr( 1 ), delta ); - assertFalse( "diff_fromDelta: Too short.", true ); - } - catch ( std::wstring ex ) - { - exceptionTriggered = true; - // Exception expected. - } - assertEquals( "diff_fromDelta: Too short - Exception triggered", true, exceptionTriggered ); // Generates error (%c3%xy invalid Unicode). - // This test does not work because QUrl::fromPercentEncoding("%xy") ->"?" - exceptionTriggered = false; - try - { - dmp.diff_fromDelta( "", "+%c3%xy" ); - assertFalse( "diff_fromDelta: Invalid character.", true ); - } - catch ( std::wstring ex ) - { - exceptionTriggered = true; - // Exception expected. - } - assertEquals( "diff_fromDelta: Invalid character - Exception triggered", true, exceptionTriggered ); + assertThrow( "diff_fromDelta: Invalid character.", dmp.diff_fromDelta( "", "+%c3%xy" ), std::wstring ); // Test deltas with special characters. 
diffs = { Diff( EQUAL, std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %" ) ), Diff( DELETE, std::wstring( L"\u0681 " ) + kOne + std::wstring( L" \n ^" ) ), Diff( INSERT, std::wstring( L"\u0682 " ) + kTwo + std::wstring( L" \\ |" ) ) }; @@ -504,23 +483,23 @@ void diff_match_patch_test::testDiffDelta() assertEquals( "diff_text1: Unicode text", golden, text1 ); delta = dmp.diff_toDelta( diffs ); - assertEquals( "diff_toDelta: Unicode", "=7\t-7\t+%DA%82 %02 %5C %7C", delta ); + assertEquals( "diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta ); assertEquals( "diff_fromDelta: Unicode", diffs, dmp.diff_fromDelta( text1, delta ) ); // Verify pool of unchanged characters. diffs = { Diff( INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # " ) }; std::wstring text2 = dmp.diff_text2( diffs ); - assertEquals( "diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); + assertEquals( "diff_text2: Unchanged characters.", L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); delta = dmp.diff_toDelta( diffs ); - assertEquals( "diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); + assertEquals( "diff_toDelta: Unchanged characters.", L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); // Convert delta string into a diff. assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( {}, delta ) ); } -void diff_match_patch_test::testDiffXIndex() +TEST_F( diff_match_patch_test, testDiffXIndex ) { // Translate a location in text1 to text2. 
auto diffs = TDiffVector( { Diff( DELETE, "a" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) } ); @@ -530,7 +509,7 @@ void diff_match_patch_test::testDiffXIndex() assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); } -void diff_match_patch_test::testDiffLevenshtein() +TEST_F( diff_match_patch_test, testDiffLevenshtein ) { auto diffs = TDiffVector( { Diff( DELETE, "abc" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) } ); assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); @@ -542,7 +521,7 @@ void diff_match_patch_test::testDiffLevenshtein() assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); } -void diff_match_patch_test::testDiffBisect() +TEST_F( diff_match_patch_test, testDiffBisect ) { // Normal. std::wstring a = L"cat"; @@ -559,7 +538,7 @@ void diff_match_patch_test::testDiffBisect() assertEquals( "diff_bisect: Timeout.", diffs, dmp.diff_bisect( a, b, 0 ) ); } -void diff_match_patch_test::testDiffMain() +TEST_F( diff_match_patch_test, testDiffMain ) { // Perform a trivial diff. auto diffs = TDiffVector(); @@ -652,7 +631,7 @@ void diff_match_patch_test::testDiffMain() // MATCH TEST FUNCTIONS -void diff_match_patch_test::testMatchAlphabet() +TEST_F( diff_match_patch_test, testMatchAlphabet ) { // Initialise the bitmasks for Bitap. TCharPosMap bitmask; @@ -668,7 +647,7 @@ void diff_match_patch_test::testMatchAlphabet() assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( "abcaba" ) ); } -void diff_match_patch_test::testMatchBitap() +TEST_F( diff_match_patch_test, testMatchBitap ) { // Bitap algorithm. dmp.Match_Distance = 100; @@ -714,7 +693,7 @@ void diff_match_patch_test::testMatchBitap() assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); } -void diff_match_patch_test::testMatchMain() +TEST_F( diff_match_patch_test, testMatchMain ) { // Full match. 
assertEquals( "match_main: Equality.", 0, dmp.match_main( "abcdef", "abcdef", 1000 ) ); @@ -732,7 +711,7 @@ void diff_match_patch_test::testMatchMain() // PATCH TEST FUNCTIONS -void diff_match_patch_test::testPatchObj() +TEST_F( diff_match_patch_test, testPatchObj ) { // Patch Object. Patch p; @@ -745,35 +724,24 @@ void diff_match_patch_test::testPatchObj() assertEquals( "patch: toString.", strp, p.toString() ); } -void diff_match_patch_test::testPatchFromText() +TEST_F( diff_match_patch_test, testPatchFromText ) { assertTrue( "patch_fromText: #0.", dmp.patch_fromText( "" ).empty() ); std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp )[ 0 ].toString() ); - assertEquals( "patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); + assertEquals( "patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); - assertEquals( "patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); + assertEquals( "patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); - assertEquals( "patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); + assertEquals( "patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); // Generates error. - bool exceptionTriggered = false; - try - { - dmp.patch_fromText( "Bad\nPatch\n" ); - assertFalse( "patch_fromText: #5.", true ); - } - catch ( std::wstring ex ) - { - exceptionTriggered = true; - // Exception expected. 
- } - assertEquals( "patch_fromText: #5 - Exception triggered", true, exceptionTriggered ); + assertThrow( "patch_fromText: #5.", dmp.patch_fromText( "Bad\nPatch\n" ), std::wstring ); } -void diff_match_patch_test::testPatchToText() +TEST_F( diff_match_patch_test, testPatchToText ) { std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; auto patches = dmp.patch_fromText( strp ); @@ -784,31 +752,31 @@ void diff_match_patch_test::testPatchToText() assertEquals( "patch_toText: Dua", strp, dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchAddContext() +TEST_F( diff_match_patch_test, testPatchAddContext ) { dmp.Patch_Margin = 4; auto p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; dmp.patch_addContext( p, "The quick brown fox jumps over the lazy dog." ); - assertEquals( "patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); + assertEquals( "patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; dmp.patch_addContext( p, "The quick brown fox jumps." ); - assertEquals( "patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); + assertEquals( "patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; dmp.patch_addContext( p, "The quick brown fox jumps." ); - assertEquals( "patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); + assertEquals( "patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; dmp.patch_addContext( p, "The quick brown fox jumps. 
The quick brown fox crashes." ); - assertEquals( "patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString() ); + assertEquals( "patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString() ); } -void diff_match_patch_test::testPatchMake() +TEST_F( diff_match_patch_test, testPatchMake ) { TPatchVector patches; patches = dmp.patch_make( "", "" ); - assertEquals( "patch_make: nullptr case", "", dmp.patch_toText( patches ) ); + assertEquals( "patch_make: nullptr case", L"", dmp.patch_toText( patches ) ); std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; @@ -832,7 +800,7 @@ void diff_match_patch_test::testPatchMake() assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); patches = dmp.patch_make( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?" ); - assertEquals( "patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_toText: Character encoding.", L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); diffs = { Diff( DELETE, "`1234567890-=[]\\;',./" ), Diff( INSERT, "~!@#$%^&*()_+{}|:\"<>?" ) }; assertEquals( "patch_fromText: Character decoding.", diffs, dmp.patch_fromText( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" )[ 0 ].diffs ); @@ -848,13 +816,13 @@ void diff_match_patch_test::testPatchMake() assertEquals( "patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchSplitMax() +TEST_F( diff_match_patch_test, testPatchSplitMax ) { // Confirm Match_MaxBits is 32. 
TPatchVector patches; patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_splitMax: #1.", L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); patches = dmp.patch_make( "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz" ); std::wstring oldToText = dmp.patch_toText( patches ); @@ -863,33 +831,33 @@ void diff_match_patch_test::testPatchSplitMax() patches = dmp.patch_make( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc" ); dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_splitMax: #3.", L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); patches = dmp.patch_make( "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1" ); dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : 
\n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_splitMax: #4.", L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchAddPadding() +TEST_F( diff_match_patch_test, testPatchAddPadding ) { TPatchVector patches; patches = dmp.patch_make( "", "test" ); - assertEquals( "patch_addPadding: Both edges ful", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges ful", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges full.", L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); patches = dmp.patch_make( "XY", "XtestY" ); - assertEquals( "patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges partial.", L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges partial.", L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); patches = dmp.patch_make( "XXXXYYYY", "XXXXtestYYYY" ); - assertEquals( "patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges none.", L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges none.", "@@ 
-5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_addPadding: Both edges none.", L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchApply() +TEST_F( diff_match_patch_test, testPatchApply ) { dmp.Match_Distance = 1000; dmp.Match_Threshold = 0.5f; @@ -903,42 +871,42 @@ void diff_match_patch_test::testPatchApply() assertEquals( "patch_apply: nullptr case.", L"Hello world.\t0", resultStr ); patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog." ); - assertEquals( "patch_apply: Exact match.", "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_toText( patches ) ); + assertEquals( "patch_apply: Exact match.", L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_toText( patches ) ); results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); boolArray = results.second; resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); + assertEquals( "patch_apply: Exact match.", L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." ); boolArray = results.second; resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); + assertEquals( "patch_apply: Partial match.", L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); results = dmp.patch_apply( patches, "I am the very model of a modern major general." 
); boolArray = results.second; resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr ); + assertEquals( "patch_apply: Failed match.", L"I am the very model of a modern major general.\tfalse\tfalse", resultStr ); patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); results = dmp.patch_apply( patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); boolArray = results.second; resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr ); + assertEquals( "patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", resultStr ); patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); boolArray = results.second; resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); + assertEquals( "patch_apply: Big delete, large change 1.", L"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); dmp.Patch_DeleteThreshold = 0.6f; patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); boolArray = results.second; resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr ); + assertEquals( 
"patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", resultStr ); dmp.Patch_DeleteThreshold = 0.5f; dmp.Match_Threshold = 0.0f; @@ -947,7 +915,7 @@ void diff_match_patch_test::testPatchApply() results = dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); boolArray = results.second; resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); + assertEquals( "patch_apply: Compensate for failed patch.", L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); dmp.Match_Threshold = 0.5f; dmp.Match_Distance = 1000; @@ -965,31 +933,17 @@ void diff_match_patch_test::testPatchApply() results = dmp.patch_apply( patches, "" ); boolArray = results.second; resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); - assertEquals( "patch_apply: Edge exact match.", "test\ttrue", resultStr ); + assertEquals( "patch_apply: Edge exact match.", L"test\ttrue", resultStr ); patches = dmp.patch_make( "XY", "XtestY" ); results = dmp.patch_apply( patches, "XY" ); boolArray = results.second; resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); - assertEquals( "patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr ); + assertEquals( "patch_apply: Near edge exact match.", L"XtestY\ttrue", resultStr ); patches = dmp.patch_make( "y", "y123" ); results = dmp.patch_apply( patches, "x" ); boolArray = results.second; resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ] ); - assertEquals( "patch_apply: Edge partial match.", "x123\ttrue", resultStr ); + assertEquals( "patch_apply: Edge partial match.", L"x123\ttrue", resultStr ); } - -/* -Compile instructions for cmake on Windows: -mkdir build -cd build -cmake .. 
-make -diff_match_patch_test.exe - -Compile insructions for OS X: -qmake -spec macx-g++ -make -./diff_match_patch -*/ diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index e7285fb8..35110051 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -19,9 +19,21 @@ #ifndef DIFF_MATCH_PATCH_TEST_H #define DIFF_MATCH_PATCH_TEST_H -#include +#ifdef USE_GTEST + #include "gtest/gtest.h" + #define assertEquals( msg, GOLDEN, COMPUTED ) EXPECT_EQ( GOLDEN, COMPUTED ) << msg + #define assertEmpty( msg, COMPUTED ) EXPECT_TRUE( COMPUTED.empty() ) << msg + #define assertTrue( msg, COMPUTED ) EXPECT_TRUE( COMPUTED ) << msg + #define assertFalse( msg, COMPUTED ) EXPECT_FALSE( COMPUTED ) << msg + #define PUBLIC_TESTING : public testing::Test + #define assertThrow( msg, STATEMENT, EXCEPTION_TYPE ) EXPECT_THROW( STATEMENT, EXCEPTION_TYPE ) << msg +#else + #include + #define PUBLIC_TESTING + #define TEST_F( className, funcName ) void diff_match_patch_test::funcName() +#endif -class diff_match_patch_test +class diff_match_patch_test PUBLIC_TESTING { public: using TStringVector = diff_match_patch::TStringVector; @@ -30,8 +42,10 @@ class diff_match_patch_test using TVariantVector = diff_match_patch::TVariantVector; diff_match_patch_test(); - void run_all_tests(); - void runTest( std::function< void() > test ); + +#ifndef USE_GTEST +public: + int run_all_tests(); // DIFF TEST FUNCTIONS void testDiffCommonPrefix(); @@ -68,9 +82,9 @@ class diff_match_patch_test void testPatchApply(); private: + bool runTest( std::function< void() > test ); std::size_t numPassedTests{ 0 }; std::size_t numFailedTests{ 0 }; - diff_match_patch dmp; // Define equality. template< typename T > @@ -86,7 +100,8 @@ class diff_match_patch_test failed = t1 != t2; } } - else + + if ( failed ) { // Build human readable description of both lists. 
auto lhsString = NUtils::to_wstring( lhs, true ); @@ -101,8 +116,6 @@ class diff_match_patch_test void assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ); void assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ); void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ); - void assertEquals( const std::string &strCase, const std::wstring &s1, const std::string &s2 ); - void assertEquals( const std::string &strCase, const std::string &s1, const std::wstring &s2 ); void assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ); void assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ); void assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ); @@ -114,6 +127,39 @@ class diff_match_patch_test void reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ); void reportPassed( const std::string &strCase ); + #define assertThrow( msg, COMMAND, EXCEPTION_TYPE ) \ + { \ + bool exceptionTriggered = false; \ + try \ + { \ + COMMAND;\ + assertFalse( msg, true ); \ + } \ + catch ( const EXCEPTION_TYPE &ex ) \ + { \ + exceptionTriggered = true; \ + } \ + assertTrue( std::string( msg ) + std::string( " - Exception triggered" ), exceptionTriggered ); \ + } + +#endif +public: + bool equals( const TVariant &var1, const TVariant &var2 ); + + template< typename T > + bool equals( const T &lhs, const T &rhs ) + { + bool equal = ( lhs.size() == rhs.size() ); + for ( auto ii = 0ULL; equal && ( ii < lhs.size() ); ++ii ) + { + auto &&t1 = lhs[ ii ]; + auto &&t2 = rhs[ ii ]; + equal = t1 == t2; + } + return equal; + } + diff_match_patch dmp; + // Construct the two texts which made up the diff originally. 
TStringVector diff_rebuildtexts( const TDiffVector &diffs ); }; diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp index 34cd7fc1..3665956f 100644 --- a/cpp17/diff_match_patch_test_assertEquals.cpp +++ b/cpp17/diff_match_patch_test_assertEquals.cpp @@ -21,7 +21,7 @@ #include "diff_match_patch_test.h" #include - +#ifndef USE_GTEST void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) { std::cout << "FAILED : " + strCase + "\n"; @@ -58,16 +58,6 @@ void diff_match_patch_test::assertEquals( const std::string &strCase, const std: return assertEquals( strCase, NUtils::to_wstring( s1 ), NUtils::to_wstring( s2 ) ); } -void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::wstring &s2 ) -{ - return assertEquals( strCase, NUtils::to_wstring( s1 ), s2 ); -} - -void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::string &s2 ) -{ - return assertEquals( strCase, s1, NUtils::to_wstring( s2 ) ); -} - void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) { if ( d1 != d2 ) @@ -136,6 +126,15 @@ void diff_match_patch_test::assertFalse( const std::string &strCase, bool value reportPassed( strCase ); } +void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) +{ + if ( !list.empty() ) + { + throw strCase; + } +} +#endif + // Construct the two texts which made up the diff originally. 
diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) { @@ -153,11 +152,3 @@ diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( c } return text; } - -void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) -{ - if ( !list.empty() ) - { - throw strCase; - } -} From a6196a5d9ee3cfdab3aaa860bede5eff2f0b042a Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 12:54:23 -0800 Subject: [PATCH 05/15] The handmade comparsion of vectors was incorrect, and in reality only compared the size of the vectors Using googletest exposed this, showing bugs in the following: converting from percent encoding (not return unicode vs utf8) diff_linesToCharsMunge didnt handle empty strings correctly diff_linesToCharsMunge didnt use wchar_t when concatenating --- cpp17/diff_match_patch.cpp | 8 ++++---- cpp17/diff_match_patch_utils.cpp | 14 ++++++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index 1dc092fa..92d9ab13 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -661,7 +661,7 @@ std::wstring diff_match_patch::diff_linesToCharsMunge( const std::wstring &text, // text.split('\n') would would temporarily double our memory footprint. // Modifying text would create many large strings to garbage collect. 
bool firstTime = true; - while ( ( firstTime && ( lineEnd == -1 ) ) || lineEnd < ( text.length() - 1 ) ) + while ( ( firstTime && ( lineEnd == -1 ) && !text.empty() ) || lineEnd < ( text.length() - 1 ) ) { firstTime = false; lineEnd = text.find( '\n', lineStart ); @@ -674,13 +674,13 @@ std::wstring diff_match_patch::diff_linesToCharsMunge( const std::wstring &text, auto pos = lineHash.find( line ); if ( pos != lineHash.end() ) { - chars += static_cast< char >( ( *pos ).second ); + chars += static_cast< wchar_t >( ( *pos ).second ); } else { lineArray.emplace_back( line ); lineHash[ line ] = lineArray.size() - 1; - chars += static_cast< char >( lineArray.size() - 1 ); + chars += static_cast< wchar_t >( lineArray.size() - 1 ); } lineStart = lineEnd + 1; @@ -934,7 +934,7 @@ void diff_match_patch::diff_cleanupSemantic( TDiffVector &diffs ) if ( !lastEquality.empty() && ( lastEquality.length() <= std::max( length_insertions1, length_deletions1 ) ) && ( lastEquality.length() <= std::max( length_insertions2, length_deletions2 ) ) ) { // Duplicate record. - diffs[ equalities.top() ] = Diff( DELETE, lastEquality ); + diffs.insert( diffs.begin() + equalities.top(), Diff( DELETE, lastEquality ) ); // Change second copy to insert. diffs[ equalities.top() + 1 ].operation = INSERT; // Throw away the equality we just deleted. 
diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp index 5243ed08..9470adee 100644 --- a/cpp17/diff_match_patch_utils.cpp +++ b/cpp17/diff_match_patch_utils.cpp @@ -88,7 +88,6 @@ namespace NUtils static_assert( sizeof( wchar_t ) <= 4, "wchar_t is greater that 32 bit" ); - auto sz = sizeof( wchar_t ); std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; for ( auto &&c : input ) { @@ -121,7 +120,7 @@ namespace NUtils { if ( input.empty() ) return {}; - std::wstring retVal; + std::string retVal; retVal.reserve( input.length() ); for ( auto ii = 0ULL; ii < input.length(); ++ii ) { @@ -132,14 +131,21 @@ namespace NUtils auto b = input[ ++ii ]; a = getValue( a ); b = getValue( b ); - retVal += wchar_t( ( a << 4 ) | b ); + a = a << 4; + auto value = a | b; + retVal += std::string( 1, value ); } + else if ( c == '+' ) + retVal += ' '; else { retVal += c; } } - return retVal; + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + auto asBytes = utf8_conv.from_bytes( retVal ); + + return asBytes; } bool endsWith( const std::wstring &string, const std::wstring &suffix ) From bf86db1407824842ef3943344af3f8d8d01082ee Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 14:18:43 -0800 Subject: [PATCH 06/15] Added demo from the github webpage --- cpp17/diff_match_patch_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index eb22914c..1ee02f55 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -947,3 +947,22 @@ TEST_F( diff_match_patch_test, testPatchApply ) resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ] ); assertEquals( "patch_apply: Edge partial match.", L"x123\ttrue", resultStr ); } + +TEST_F( diff_match_patch_test, fromGitHubExamples ) +{ + auto lhs = L"I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know 
the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical."; + auto rhs = L"I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head."; + auto diffs = dmp.diff_main( lhs, rhs ); + dmp.diff_cleanupSemantic( diffs ); + auto html = dmp.diff_prettyHtml( diffs ); + auto delta = dmp.diff_toDelta( diffs ); + auto htmlGolden = LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categoricalcartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head.)"; + assertEquals( "gitHubDemos", htmlGolden, html ); + auto deltaGolden = L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head\t=1"; + assertEquals( "gitHubDemos", deltaGolden, delta ); + + auto patches = dmp.patch_make( lhs, rhs ); + auto patch = dmp.patch_toText( patches ); + auto patchGolden = L"@@ -22,187 +22,198 @@\n f a \n-modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical\n+cartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head\n .\n"; + assertEquals( "gitHubDemos", patchGolden, patch ); +} From 6ae9244d26b579d7eb0820478fa48415a9b7eb27 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 14:37:49 -0800 
Subject: [PATCH 07/15] Clang-format --style google --- cpp17/diff_match_patch.cpp | 4164 +++++++++--------- cpp17/diff_match_patch.h | 592 +-- cpp17/diff_match_patch_test.cpp | 2050 +++++---- cpp17/diff_match_patch_test.h | 259 +- cpp17/diff_match_patch_test_assertEquals.cpp | 202 +- cpp17/diff_match_patch_utils.cpp | 330 +- cpp17/diff_match_patch_utils.h | 508 ++- 7 files changed, 4132 insertions(+), 3973 deletions(-) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index 92d9ab13..71a55027 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -17,16 +17,17 @@ */ #include "diff_match_patch.h" -#include "diff_match_patch_utils.h" #include -#include +#include #include -#include #include +#include +#include #include #include -#include + +#include "diff_match_patch_utils.h" ////////////////////////// // @@ -39,56 +40,44 @@ * @param operation One of INSERT, DELETE or EQUAL * @param text The text being applied */ -Diff::Diff( Operation _operation, const std::wstring &_text ) : - operation( _operation ), - text( _text ) -{ - // Construct a diff with the specified operation and text. +Diff::Diff(Operation _operation, const std::wstring &_text) + : operation(_operation), text(_text) { + // Construct a diff with the specified operation and text. } -Diff::Diff() -{ -} +Diff::Diff() {} -Diff::Diff( Operation _operation, const wchar_t *_text ) : - Diff( _operation, ( _text ? std::wstring( _text ) : std::wstring( L"" ) ) ) -{ -} +Diff::Diff(Operation _operation, const wchar_t *_text) + : Diff(_operation, (_text ? 
std::wstring(_text) : std::wstring(L""))) {} -Diff::Diff( Operation _operation, const std::string &_text ) : - Diff( _operation, NUtils::to_wstring( _text ) ) -{ -} +Diff::Diff(Operation _operation, const std::string &_text) + : Diff(_operation, NUtils::to_wstring(_text)) {} -Diff::Diff( Operation _operation, const char *_text ) : - Diff( _operation, std::string( _text ) ) -{ -} +Diff::Diff(Operation _operation, const char *_text) + : Diff(_operation, std::string(_text)) {} -std::wstring Diff::strOperation( Operation op ) -{ - switch ( op ) - { - case INSERT: - return L"INSERT"; - case DELETE: - return L"DELETE"; - case EQUAL: - return L"EQUAL"; - } - throw "Invalid operation."; +std::wstring Diff::strOperation(Operation op) { + switch (op) { + case INSERT: + return L"INSERT"; + case DELETE: + return L"DELETE"; + case EQUAL: + return L"EQUAL"; + } + throw "Invalid operation."; } /** * Display a human-readable version of this Diff. * @return text version */ -std::wstring Diff::toString() const -{ - std::wstring prettyText = text; - // Replace linebreaks with Pilcrow signs. - std::replace( prettyText.begin(), prettyText.end(), L'\n', L'\u00b6' ); - return std::wstring( L"Diff(" ) + strOperation( operation ) + std::wstring( L",\"" ) + prettyText + std::wstring( L"\")" ); +std::wstring Diff::toString() const { + std::wstring prettyText = text; + // Replace linebreaks with Pilcrow signs. 
+ std::replace(prettyText.begin(), prettyText.end(), L'\n', L'\u00b6'); + return std::wstring(L"Diff(") + strOperation(operation) + + std::wstring(L",\"") + prettyText + std::wstring(L"\")"); } /** @@ -96,15 +85,11 @@ std::wstring Diff::toString() const * @param d Another Diff to compare against * @return true or false */ -bool Diff::operator==( const Diff &d ) const -{ - return ( d.operation == this->operation ) && ( d.text == this->text ); +bool Diff::operator==(const Diff &d) const { + return (d.operation == this->operation) && (d.text == this->text); } -bool Diff::operator!=( const Diff &d ) const -{ - return !( operator==( d ) ); -} +bool Diff::operator!=(const Diff &d) const { return !(operator==(d)); } ///////////////////////////////////////////// // @@ -115,59 +100,44 @@ bool Diff::operator!=( const Diff &d ) const /** * Constructor. Initializes with an empty list of diffs. */ -Patch::Patch() -{ -} - -Patch::Patch( std::wstring &text ) -{ - std::wsmatch matches; - auto patchHeader = std::wregex( LR"(^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$)" ); - if ( !std::regex_match( text, matches, patchHeader ) || ( matches.size() != 5 ) ) - { - throw std::wstring( L"Invalid patch string: " + text ); - } - start1 = NUtils::toInt( matches[ 1 ].str() ); - if ( !matches[ 2 ].length() ) - { - start1--; - length1 = 1; - } - else if ( matches[ 2 ].str() == L"0" ) - { - length1 = 0; - } - else - { - start1--; - length1 = NUtils::toInt( matches[ 2 ].str() ); - } - - start2 = NUtils::toInt( matches[ 3 ].str() ); - if ( !matches[ 4 ].length() ) - { - start2--; - length2 = 1; - } - else if ( matches[ 4 ].str() == L"0" ) - { - length2 = 0; - } - else - { - start2--; - length2 = NUtils::toInt( matches[ 4 ].str() ); - } - text.erase( text.begin() ); -} - -bool Patch::isNull() const -{ - if ( start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && diffs.empty() ) - { - return true; - } - return false; +Patch::Patch() {} + +Patch::Patch(std::wstring &text) { + std::wsmatch matches; + 
auto patchHeader = std::wregex(LR"(^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$)"); + if (!std::regex_match(text, matches, patchHeader) || (matches.size() != 5)) { + throw std::wstring(L"Invalid patch string: " + text); + } + start1 = NUtils::toInt(matches[1].str()); + if (!matches[2].length()) { + start1--; + length1 = 1; + } else if (matches[2].str() == L"0") { + length1 = 0; + } else { + start1--; + length1 = NUtils::toInt(matches[2].str()); + } + + start2 = NUtils::toInt(matches[3].str()); + if (!matches[4].length()) { + start2--; + length2 = 1; + } else if (matches[4].str() == L"0") { + length2 = 0; + } else { + start2--; + length2 = NUtils::toInt(matches[4].str()); + } + text.erase(text.begin()); +} + +bool Patch::isNull() const { + if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && + diffs.empty()) { + return true; + } + return false; } /** @@ -176,54 +146,48 @@ bool Patch::isNull() const * Indices are printed as 1-based, not 0-based. * @return The GNU diff string */ -std::wstring Patch::toString() const -{ - auto text = getPatchHeader(); - // Escape the body of the patch with %xx notation. 
- for ( auto &&aDiff : diffs ) - { - switch ( aDiff.operation ) - { - case INSERT: - text += L"+"; - break; - case DELETE: - text += L"-"; - break; - case EQUAL: - text += L" "; - break; - } - text += NUtils::toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) + std::wstring( L"\n" ); - } - - return text; -} - -std::wstring Patch::getPatchHeader() const -{ - auto coords1 = getCoordinateString( start1, length1 ); - auto coords2 = getCoordinateString( start2, length2 ); - auto text = std::wstring( L"@@ -" ) + coords1 + std::wstring( L" +" ) + coords2 + std::wstring( L" @@\n" ); - return text; -} - -std::wstring Patch::getCoordinateString( std::size_t start, std::size_t length ) const -{ - std::wstring retVal; - if ( length == 0 ) - { - retVal = std::to_wstring( start ) + std::wstring( L",0" ); - } - else if ( length == 1 ) - { - retVal = std::to_wstring( start + 1 ); - } - else - { - retVal = std::to_wstring( start + 1 ) + std::wstring( L"," ) + std::to_wstring( length ); - } - return retVal; +std::wstring Patch::toString() const { + auto text = getPatchHeader(); + // Escape the body of the patch with %xx notation. 
+ for (auto &&aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + text += L"+"; + break; + case DELETE: + text += L"-"; + break; + case EQUAL: + text += L" "; + break; + } + text += NUtils::toPercentEncoding(aDiff.text, L" !~*'();/?:@&=+$,#") + + std::wstring(L"\n"); + } + + return text; +} + +std::wstring Patch::getPatchHeader() const { + auto coords1 = getCoordinateString(start1, length1); + auto coords2 = getCoordinateString(start2, length2); + auto text = std::wstring(L"@@ -") + coords1 + std::wstring(L" +") + coords2 + + std::wstring(L" @@\n"); + return text; +} + +std::wstring Patch::getCoordinateString(std::size_t start, + std::size_t length) const { + std::wstring retVal; + if (length == 0) { + retVal = std::to_wstring(start) + std::wstring(L",0"); + } else if (length == 1) { + retVal = std::to_wstring(start + 1); + } else { + retVal = std::to_wstring(start + 1) + std::wstring(L",") + + std::to_wstring(length); + } + return retVal; } ///////////////////////////////////////////// @@ -233,2139 +197,1969 @@ std::wstring Patch::getCoordinateString( std::size_t start, std::size_t length ) ///////////////////////////////////////////// // all class members initialized in the class -diff_match_patch::diff_match_patch() -{ -} - -TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2 ) -{ - return diff_main( text1, text2, true ); -} - -TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ) -{ - // Set a deadline by which time the diff must be complete. 
- clock_t deadline; - if ( Diff_Timeout <= 0 ) - { - deadline = std::numeric_limits< clock_t >::max(); - } - else - { - deadline = clock() + (clock_t)( Diff_Timeout * CLOCKS_PER_SEC ); - } - return diff_main( text1, text2, checklines, deadline ); -} - -TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) -{ - // Check for equality (speedup). - TDiffVector diffs; - if ( text1 == text2 ) - { - if ( !text1.empty() ) - { - diffs.emplace_back( EQUAL, text1 ); - } - return diffs; - } - - if ( !text1.empty() && text2.empty() ) - { - diffs.emplace_back( DELETE, text1 ); - return diffs; - } - - if ( text1.empty() && !text2.empty() ) - { - diffs.emplace_back( INSERT, text2 ); - return diffs; - } - - // Trim off common prefix (speedup). - auto commonlength = diff_commonPrefix( text1, text2 ); - auto commonprefix = text1.substr( 0, commonlength ); - auto textChopped1 = text1.substr( commonlength ); - auto textChopped2 = text2.substr( commonlength ); - - // Trim off common suffix (speedup). - commonlength = diff_commonSuffix( textChopped1, textChopped2 ); - auto commonsuffix = textChopped1.substr( textChopped1.length() - commonlength ); - textChopped1 = textChopped1.substr( 0, textChopped1.length() - commonlength ); - textChopped2 = textChopped2.substr( 0, textChopped2.length() - commonlength ); - - // Compute the diff on the middle block. - diffs = diff_compute( textChopped1, textChopped2, checklines, deadline ); - - // Restore the prefix and suffix. 
- if ( !commonprefix.empty() ) - { - diffs.emplace( diffs.begin(), EQUAL, commonprefix ); - } - if ( !commonsuffix.empty() ) - { - diffs.emplace_back( EQUAL, commonsuffix ); +diff_match_patch::diff_match_patch() {} + +TDiffVector diff_match_patch::diff_main(const std::wstring &text1, + const std::wstring &text2) { + return diff_main(text1, text2, true); +} + +TDiffVector diff_match_patch::diff_main(const std::wstring &text1, + const std::wstring &text2, + bool checklines) { + // Set a deadline by which time the diff must be complete. + clock_t deadline; + if (Diff_Timeout <= 0) { + deadline = std::numeric_limits::max(); + } else { + deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); + } + return diff_main(text1, text2, checklines, deadline); +} + +TDiffVector diff_match_patch::diff_main(const std::wstring &text1, + const std::wstring &text2, + bool checklines, clock_t deadline) { + // Check for equality (speedup). + TDiffVector diffs; + if (text1 == text2) { + if (!text1.empty()) { + diffs.emplace_back(EQUAL, text1); } - - diff_cleanupMerge( diffs ); - return diffs; -} + } -TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2 ) -{ - return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); -} - -TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines ) -{ - return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines ); -} - -TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) -{ - return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); -} - -TDiffVector diff_match_patch::diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) -{ - TDiffVector diffs; - - if ( text1.empty() ) - { - // Just add some text (speedup). 
- diffs.emplace_back( INSERT, text2 ); - return diffs; - } + if (!text1.empty() && text2.empty()) { + diffs.emplace_back(DELETE, text1); + return diffs; + } - if ( text2.empty() ) - { - // Just delete some text (speedup). - diffs.emplace_back( DELETE, text1 ); - return diffs; - } + if (text1.empty() && !text2.empty()) { + diffs.emplace_back(INSERT, text2); + return diffs; + } - { - auto [ longtext, shorttext ] = ( text1.length() > text2.length() ) ? std::make_pair( text1, text2 ) : std::make_pair( text2, text1 ); - auto i = longtext.find( shorttext ); - if ( i != std::string::npos ) - { - // Shorter text is inside the longer text (speedup). - const Operation op = ( text1.length() > text2.length() ) ? DELETE : INSERT; - diffs.emplace_back( op, longtext.substr( 0, i ) ); - diffs.emplace_back( EQUAL, shorttext ); - diffs.emplace_back( op, safeMid( longtext, i + shorttext.length() ) ); - return diffs; - } + // Trim off common prefix (speedup). + auto commonlength = diff_commonPrefix(text1, text2); + auto commonprefix = text1.substr(0, commonlength); + auto textChopped1 = text1.substr(commonlength); + auto textChopped2 = text2.substr(commonlength); - if ( shorttext.length() == 1 ) - { - // Single character string. - // After the previous speedup, the character can't be an equality. - diffs.emplace_back( DELETE, text1 ); - diffs.emplace_back( INSERT, text2 ); - return diffs; - } - // Garbage collect longtext and shorttext by scoping out. - } + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix(textChopped1, textChopped2); + auto commonsuffix = textChopped1.substr(textChopped1.length() - commonlength); + textChopped1 = textChopped1.substr(0, textChopped1.length() - commonlength); + textChopped2 = textChopped2.substr(0, textChopped2.length() - commonlength); - // Check to see if the problem can be split in two. - const TStringVector hm = diff_halfMatch( text1, text2 ); - if ( !hm.empty() ) - { - // A half-match was found, sort out the return data. 
- auto &&text1_a = hm[ 0 ]; - auto &&text1_b = hm[ 1 ]; - auto &&text2_a = hm[ 2 ]; - auto &&text2_b = hm[ 3 ]; - auto &&mid_common = hm[ 4 ]; - // Send both pairs off for separate processing. - diffs = diff_main( text1_a, text2_a, checklines, deadline ); - const TDiffVector diffs_b = diff_main( text1_b, text2_b, checklines, deadline ); - // Merge the results. - diffs.emplace_back( EQUAL, mid_common ); - diffs.insert( diffs.end(), diffs_b.begin(), diffs_b.end() ); - return diffs; - } + // Compute the diff on the middle block. + diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); - // Perform a real diff. - if ( checklines && ( text1.length() > 100 ) && ( text2.length() > 100 ) ) - { - return diff_lineMode( text1, text2, deadline ); - } + // Restore the prefix and suffix. + if (!commonprefix.empty()) { + diffs.emplace(diffs.begin(), EQUAL, commonprefix); + } + if (!commonsuffix.empty()) { + diffs.emplace_back(EQUAL, commonsuffix); + } - return diff_bisect( text1, text2, deadline ); -} - -TDiffVector diff_match_patch::diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) -{ - return diff_compute( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); -} - -TDiffVector diff_match_patch::diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ) -{ - // Scan the text on a line-by-line basis first. - auto a = diff_linesToChars( text1, text2 ); - text1 = std::get< std::wstring >( a[ 0 ] ); - text2 = std::get< std::wstring >( a[ 1 ] ); - auto linearray = std::get< TStringVector >( a[ 2 ] ); - - auto diffs = diff_main( text1, text2, false, deadline ); - - // Convert the diff back to original text. - diff_charsToLines( diffs, linearray ); - // Eliminate freak matches (e.g. blank lines) - diff_cleanupSemantic( diffs ); - - // Rediff any replacement blocks, this time character-by-character. - // Add a dummy entry at the end. 
- diffs.emplace_back( EQUAL, L"" ); - std::size_t pointer = 0; - int count_delete = 0; - int count_insert = 0; - std::wstring text_delete; - std::wstring text_insert; - while ( pointer < diffs.size() ) - { - switch ( diffs[ pointer ].operation ) - { - case INSERT: - count_insert++; - text_insert += diffs[ pointer ].text; - break; - case DELETE: - count_delete++; - text_delete += diffs[ pointer ].text; - break; - case EQUAL: - // Upon reaching an equality, check for prior redundancies. - if ( count_delete >= 1 && count_insert >= 1 ) - { - // Delete the offending records and add the merged ones. - auto numElements = count_delete + count_insert; - auto start = diffs.begin() + pointer - numElements; - auto end = start + numElements; - diffs.erase( start, end ); - pointer = pointer - count_delete - count_insert; - auto subDiff = diff_main( text_delete, text_insert, false, deadline ); - diffs.insert( diffs.begin() + pointer, subDiff.begin(), subDiff.end() ); - pointer = pointer + subDiff.size(); - } - count_insert = 0; - count_delete = 0; - text_delete.clear(); - text_insert.clear(); - break; - } - pointer++; - } - diffs.pop_back(); // Remove the dummy entry at the end. + diff_cleanupMerge(diffs); - return diffs; + return diffs; } -TDiffVector diff_match_patch::diff_lineMode( std::string text1, std::string text2, clock_t deadline ) -{ - return diff_lineMode( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); -} - -// using int64_t rather thant size_t due to the backward walking nature of the algorithm -TDiffVector diff_match_patch::diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ) -{ - // Cache the text lengths to prevent multiple calls. 
- auto text1_length = static_cast< int64_t >( text1.length() ); - auto text2_length = static_cast< int64_t >( text2.length() ); - auto max_d = ( text1_length + text2_length + 1 ) / 2; - auto v_offset = max_d; - auto v_length = 2 * max_d; - auto v1 = std::vector< int64_t >( v_length, -1 ); - auto v2 = std::vector< int64_t >( v_length, -1 ); - v1[ v_offset + 1 ] = 0; - v2[ v_offset + 1 ] = 0; - auto delta = text1_length - text2_length; - // If the total number of characters is odd, then the front path will - // collide with the reverse path. - bool front = ( delta % 2 != 0 ); - // Offsets for start and end of k loop. - // Prevents mapping of space beyond the grid. - int64_t k1start = 0; - int64_t k1end = 0; - int64_t k2start = 0; - int64_t k2end = 0; - for ( int64_t d = 0; d < max_d; d++ ) - { - // Bail out if deadline is reached. - if ( clock() > deadline ) - { - break; - } - - // Walk the front path one step. - for ( auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ) - { - auto k1_offset = v_offset + k1; - int64_t x1; - if ( ( k1 == -d ) || ( k1 != d ) && ( v1[ k1_offset - 1 ] < v1[ k1_offset + 1 ] ) ) - { - x1 = v1[ k1_offset + 1 ]; - } - else - { - x1 = v1[ k1_offset - 1 ] + 1; - } - int64_t y1 = x1 - k1; - while ( ( x1 < text1_length ) && ( y1 < text2_length ) && ( text1[ x1 ] == text2[ y1 ] ) ) - { - x1++; - y1++; - } - v1[ k1_offset ] = x1; - if ( x1 > text1_length ) - { - // Ran off the right of the graph. - k1end += 2; - } - else if ( y1 > text2_length ) - { - // Ran off the bottom of the graph. - k1start += 2; - } - else if ( front ) - { - auto k2_offset = v_offset + delta - k1; - if ( ( k2_offset >= 0 ) && ( k2_offset < v_length ) && ( v2[ k2_offset ] != -1 ) ) - { - // Mirror x2 onto top-left coordinate system. - auto x2 = text1_length - v2[ k2_offset ]; - if ( x1 >= x2 ) - { - // Overlap detected. - return diff_bisectSplit( text1, text2, x1, y1, deadline ); - } - } - } - } - - // Walk the reverse path one step. 
- for ( auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2 ) - { - auto k2_offset = v_offset + k2; - int64_t x2; - if ( ( k2 == -d ) || ( k2 != d ) && ( v2[ k2_offset - 1 ] < v2[ k2_offset + 1 ] ) ) - { - x2 = v2[ k2_offset + 1 ]; - } - else - { - x2 = v2[ k2_offset - 1 ] + 1; - } - auto y2 = x2 - k2; - while ( ( x2 < text1_length ) && ( y2 < text2_length ) && ( text1[ text1_length - x2 - 1 ] == text2[ text2_length - y2 - 1 ] ) ) - { - x2++; - y2++; - } - v2[ k2_offset ] = x2; - if ( x2 > text1_length ) - { - // Ran off the left of the graph. - k2end += 2; - } - else if ( y2 > text2_length ) - { - // Ran off the top of the graph. - k2start += 2; - } - else if ( !front ) - { - auto k1_offset = v_offset + delta - k2; - if ( ( k1_offset >= 0 ) && ( k1_offset < v_length ) && ( v1[ k1_offset ] != -1 ) ) - { - auto x1 = v1[ k1_offset ]; - auto y1 = v_offset + x1 - k1_offset; - // Mirror x2 onto top-left coordinate system. - x2 = text1_length - v2[ k2_offset ]; - if ( x1 >= x2 ) - { - // Overlap detected. - return diff_bisectSplit( text1, text2, x1, y1, deadline ); - } - } - } - } - } - // Diff took too long and hit the deadline or - // number of diffs equals number of characters, no commonality at all. 
- auto diffs = TDiffVector( { Diff( DELETE, text1 ), Diff( INSERT, text2 ) } ); - return diffs; +TDiffVector diff_match_patch::diff_main(const std::string &text1, + const std::string &text2) { + return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); } -TDiffVector diff_match_patch::diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ) -{ - return diff_bisect( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); -} - -TDiffVector diff_match_patch::diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ) -{ - auto text1a = text1.substr( 0, x ); - auto text2a = text2.substr( 0, y ); - auto text1b = safeMid( text1, x ); - auto text2b = safeMid( text2, y ); - - // Compute both diffs serially. - TDiffVector diffs = diff_main( text1a, text2a, false, deadline ); - TDiffVector diffsb = diff_main( text1b, text2b, false, deadline ); - - diffs.insert( diffs.end(), diffsb.begin(), diffsb.end() ); - return diffs; +TDiffVector diff_match_patch::diff_main(const std::string &text1, + const std::string &text2, + bool checklines) { + return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + checklines); } -TDiffVector diff_match_patch::diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ) -{ - return diff_bisectSplit( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), x, y, deadline ); +TDiffVector diff_match_patch::diff_main(const std::string &text1, + const std::string &text2, + bool checklines, clock_t deadline) { + return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + checklines, deadline); } -diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ) -{ - TStringVector lineArray; - std::unordered_map< std::wstring, std::size_t > lineHash; - // e.g. linearray[4] == "Hello\n" - // e.g. 
linehash.get("Hello\n") == 4 - - // "\x00" is a valid character, but various debuggers don't like it. - // So we'll insert a junk entry to avoid generating a nullptr character. - lineArray.emplace_back( L"" ); +TDiffVector diff_match_patch::diff_compute(const std::wstring &text1, + const std::wstring &text2, + bool checklines, clock_t deadline) { + TDiffVector diffs; - const std::wstring chars1 = diff_linesToCharsMunge( text1, lineArray, lineHash ); - const std::wstring chars2 = diff_linesToCharsMunge( text2, lineArray, lineHash ); - - TVariantVector listRet; - listRet.emplace_back( chars1 ); - listRet.emplace_back( chars2 ); - listRet.emplace_back( lineArray ); - return listRet; -} + if (text1.empty()) { + // Just add some text (speedup). + diffs.emplace_back(INSERT, text2); + return diffs; + } -std::vector< diff_match_patch::diff_match_patch::TVariant > diff_match_patch::diff_linesToChars( const std::string &text1, const std::string &text2 ) -{ - return diff_linesToChars( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); -} - -std::wstring diff_match_patch::diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ) -{ - std::size_t lineStart = 0; - std::size_t lineEnd = std::string::npos; - std::wstring line; - std::wstring chars; - // Walk the text, pulling out a substring for each line. - // text.split('\n') would would temporarily double our memory footprint. - // Modifying text would create many large strings to garbage collect. - bool firstTime = true; - while ( ( firstTime && ( lineEnd == -1 ) && !text.empty() ) || lineEnd < ( text.length() - 1 ) ) - { - firstTime = false; - lineEnd = text.find( '\n', lineStart ); - if ( lineEnd == -1 ) - { - lineEnd = text.length() - 1; + if (text2.empty()) { + // Just delete some text (speedup). + diffs.emplace_back(DELETE, text1); + return diffs; + } + + { + auto [longtext, shorttext] = (text1.length() > text2.length()) + ? 
std::make_pair(text1, text2) + : std::make_pair(text2, text1); + auto i = longtext.find(shorttext); + if (i != std::string::npos) { + // Shorter text is inside the longer text (speedup). + const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; + diffs.emplace_back(op, longtext.substr(0, i)); + diffs.emplace_back(EQUAL, shorttext); + diffs.emplace_back(op, safeMid(longtext, i + shorttext.length())); + return diffs; + } + + if (shorttext.length() == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.emplace_back(DELETE, text1); + diffs.emplace_back(INSERT, text2); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const TStringVector hm = diff_halfMatch(text1, text2); + if (!hm.empty()) { + // A half-match was found, sort out the return data. + auto &&text1_a = hm[0]; + auto &&text1_b = hm[1]; + auto &&text2_a = hm[2]; + auto &&text2_b = hm[3]; + auto &&mid_common = hm[4]; + // Send both pairs off for separate processing. + diffs = diff_main(text1_a, text2_a, checklines, deadline); + const TDiffVector diffs_b = + diff_main(text1_b, text2_b, checklines, deadline); + // Merge the results. + diffs.emplace_back(EQUAL, mid_common); + diffs.insert(diffs.end(), diffs_b.begin(), diffs_b.end()); + return diffs; + } + + // Perform a real diff. 
+ if (checklines && (text1.length() > 100) && (text2.length() > 100)) { + return diff_lineMode(text1, text2, deadline); + } + + return diff_bisect(text1, text2, deadline); +} + +TDiffVector diff_match_patch::diff_compute(const std::string &text1, + const std::string &text2, + bool checklines, clock_t deadline) { + return diff_compute(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + checklines, deadline); +} + +TDiffVector diff_match_patch::diff_lineMode(std::wstring text1, + std::wstring text2, + clock_t deadline) { + // Scan the text on a line-by-line basis first. + auto a = diff_linesToChars(text1, text2); + text1 = std::get(a[0]); + text2 = std::get(a[1]); + auto linearray = std::get(a[2]); + + auto diffs = diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + diff_charsToLines(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.emplace_back(EQUAL, L""); + std::size_t pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + while (pointer < diffs.size()) { + switch (diffs[pointer].operation) { + case INSERT: + count_insert++; + text_insert += diffs[pointer].text; + break; + case DELETE: + count_delete++; + text_delete += diffs[pointer].text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. 
+ auto numElements = count_delete + count_insert; + auto start = diffs.begin() + pointer - numElements; + auto end = start + numElements; + diffs.erase(start, end); + pointer = pointer - count_delete - count_insert; + auto subDiff = diff_main(text_delete, text_insert, false, deadline); + diffs.insert(diffs.begin() + pointer, subDiff.begin(), subDiff.end()); + pointer = pointer + subDiff.size(); } - line = safeMid( text, lineStart, lineEnd + 1 - lineStart ); - - auto pos = lineHash.find( line ); - if ( pos != lineHash.end() ) - { - chars += static_cast< wchar_t >( ( *pos ).second ); + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + break; + } + pointer++; + } + diffs.pop_back(); // Remove the dummy entry at the end. + + return diffs; +} + +TDiffVector diff_match_patch::diff_lineMode(std::string text1, + std::string text2, + clock_t deadline) { + return diff_lineMode(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + deadline); +} + +// using int64_t rather thant size_t due to the backward walking nature of the +// algorithm +TDiffVector diff_match_patch::diff_bisect(const std::wstring &text1, + const std::wstring &text2, + clock_t deadline) { + // Cache the text lengths to prevent multiple calls. + auto text1_length = static_cast(text1.length()); + auto text2_length = static_cast(text2.length()); + auto max_d = (text1_length + text2_length + 1) / 2; + auto v_offset = max_d; + auto v_length = 2 * max_d; + auto v1 = std::vector(v_length, -1); + auto v2 = std::vector(v_length, -1); + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + auto delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + bool front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. 
+ int64_t k1start = 0; + int64_t k1end = 0; + int64_t k2start = 0; + int64_t k2end = 0; + for (int64_t d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if (clock() > deadline) { + break; + } + + // Walk the front path one step. + for (auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + auto k1_offset = v_offset + k1; + int64_t x1; + if ((k1 == -d) || (k1 != d) && (v1[k1_offset - 1] < v1[k1_offset + 1])) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + int64_t y1 = x1 - k1; + while ((x1 < text1_length) && (y1 < text2_length) && + (text1[x1] == text2[y1])) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + auto k2_offset = v_offset + delta - k1; + if ((k2_offset >= 0) && (k2_offset < v_length) && + (v2[k2_offset] != -1)) { + // Mirror x2 onto top-left coordinate system. + auto x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } } - else - { - lineArray.emplace_back( line ); - lineHash[ line ] = lineArray.size() - 1; - chars += static_cast< wchar_t >( lineArray.size() - 1 ); - } - - lineStart = lineEnd + 1; - } - return chars; -} - -void diff_match_patch::diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray ) -{ - // Qt has no mutable Qforeach construct. - for ( auto &&diff : diffs ) - { - std::wstring text; - for ( auto &&y : diff.text ) - { - text += lineArray[ y ]; + } + } + + // Walk the reverse path one step. 
+ for (auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + auto k2_offset = v_offset + k2; + int64_t x2; + if ((k2 == -d) || (k2 != d) && (v2[k2_offset - 1] < v2[k2_offset + 1])) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + auto y2 = x2 - k2; + while ((x2 < text1_length) && (y2 < text2_length) && + (text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1])) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + auto k1_offset = v_offset + delta - k2; + if ((k1_offset >= 0) && (k1_offset < v_length) && + (v1[k1_offset] != -1)) { + auto x1 = v1[k1_offset]; + auto y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } } - diff.text = text; + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + auto diffs = TDiffVector({Diff(DELETE, text1), Diff(INSERT, text2)}); + return diffs; +} + +TDiffVector diff_match_patch::diff_bisect(const std::string &text1, + const std::string &text2, + clock_t deadline) { + return diff_bisect(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + deadline); +} + +TDiffVector diff_match_patch::diff_bisectSplit(const std::wstring &text1, + const std::wstring &text2, + std::size_t x, std::size_t y, + clock_t deadline) { + auto text1a = text1.substr(0, x); + auto text2a = text2.substr(0, y); + auto text1b = safeMid(text1, x); + auto text2b = safeMid(text2, y); + + // Compute both diffs serially. 
+ TDiffVector diffs = diff_main(text1a, text2a, false, deadline); + TDiffVector diffsb = diff_main(text1b, text2b, false, deadline); + + diffs.insert(diffs.end(), diffsb.begin(), diffsb.end()); + return diffs; +} + +TDiffVector diff_match_patch::diff_bisectSplit(const std::string &text1, + const std::string &text2, + std::size_t x, std::size_t y, + clock_t deadline) { + return diff_bisectSplit(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + x, y, deadline); +} + +diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( + const std::wstring &text1, const std::wstring &text2) { + TStringVector lineArray; + std::unordered_map lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a nullptr character. + lineArray.emplace_back(L""); + + const std::wstring chars1 = + diff_linesToCharsMunge(text1, lineArray, lineHash); + const std::wstring chars2 = + diff_linesToCharsMunge(text2, lineArray, lineHash); + + TVariantVector listRet; + listRet.emplace_back(chars1); + listRet.emplace_back(chars2); + listRet.emplace_back(lineArray); + return listRet; +} + +std::vector +diff_match_patch::diff_linesToChars(const std::string &text1, + const std::string &text2) { + return diff_linesToChars(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); +} + +std::wstring diff_match_patch::diff_linesToCharsMunge( + const std::wstring &text, TStringVector &lineArray, + std::unordered_map &lineHash) { + std::size_t lineStart = 0; + std::size_t lineEnd = std::string::npos; + std::wstring line; + std::wstring chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. 
+ bool firstTime = true; + while ((firstTime && (lineEnd == -1) && !text.empty()) || + lineEnd < (text.length() - 1)) { + firstTime = false; + lineEnd = text.find('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length() - 1; + } + line = safeMid(text, lineStart, lineEnd + 1 - lineStart); + + auto pos = lineHash.find(line); + if (pos != lineHash.end()) { + chars += static_cast((*pos).second); + } else { + lineArray.emplace_back(line); + lineHash[line] = lineArray.size() - 1; + chars += static_cast(lineArray.size() - 1); + } + + lineStart = lineEnd + 1; + } + return chars; +} + +void diff_match_patch::diff_charsToLines(TDiffVector &diffs, + const TStringVector &lineArray) { + // Qt has no mutable Qforeach construct. + for (auto &&diff : diffs) { + std::wstring text; + for (auto &&y : diff.text) { + text += lineArray[y]; } + diff.text = text; + } } -std::size_t diff_match_patch::diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ) -{ - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const auto n = std::min( text1.length(), text2.length() ); - for ( std::size_t i = 0; i < n; i++ ) - { - if ( text1[ i ] != text2[ i ] ) - { - return i; - } +std::size_t diff_match_patch::diff_commonPrefix(const std::wstring &text1, + const std::wstring &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto n = std::min(text1.length(), text2.length()); + for (std::size_t i = 0; i < n; i++) { + if (text1[i] != text2[i]) { + return i; } - return n; + } + return n; } -std::size_t diff_match_patch::diff_commonPrefix( const std::string &text1, const std::string &text2 ) -{ - return diff_commonPrefix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +std::size_t diff_match_patch::diff_commonPrefix(const std::string &text1, + const std::string &text2) { + return diff_commonPrefix(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); } -std::size_t diff_match_patch::diff_commonSuffix( const 
std::wstring &text1, const std::wstring &text2 ) -{ - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const auto text1_length = text1.length(); - const auto text2_length = text2.length(); - const auto n = std::min( text1_length, text2_length ); - for ( std::size_t i = 1; i <= n; i++ ) - { - if ( text1[ text1_length - i ] != text2[ text2_length - i ] ) - { - return i - 1; - } +std::size_t diff_match_patch::diff_commonSuffix(const std::wstring &text1, + const std::wstring &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + const auto n = std::min(text1_length, text2_length); + for (std::size_t i = 1; i <= n; i++) { + if (text1[text1_length - i] != text2[text2_length - i]) { + return i - 1; } - return n; + } + return n; } -std::size_t diff_match_patch::diff_commonSuffix( const std::string &text1, const std::string &text2 ) -{ - return diff_commonSuffix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); +std::size_t diff_match_patch::diff_commonSuffix(const std::string &text1, + const std::string &text2) { + return diff_commonSuffix(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); } -std::size_t diff_match_patch::diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ) -{ - // Cache the text lengths to prevent multiple calls. - const auto text1_length = text1.length(); - const auto text2_length = text2.length(); - // Eliminate the nullptr case. - if ( text1_length == 0 || text2_length == 0 ) - { - return 0; - } - // Truncate the longer string. 
- std::wstring text1_trunc = text1; - std::wstring text2_trunc = text2; - if ( text1_length > text2_length ) - { - text1_trunc = text1.substr( text1_length - text2_length ); - } - else if ( text1_length < text2_length ) - { - text2_trunc = text2.substr( 0, text1_length ); - } - const auto text_length = std::min( text1_length, text2_length ); - // Quick check for the worst case. - if ( text1_trunc == text2_trunc ) - { - return text_length; - } - - // Start by looking for a single character match - // and increase length until no match is found. - // Performance analysis: http://neil.fraser.name/news/2010/11/04/ - std::size_t best = 0; - std::size_t length = 1; - while ( true ) - { - std::wstring pattern = ( length < text1_trunc.length() ) ? text1_trunc.substr( text_length - length ) : std::wstring(); - if ( pattern.empty() ) - return best; - - auto found = text2_trunc.find( pattern ); - if ( found == std::string::npos ) - { - return best; +std::size_t diff_match_patch::diff_commonOverlap(const std::wstring &text1, + const std::wstring &text2) { + // Cache the text lengths to prevent multiple calls. + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + // Eliminate the nullptr case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. + std::wstring text1_trunc = text1; + std::wstring text2_trunc = text2; + if (text1_length > text2_length) { + text1_trunc = text1.substr(text1_length - text2_length); + } else if (text1_length < text2_length) { + text2_trunc = text2.substr(0, text1_length); + } + const auto text_length = std::min(text1_length, text2_length); + // Quick check for the worst case. + if (text1_trunc == text2_trunc) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. 
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + std::size_t best = 0; + std::size_t length = 1; + while (true) { + std::wstring pattern = (length < text1_trunc.length()) + ? text1_trunc.substr(text_length - length) + : std::wstring(); + if (pattern.empty()) return best; + + auto found = text2_trunc.find(pattern); + if (found == std::string::npos) { + return best; + } + length += found; + if (found == 0 || text1_trunc.substr(text_length - length) == + text2_trunc.substr(0, length)) { + best = length; + length++; + } + } +} + +std::size_t diff_match_patch::diff_commonOverlap(const std::string &text1, + const std::string &text2) { + return diff_commonOverlap(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( + const std::wstring &text1, const std::wstring &text2) { + if (Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return {}; + } + const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; + const std::wstring shorttext = + text1.length() > text2.length() ? text2 : text1; + if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { + return {}; // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + const TStringVector hm1 = + diff_halfMatchI(longtext, shorttext, (longtext.length() + 3) / 4); + // Check again based on the third quarter. + const TStringVector hm2 = + diff_halfMatchI(longtext, shorttext, (longtext.length() + 1) / 2); + TStringVector hm; + if (hm1.empty() && hm2.empty()) { + return {}; + } else if (hm2.empty()) { + hm = hm1; + } else if (hm1.empty()) { + hm = hm2; + } else { + // Both matched. Select the longest. + hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if (text1.length() > text2.length()) { + return hm; + } else { + TStringVector listRet({hm[2], hm[3], hm[0], hm[1], hm[4]}); + return listRet; + } +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( + const std::string &text1, const std::string &text2) { + return diff_halfMatch(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( + const std::wstring &longtext, const std::wstring &shorttext, + std::size_t i) { + // Start with a 1/4 length substring at position i as a seed. + const std::wstring seed = safeMid(longtext, i, longtext.length() / 4); + std::size_t j = std::string::npos; + std::wstring best_common; + std::wstring best_longtext_a, best_longtext_b; + std::wstring best_shorttext_a, best_shorttext_b; + while ((j = shorttext.find(seed, j + 1)) != std::string::npos) { + const auto prefixLength = + diff_commonPrefix(safeMid(longtext, i), safeMid(shorttext, j)); + const auto suffixLength = + diff_commonSuffix(longtext.substr(0, i), shorttext.substr(0, j)); + if (best_common.length() < suffixLength + prefixLength) { + best_common = safeMid(shorttext, j - suffixLength, suffixLength) + + safeMid(shorttext, j, prefixLength); + best_longtext_a = longtext.substr(0, i - suffixLength); + best_longtext_b = safeMid(longtext, i + prefixLength); + best_shorttext_a = shorttext.substr(0, j - suffixLength); + best_shorttext_b = safeMid(shorttext, j + prefixLength); + } + } + if (best_common.length() * 2 >= longtext.length()) { + TStringVector listRet({best_longtext_a, best_longtext_b, best_shorttext_a, + best_shorttext_b, best_common}); + return listRet; + } else { + return {}; + } +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( + const std::string &longtext, const std::string &shorttext, std::size_t i) { + return diff_halfMatchI(NUtils::to_wstring(longtext), + NUtils::to_wstring(shorttext), i); +} + +void diff_match_patch::diff_cleanupSemantic(TDiffVector 
&diffs) { + if (diffs.empty()) return; + + bool changes = false; + // Stack of indices where equalities are found. + std::stack equalities; // stack of equalities + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Number of characters that changed prior to the equality. + std::size_t length_insertions1 = 0; + std::size_t length_deletions1 = 0; + // Number of characters that changed after the equality. + std::size_t length_insertions2 = 0; + std::size_t length_deletions2 = 0; + while (pointer < diffs.size()) { + if (diffs[pointer].operation == EQUAL) { // Equality found. + equalities.push(pointer); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality = diffs[pointer].text; + } else { // an insertion or deletion + if (diffs[pointer].operation == INSERT) { + length_insertions2 += diffs[pointer].text.length(); + } else { + length_deletions2 += diffs[pointer].text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (!lastEquality.empty() && + (lastEquality.length() <= + std::max(length_insertions1, length_deletions1)) && + (lastEquality.length() <= + std::max(length_insertions2, length_deletions2))) { + // Duplicate record. + diffs.insert(diffs.begin() + equalities.top(), + Diff(DELETE, lastEquality)); + // Change second copy to insert. + diffs[equalities.top() + 1].operation = INSERT; + // Throw away the equality we just deleted. + equalities.pop(); + if (!equalities.empty()) { + equalities.pop(); } - length += found; - if ( found == 0 || text1_trunc.substr( text_length - length ) == text2_trunc.substr( 0, length ) ) - { - best = length; - length++; + pointer = !equalities.empty() ? equalities.top() : -1; + length_insertions1 = 0; // Reset the counters. 
+ length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality.clear(); + changes = true; + } + } + pointer++; + } + + // Normalize the diff. + if (changes) { + diff_cleanupMerge(diffs); + } + diff_cleanupSemanticLossless(diffs); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 1; + while (pointer < diffs.size()) { + if (diffs[pointer - 1].operation == DELETE && + diffs[pointer].operation == INSERT) { + auto deletion = diffs[pointer - 1].text; + auto insertion = diffs[pointer].text; + std::size_t overlap_length1 = diff_commonOverlap(deletion, insertion); + std::size_t overlap_length2 = diff_commonOverlap(insertion, deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length() / 2.0 || + overlap_length1 >= insertion.length() / 2.0) { + // Overlap found. + // Insert an equality and trim the surrounding edits. + diffs.emplace(diffs.begin() + pointer, EQUAL, + insertion.substr(0, overlap_length1)); + diffs[pointer - 1].text = + deletion.substr(0, deletion.length() - overlap_length1); + diffs[pointer + 1].text = insertion.substr(overlap_length1); + pointer++; } - } -} - -std::size_t diff_match_patch::diff_commonOverlap( const std::string &text1, const std::string &text2 ) -{ - return diff_commonOverlap( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ) -{ - if ( Diff_Timeout <= 0 ) - { - // Don't risk returning a non-optimal diff if we have unlimited time. - return {}; - } - const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; - const std::wstring shorttext = text1.length() > text2.length() ? 
text2 : text1; - if ( longtext.length() < 4 || shorttext.length() * 2 < longtext.length() ) - { - return {}; // Pointless. - } - - // First check if the second quarter is the seed for a half-match. - const TStringVector hm1 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 3 ) / 4 ); - // Check again based on the third quarter. - const TStringVector hm2 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 1 ) / 2 ); - TStringVector hm; - if ( hm1.empty() && hm2.empty() ) - { - return {}; - } - else if ( hm2.empty() ) - { - hm = hm1; - } - else if ( hm1.empty() ) - { - hm = hm2; - } - else - { - // Both matched. Select the longest. - hm = hm1[ 4 ].length() > hm2[ 4 ].length() ? hm1 : hm2; - } - - // A half-match was found, sort out the return data. - if ( text1.length() > text2.length() ) - { - return hm; - } - else - { - TStringVector listRet( { hm[ 2 ], hm[ 3 ], hm[ 0 ], hm[ 1 ], hm[ 4 ] } ); - return listRet; - } -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::string &text1, const std::string &text2 ) -{ - return diff_halfMatch( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ) -{ - // Start with a 1/4 length substring at position i as a seed. 
- const std::wstring seed = safeMid( longtext, i, longtext.length() / 4 ); - std::size_t j = std::string::npos; - std::wstring best_common; - std::wstring best_longtext_a, best_longtext_b; - std::wstring best_shorttext_a, best_shorttext_b; - while ( ( j = shorttext.find( seed, j + 1 ) ) != std::string::npos ) - { - const auto prefixLength = diff_commonPrefix( safeMid( longtext, i ), safeMid( shorttext, j ) ); - const auto suffixLength = diff_commonSuffix( longtext.substr( 0, i ), shorttext.substr( 0, j ) ); - if ( best_common.length() < suffixLength + prefixLength ) - { - best_common = safeMid( shorttext, j - suffixLength, suffixLength ) + safeMid( shorttext, j, prefixLength ); - best_longtext_a = longtext.substr( 0, i - suffixLength ); - best_longtext_b = safeMid( longtext, i + prefixLength ); - best_shorttext_a = shorttext.substr( 0, j - suffixLength ); - best_shorttext_b = safeMid( shorttext, j + prefixLength ); + } else { + if (overlap_length2 >= deletion.length() / 2.0 || + overlap_length2 >= insertion.length() / 2.0) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. 
+ diffs.emplace(diffs.begin() + pointer, EQUAL, + deletion.substr(0, overlap_length2)); + diffs[pointer - 1].operation = INSERT; + diffs[pointer - 1].text = + insertion.substr(0, insertion.length() - overlap_length2); + diffs[pointer + 1].operation = DELETE; + diffs[pointer + 1].text = deletion.substr(overlap_length2); + pointer++; } - } - if ( best_common.length() * 2 >= longtext.length() ) - { - TStringVector listRet( { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common } ); - return listRet; - } - else - { - return {}; - } -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ) -{ - return diff_halfMatchI( NUtils::to_wstring( longtext ), NUtils::to_wstring( shorttext ), i ); -} - -void diff_match_patch::diff_cleanupSemantic( TDiffVector &diffs ) -{ - if ( diffs.empty() ) - return; - - bool changes = false; - // Stack of indices where equalities are found. - std::stack< std::size_t > equalities; // stack of equalities - // Always equal to equalities[equalitiesLength-1][1] - std::wstring lastEquality; - std::size_t pointer = 0; // Index of current position. - // Number of characters that changed prior to the equality. - std::size_t length_insertions1 = 0; - std::size_t length_deletions1 = 0; - // Number of characters that changed after the equality. - std::size_t length_insertions2 = 0; - std::size_t length_deletions2 = 0; - while ( pointer < diffs.size() ) - { - if ( diffs[ pointer ].operation == EQUAL ) - { // Equality found. - equalities.push( pointer ); - length_insertions1 = length_insertions2; - length_deletions1 = length_deletions2; - length_insertions2 = 0; - length_deletions2 = 0; - lastEquality = diffs[ pointer ].text; + } + pointer++; + } + pointer++; + } +} + +void diff_match_patch::diff_cleanupSemanticLossless(TDiffVector &diffs) { + int pointer = 1; + // Intentionally ignore the first and last element (don't need checking). 
+ while ((pointer != -1) && !diffs.empty() && (pointer < (diffs.size() - 1))) { + if (diffs[pointer - 1].operation == EQUAL && + diffs[pointer + 1].operation == EQUAL) { + // This is a single edit surrounded by equalities. + auto equality1 = diffs[pointer - 1].text; + auto edit = diffs[pointer].text; + auto equality2 = diffs[pointer + 1].text; + + // First, shift the edit as far left as possible. + auto commonOffset = diff_commonSuffix(equality1, edit); + if (commonOffset > 0) { + auto commonString = safeMid(edit, edit.length() - commonOffset); + equality1 = equality1.substr(0, equality1.length() - commonOffset); + edit = commonString + edit.substr(0, edit.length() - commonOffset); + equality2 = commonString + equality2; + } + + // Second, step character by character right, + // looking for the best fit. + auto bestEquality1 = equality1; + auto bestEdit = edit; + auto bestEquality2 = equality2; + auto bestScore = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + while (!edit.empty() && !equality2.empty() && edit[0] == equality2[0]) { + equality1 += edit[0]; + edit = edit.substr(1) + equality2[0]; + equality2 = equality2.substr(1); + auto score = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + // The >= encourages trailing rather than leading whitespace on + // edits. + if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; } - else - { // an insertion or deletion - if ( diffs[ pointer ].operation == INSERT ) - { - length_insertions2 += diffs[ pointer ].text.length(); - } - else - { - length_deletions2 += diffs[ pointer ].text.length(); - } - // Eliminate an equality that is smaller or equal to the edits on both - // sides of it. 
- if ( !lastEquality.empty() && ( lastEquality.length() <= std::max( length_insertions1, length_deletions1 ) ) && ( lastEquality.length() <= std::max( length_insertions2, length_deletions2 ) ) ) - { - // Duplicate record. - diffs.insert( diffs.begin() + equalities.top(), Diff( DELETE, lastEquality ) ); - // Change second copy to insert. - diffs[ equalities.top() + 1 ].operation = INSERT; - // Throw away the equality we just deleted. - equalities.pop(); - if ( !equalities.empty() ) - { - equalities.pop(); - } - pointer = !equalities.empty() ? equalities.top() : -1; - length_insertions1 = 0; // Reset the counters. - length_deletions1 = 0; - length_insertions2 = 0; - length_deletions2 = 0; - lastEquality.clear(); - changes = true; - } + } + + if (diffs[pointer - 1].text != bestEquality1) { + // We have an improvement, save it back to the diff. + if (!bestEquality1.empty()) { + diffs[pointer - 1].text = bestEquality1; + } else { + diffs.erase(diffs.begin() + pointer - 1); + pointer--; } - pointer++; - } - - // Normalize the diff. - if ( changes ) - { - diff_cleanupMerge( diffs ); - } - diff_cleanupSemanticLossless( diffs ); - - // Find any overlaps between deletions and insertions. - // e.g: abcxxxxxxdef - // -> abcxxxdef - // e.g: xxxabcdefxxx - // -> defxxxabc - // Only extract an overlap if it is as big as the edit ahead or behind it. - pointer = 1; - while ( pointer < diffs.size() ) - { - if ( diffs[ pointer - 1 ].operation == DELETE && diffs[ pointer ].operation == INSERT ) - { - auto deletion = diffs[ pointer - 1 ].text; - auto insertion = diffs[ pointer ].text; - std::size_t overlap_length1 = diff_commonOverlap( deletion, insertion ); - std::size_t overlap_length2 = diff_commonOverlap( insertion, deletion ); - if ( overlap_length1 >= overlap_length2 ) - { - if ( overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0 ) - { - // Overlap found. - // Insert an equality and trim the surrounding edits. 
- diffs.emplace( diffs.begin() + pointer, EQUAL, insertion.substr( 0, overlap_length1 ) ); - diffs[ pointer - 1 ].text = deletion.substr( 0, deletion.length() - overlap_length1 ); - diffs[ pointer + 1 ].text = insertion.substr( overlap_length1 ); - pointer++; - } - } - else - { - if ( overlap_length2 >= deletion.length() / 2.0 || overlap_length2 >= insertion.length() / 2.0 ) - { - // Reverse overlap found. - // Insert an equality and swap and trim the surrounding edits. - diffs.emplace( diffs.begin() + pointer, EQUAL, deletion.substr( 0, overlap_length2 ) ); - diffs[ pointer - 1 ].operation = INSERT; - diffs[ pointer - 1 ].text = insertion.substr( 0, insertion.length() - overlap_length2 ); - diffs[ pointer + 1 ].operation = DELETE; - diffs[ pointer + 1 ].text = deletion.substr( overlap_length2 ); - pointer++; - } - } - pointer++; - } - pointer++; - } -} - -void diff_match_patch::diff_cleanupSemanticLossless( TDiffVector &diffs ) -{ - int pointer = 1; - // Intentionally ignore the first and last element (don't need checking). - while ( ( pointer != -1 ) && !diffs.empty() && ( pointer < ( diffs.size() - 1 ) ) ) - { - if ( diffs[ pointer - 1 ].operation == EQUAL && diffs[ pointer + 1 ].operation == EQUAL ) - { - // This is a single edit surrounded by equalities. - auto equality1 = diffs[ pointer - 1 ].text; - auto edit = diffs[ pointer ].text; - auto equality2 = diffs[ pointer + 1 ].text; - - // First, shift the edit as far left as possible. - auto commonOffset = diff_commonSuffix( equality1, edit ); - if ( commonOffset > 0 ) - { - auto commonString = safeMid( edit, edit.length() - commonOffset ); - equality1 = equality1.substr( 0, equality1.length() - commonOffset ); - edit = commonString + edit.substr( 0, edit.length() - commonOffset ); - equality2 = commonString + equality2; - } - - // Second, step character by character right, - // looking for the best fit. 
- auto bestEquality1 = equality1; - auto bestEdit = edit; - auto bestEquality2 = equality2; - auto bestScore = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); - while ( !edit.empty() && !equality2.empty() && edit[ 0 ] == equality2[ 0 ] ) - { - equality1 += edit[ 0 ]; - edit = edit.substr( 1 ) + equality2[ 0 ]; - equality2 = equality2.substr( 1 ); - auto score = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); - // The >= encourages trailing rather than leading whitespace on - // edits. - if ( score >= bestScore ) - { - bestScore = score; - bestEquality1 = equality1; - bestEdit = edit; - bestEquality2 = equality2; - } - } - - if ( diffs[ pointer - 1 ].text != bestEquality1 ) - { - // We have an improvement, save it back to the diff. - if ( !bestEquality1.empty() ) - { - diffs[ pointer - 1 ].text = bestEquality1; - } - else - { - diffs.erase( diffs.begin() + pointer - 1 ); - pointer--; - } - diffs[ pointer ].text = bestEdit; - if ( !bestEquality2.empty() ) - { - diffs[ pointer + 1 ].text = bestEquality2; - } - else - { - diffs.erase( diffs.begin() + pointer + 1 ); - pointer--; - } - } + diffs[pointer].text = bestEdit; + if (!bestEquality2.empty()) { + diffs[pointer + 1].text = bestEquality2; + } else { + diffs.erase(diffs.begin() + pointer + 1); + pointer--; } - pointer++; - } -} - -int64_t diff_match_patch::diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ) -{ - if ( one.empty() || two.empty() ) - { - // Edges are the best. - return 6; - } - - // Each port of this function behaves slightly differently due to - // subtle differences in each language's definition of things like - // 'whitespace'. Since this function's purpose is largely cosmetic, - // the choice has been made to use each language's native features - // rather than force total conformity. 
- auto char1 = one[ one.length() - 1 ]; - auto char2 = two[ 0 ]; - bool nonAlphaNumeric1 = !std::iswalnum( char1 ); - bool nonAlphaNumeric2 = !std::iswalnum( char2 ); - bool whitespace1 = nonAlphaNumeric1 && std::iswspace( char1 ); - bool whitespace2 = nonAlphaNumeric2 && std::iswspace( char2 ); - bool lineBreak1 = whitespace1 && std::iswcntrl( char1 ); - bool lineBreak2 = whitespace2 && std::iswcntrl( char2 ); - bool blankLine1 = lineBreak1 && std::regex_search( one, BLANKLINEEND ); - bool blankLine2 = lineBreak2 && std::regex_search( two, BLANKLINESTART ); - - if ( blankLine1 || blankLine2 ) - { - // Five points for blank lines. - return 5; - } - else if ( lineBreak1 || lineBreak2 ) - { - // Four points for line breaks. - return 4; - } - else if ( nonAlphaNumeric1 && !whitespace1 && whitespace2 ) - { - // Three points for end of sentences. - return 3; - } - else if ( whitespace1 || whitespace2 ) - { - // Two points for whitespace. - return 2; - } - else if ( nonAlphaNumeric1 || nonAlphaNumeric2 ) - { - // One point for non-alphanumeric. - return 1; - } - return 0; -} - -int64_t diff_match_patch::diff_cleanupSemanticScore( const std::string &one, const std::string &two ) -{ - return diff_cleanupSemanticScore( NUtils::to_wstring( one ), NUtils::to_wstring( two ) ); + } + } + pointer++; + } +} + +int64_t diff_match_patch::diff_cleanupSemanticScore(const std::wstring &one, + const std::wstring &two) { + if (one.empty() || two.empty()) { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. 
+ auto char1 = one[one.length() - 1]; + auto char2 = two[0]; + bool nonAlphaNumeric1 = !std::iswalnum(char1); + bool nonAlphaNumeric2 = !std::iswalnum(char2); + bool whitespace1 = nonAlphaNumeric1 && std::iswspace(char1); + bool whitespace2 = nonAlphaNumeric2 && std::iswspace(char2); + bool lineBreak1 = whitespace1 && std::iswcntrl(char1); + bool lineBreak2 = whitespace2 && std::iswcntrl(char2); + bool blankLine1 = lineBreak1 && std::regex_search(one, BLANKLINEEND); + bool blankLine2 = lineBreak2 && std::regex_search(two, BLANKLINESTART); + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. + return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. + return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; +} + +int64_t diff_match_patch::diff_cleanupSemanticScore(const std::string &one, + const std::string &two) { + return diff_cleanupSemanticScore(NUtils::to_wstring(one), + NUtils::to_wstring(two)); } // Define some regex patterns for matching boundaries. -std::wregex diff_match_patch::BLANKLINEEND = std::wregex( LR"(\n\r?\n$)" ); -std::wregex diff_match_patch::BLANKLINESTART = std::wregex( LR"(^\r?\n\r?\n)" ); - -void diff_match_patch::diff_cleanupEfficiency( TDiffVector &diffs ) -{ - bool changes = false; - // Stack of indices where equalities are found. - std::stack< std::size_t > equalities; - // Always equal to equalities[equalitiesLength-1][1] - std::wstring lastEquality; - std::size_t pointer = 0; // Index of current position. - // Is there an insertion operation before the last equality. - bool pre_ins = false; - // Is there a deletion operation before the last equality. 
- bool pre_del = false; - // Is there an insertion operation after the last equality. - bool post_ins = false; - // Is there a deletion operation after the last equality. - bool post_del = false; - while ( pointer < diffs.size() ) - { - if ( diffs[ pointer ].operation == EQUAL ) - { // Equality found. - if ( diffs[ pointer ].text.length() < Diff_EditCost && ( post_ins || post_del ) ) - { - // Candidate found. - equalities.push( pointer ); - pre_ins = post_ins; - pre_del = post_del; - lastEquality = diffs[ pointer ].text; - } - else - { - // Not a candidate, and can never become one. - equalities = {}; - lastEquality.clear(); - } - post_ins = post_del = false; - } - else - { // An insertion or deletion. - if ( diffs[ pointer ].operation == DELETE ) - { - post_del = true; - } - else - { - post_ins = true; - } - /* - * Five types to be split: - * ABXYCD - * AXCD - * ABXC - * AXCD - * ABXC - */ - if ( ( lastEquality.length() != 0 ) && ( ( pre_ins && pre_del && post_ins && post_del ) || ( ( lastEquality.length() < Diff_EditCost / 2 ) && ( ( pre_ins ? 1 : 0 ) + ( pre_del ? 1 : 0 ) + ( post_ins ? 1 : 0 ) + ( post_del ? 1 : 0 ) ) == 3 ) ) ) - { - // Duplicate record. - diffs.emplace( diffs.begin() + equalities.top(), DELETE, lastEquality ); - // Change second copy to insert. - diffs[ equalities.top() + 1 ].operation = INSERT; - equalities.pop(); // Throw away the equality we just deleted. - lastEquality.clear(); - if ( pre_ins && pre_del ) - { - // No changes made which could affect previous entry, keep going. - post_ins = post_del = true; - equalities = {}; - } - else - { - if ( !equalities.empty() ) - { - equalities.pop(); - } - - pointer = !equalities.empty() ? 
equalities.top() : -1; - post_ins = post_del = false; - } - changes = true; - } +std::wregex diff_match_patch::BLANKLINEEND = std::wregex(LR"(\n\r?\n$)"); +std::wregex diff_match_patch::BLANKLINESTART = std::wregex(LR"(^\r?\n\r?\n)"); + +void diff_match_patch::diff_cleanupEfficiency(TDiffVector &diffs) { + bool changes = false; + // Stack of indices where equalities are found. + std::stack equalities; + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + while (pointer < diffs.size()) { + if (diffs[pointer].operation == EQUAL) { // Equality found. + if (diffs[pointer].text.length() < Diff_EditCost && + (post_ins || post_del)) { + // Candidate found. + equalities.push(pointer); + pre_ins = post_ins; + pre_del = post_del; + lastEquality = diffs[pointer].text; + } else { + // Not a candidate, and can never become one. + equalities = {}; + lastEquality.clear(); + } + post_ins = post_del = false; + } else { // An insertion or deletion. + if (diffs[pointer].operation == DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if ((lastEquality.length() != 0) && + ((pre_ins && pre_del && post_ins && post_del) || + ((lastEquality.length() < Diff_EditCost / 2) && + ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0) + + (post_del ? 1 : 0)) == 3))) { + // Duplicate record. + diffs.emplace(diffs.begin() + equalities.top(), DELETE, lastEquality); + // Change second copy to insert. 
+ diffs[equalities.top() + 1].operation = INSERT; + equalities.pop(); // Throw away the equality we just deleted. + lastEquality.clear(); + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities = {}; + } else { + if (!equalities.empty()) { + equalities.pop(); + } + + pointer = !equalities.empty() ? equalities.top() : -1; + post_ins = post_del = false; } + changes = true; + } + } + pointer++; + } + + if (changes) { + diff_cleanupMerge(diffs); + } +} + +void diff_match_patch::diff_cleanupMerge(TDiffVector &diffs) { + diffs.emplace_back(EQUAL, L""); + int pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + + while (pointer < diffs.size()) { + switch (diffs[pointer].operation) { + case INSERT: + count_insert++; + text_insert += diffs[pointer].text; pointer++; - } - - if ( changes ) - { - diff_cleanupMerge( diffs ); - } -} - -void diff_match_patch::diff_cleanupMerge( TDiffVector &diffs ) -{ - diffs.emplace_back( EQUAL, L"" ); - int pointer = 0; - int count_delete = 0; - int count_insert = 0; - std::wstring text_delete; - std::wstring text_insert; - - while ( pointer < diffs.size() ) - { - switch ( diffs[ pointer ].operation ) - { - case INSERT: - count_insert++; - text_insert += diffs[ pointer ].text; - pointer++; - break; - case DELETE: - count_delete++; - text_delete += diffs[ pointer ].text; + break; + case DELETE: + count_delete++; + text_delete += diffs[pointer].text; + pointer++; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete + count_insert > 1) { + if (count_delete != 0 && count_insert != 0) { + // Factor out any common prefixies. 
+ auto commonlength = diff_commonPrefix(text_insert, text_delete); + if (commonlength != 0) { + if ((pointer > (count_delete + count_insert)) && + diffs[pointer - (count_delete + count_insert) - 1] + .operation == EQUAL) { + diffs[pointer - count_delete - count_insert - 1].text += + text_insert.substr(0, commonlength); + } else { + diffs.emplace(diffs.begin(), EQUAL, + text_insert.substr(0, commonlength)); pointer++; - break; - case EQUAL: - // Upon reaching an equality, check for prior redundancies. - if ( count_delete + count_insert > 1 ) - { - if ( count_delete != 0 && count_insert != 0 ) - { - // Factor out any common prefixies. - auto commonlength = diff_commonPrefix( text_insert, text_delete ); - if ( commonlength != 0 ) - { - if ( ( pointer > ( count_delete + count_insert ) ) && diffs[ pointer - ( count_delete + count_insert ) - 1 ].operation == EQUAL ) - { - diffs[ pointer - count_delete - count_insert - 1 ].text += text_insert.substr( 0, commonlength ); - } - else - { - diffs.emplace( diffs.begin(), EQUAL, text_insert.substr( 0, commonlength ) ); - pointer++; - } - text_insert = text_insert.substr( commonlength ); - text_delete = text_delete.substr( commonlength ); - } - // Factor out any common suffixies. - commonlength = diff_commonSuffix( text_insert, text_delete ); - if ( commonlength != 0 ) - { - diffs[ pointer ].text = safeMid( text_insert, text_insert.length() - commonlength ) + diffs[ pointer ].text; - text_insert = text_insert.substr( 0, text_insert.length() - commonlength ); - text_delete = text_delete.substr( 0, text_delete.length() - commonlength ); - } - } - // Delete the offending records and add the merged ones. 
- pointer -= count_delete + count_insert; - NUtils::Splice( diffs, pointer, count_delete + count_insert ); - if ( !text_delete.empty() ) - { - NUtils::Splice( diffs, pointer, 0, { Diff( DELETE, text_delete ) } ); - pointer++; - } - if ( !text_insert.empty() ) - { - NUtils::Splice( diffs, pointer, 0, { Diff( INSERT, text_insert ) } ); - pointer++; - } - pointer++; - } - else if ( pointer != 0 && diffs[ pointer - 1 ].operation == EQUAL ) - { - // Merge this equality with the previous one. - diffs[ pointer - 1 ].text += diffs[ pointer ].text; - diffs.erase( diffs.begin() + pointer ); - } - else - { - pointer++; - } - count_insert = 0; - count_delete = 0; - text_delete.clear(); - text_insert.clear(); - break; - } - } - if ( diffs.back().text.empty() ) - { - diffs.pop_back(); // Remove the dummy entry at the end. - } - - // Second pass: look for single edits surrounded on both sides by - // equalities which can be shifted sideways to eliminate an equality. - // e.g: ABAC -> ABAC - bool changes = false; - pointer = 1; - // Intentionally ignore the first and last element (don't need checking). - while ( !diffs.empty() && pointer < ( diffs.size() - 1 ) ) - { - if ( diffs[ pointer - 1 ].operation == EQUAL && diffs[ pointer + 1 ].operation == EQUAL ) - { - // This is a single edit surrounded by equalities. - if ( NUtils::endsWith( diffs[ pointer ].text, diffs[ pointer - 1 ].text ) ) - { - // Shift the edit over the previous equality. - diffs[ pointer ].text = diffs[ pointer - 1 ].text + diffs[ pointer ].text.substr( 0, diffs[ pointer ].text.length() - diffs[ pointer - 1 ].text.length() ); - diffs[ pointer + 1 ].text = diffs[ pointer - 1 ].text + diffs[ pointer + 1 ].text; - NUtils::Splice( diffs, pointer - 1, 1 ); - changes = true; + } + text_insert = text_insert.substr(commonlength); + text_delete = text_delete.substr(commonlength); } - else if ( diffs[ pointer ].text.find( diffs[ pointer + 1 ].text ) == 0 ) - { - // Shift the edit over the next equality. 
- diffs[ pointer - 1 ].text += diffs[ pointer + 1 ].text; - diffs[ pointer ].text = diffs[ pointer ].text.substr( diffs[ pointer + 1 ].text.length() ) + diffs[ pointer + 1 ].text; - NUtils::Splice( diffs, pointer + 1, 1 ); - changes = true; + // Factor out any common suffixies. + commonlength = diff_commonSuffix(text_insert, text_delete); + if (commonlength != 0) { + diffs[pointer].text = + safeMid(text_insert, text_insert.length() - commonlength) + + diffs[pointer].text; + text_insert = + text_insert.substr(0, text_insert.length() - commonlength); + text_delete = + text_delete.substr(0, text_delete.length() - commonlength); } + } + // Delete the offending records and add the merged ones. + pointer -= count_delete + count_insert; + NUtils::Splice(diffs, pointer, count_delete + count_insert); + if (!text_delete.empty()) { + NUtils::Splice(diffs, pointer, 0, {Diff(DELETE, text_delete)}); + pointer++; + } + if (!text_insert.empty()) { + NUtils::Splice(diffs, pointer, 0, {Diff(INSERT, text_insert)}); + pointer++; + } + pointer++; + } else if (pointer != 0 && diffs[pointer - 1].operation == EQUAL) { + // Merge this equality with the previous one. + diffs[pointer - 1].text += diffs[pointer].text; + diffs.erase(diffs.begin() + pointer); + } else { + pointer++; } - pointer++; - } - // If shifts were made, the diff needs reordering and another shift sweep. - if ( changes ) - { - diff_cleanupMerge( diffs ); - } -} -std::size_t diff_match_patch::diff_xIndex( const TDiffVector &diffs, std::size_t loc ) -{ - std::size_t chars1 = 0; - std::size_t chars2 = 0; - std::size_t last_chars1 = 0; - std::size_t last_chars2 = 0; - Diff lastDiff; - for ( auto &&aDiff : diffs ) - { - if ( aDiff.operation != INSERT ) - { - // Equality or deletion. - chars1 += aDiff.text.length(); - } - if ( aDiff.operation != DELETE ) - { - // Equality or insertion. - chars2 += aDiff.text.length(); - } - if ( chars1 > loc ) - { - // Overshot the location. 
- lastDiff = aDiff; - break; - } - last_chars1 = chars1; - last_chars2 = chars2; - } - if ( lastDiff.operation == DELETE ) - { - // The location was deleted. - return last_chars2; - } - // Add the remaining character length. - return last_chars2 + ( loc - last_chars1 ); -} - -std::wstring diff_match_patch::diff_prettyHtml( const TDiffVector &diffs ) -{ - std::wstring html; - std::wstring text; - for ( auto &&aDiff : diffs ) - { - text = aDiff.text; - NUtils::replace( text, L"&", L"&" ); - NUtils::replace( text, L"<", L"<" ); - NUtils::replace( text, L">", L">" ); - NUtils::replace( text, L"\n", L"¶
" ); - switch ( aDiff.operation ) - { - case INSERT: - html += std::wstring( L"" ) + text + std::wstring( L"" ); - break; - case DELETE: - html += std::wstring( L"" ) + text + std::wstring( L"" ); - break; - case EQUAL: - html += std::wstring( L"" ) + text + std::wstring( L"" ); - break; - } - } - return html; -} - -std::wstring diff_match_patch::diff_text1( const TDiffVector &diffs ) -{ - std::wstring text; - for ( auto &&aDiff : diffs ) - { - if ( aDiff.operation != INSERT ) - { - text += aDiff.text; - } - } - return text; -} - -std::wstring diff_match_patch::diff_text2( const TDiffVector &diffs ) -{ - std::wstring text; - for ( auto &&aDiff : diffs ) - { - if ( aDiff.operation != DELETE ) - { - text += aDiff.text; + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + break; + } + } + if (diffs.back().text.empty()) { + diffs.pop_back(); // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by + // equalities which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + bool changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (!diffs.empty() && pointer < (diffs.size() - 1)) { + if (diffs[pointer - 1].operation == EQUAL && + diffs[pointer + 1].operation == EQUAL) { + // This is a single edit surrounded by equalities. + if (NUtils::endsWith(diffs[pointer].text, diffs[pointer - 1].text)) { + // Shift the edit over the previous equality. + diffs[pointer].text = + diffs[pointer - 1].text + + diffs[pointer].text.substr(0, diffs[pointer].text.length() - + diffs[pointer - 1].text.length()); + diffs[pointer + 1].text = + diffs[pointer - 1].text + diffs[pointer + 1].text; + NUtils::Splice(diffs, pointer - 1, 1); + changes = true; + } else if (diffs[pointer].text.find(diffs[pointer + 1].text) == 0) { + // Shift the edit over the next equality. 
+ diffs[pointer - 1].text += diffs[pointer + 1].text; + diffs[pointer].text = + diffs[pointer].text.substr(diffs[pointer + 1].text.length()) + + diffs[pointer + 1].text; + NUtils::Splice(diffs, pointer + 1, 1); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + diff_cleanupMerge(diffs); + } +} +std::size_t diff_match_patch::diff_xIndex(const TDiffVector &diffs, + std::size_t loc) { + std::size_t chars1 = 0; + std::size_t chars2 = 0; + std::size_t last_chars1 = 0; + std::size_t last_chars2 = 0; + Diff lastDiff; + for (auto &&aDiff : diffs) { + if (aDiff.operation != INSERT) { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if (chars1 > loc) { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if (lastDiff.operation == DELETE) { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); +} + +std::wstring diff_match_patch::diff_prettyHtml(const TDiffVector &diffs) { + std::wstring html; + std::wstring text; + for (auto &&aDiff : diffs) { + text = aDiff.text; + NUtils::replace(text, L"&", L"&"); + NUtils::replace(text, L"<", L"<"); + NUtils::replace(text, L">", L">"); + NUtils::replace(text, L"\n", L"¶
"); + switch (aDiff.operation) { + case INSERT: + html += std::wstring(L"") + text + + std::wstring(L""); + break; + case DELETE: + html += std::wstring(L"") + text + + std::wstring(L""); + break; + case EQUAL: + html += std::wstring(L"") + text + std::wstring(L""); + break; + } + } + return html; +} + +std::wstring diff_match_patch::diff_text1(const TDiffVector &diffs) { + std::wstring text; + for (auto &&aDiff : diffs) { + if (aDiff.operation != INSERT) { + text += aDiff.text; + } + } + return text; +} + +std::wstring diff_match_patch::diff_text2(const TDiffVector &diffs) { + std::wstring text; + for (auto &&aDiff : diffs) { + if (aDiff.operation != DELETE) { + text += aDiff.text; + } + } + return text; +} + +std::size_t diff_match_patch::diff_levenshtein(const TDiffVector &diffs) { + std::size_t levenshtein = 0; + std::size_t insertions = 0; + std::size_t deletions = 0; + for (auto &&aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += std::max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max(insertions, deletions); + return levenshtein; +} + +std::wstring diff_match_patch::diff_toDelta(const TDiffVector &diffs) { + std::wstring text; + for (auto &&aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + text += L"+" + + NUtils::toPercentEncoding(aDiff.text, L" !~*'();/?:@&=+$,#") + + L"\t"; + break; + case DELETE: + text += L"-" + std::to_wstring(aDiff.text.length()) + L"\t"; + break; + case EQUAL: + text += L"=" + std::to_wstring(aDiff.text.length()) + L"\t"; + break; + } + } + if (!text.empty()) { + // Strip off trailing tab character. 
+ text = text.substr(0, text.length() - 1); + } + return text; +} + +TDiffVector diff_match_patch::diff_fromDelta(const std::wstring &text1, + const std::wstring &delta) { + TDiffVector diffs; + std::size_t pointer = 0; // Cursor in text1 + auto tokens = NUtils::splitString(delta, L"\t", false); + for (auto &&token : tokens) { + if (token.empty()) { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + std::wstring param = safeMid(token, 1); + switch (token[0]) { + case '+': + NUtils::replace(param, L"+", L"%2b"); + param = NUtils::fromPercentEncoding(param); + diffs.emplace_back(INSERT, param); + break; + case '-': + // Fall through. + case '=': { + auto n = NUtils::toInt(param); + if (n < 0) { + throw std::wstring(L"Negative number in diff_fromDelta: " + param); } - } - return text; -} - -std::size_t diff_match_patch::diff_levenshtein( const TDiffVector &diffs ) -{ - std::size_t levenshtein = 0; - std::size_t insertions = 0; - std::size_t deletions = 0; - for ( auto &&aDiff : diffs ) - { - switch ( aDiff.operation ) - { - case INSERT: - insertions += aDiff.text.length(); - break; - case DELETE: - deletions += aDiff.text.length(); - break; - case EQUAL: - // A deletion and an insertion is one substitution. 
- levenshtein += std::max( insertions, deletions ); - insertions = 0; - deletions = 0; - break; + std::wstring text; + if ((pointer + n) > text1.length()) { + throw std::wstring(L"Delta length (" + std::to_wstring(pointer + n) + + L") larger than source text length (" + + std::to_wstring(text1.length()) + L")."); } - } - levenshtein += std::max( insertions, deletions ); - return levenshtein; -} -std::wstring diff_match_patch::diff_toDelta( const TDiffVector &diffs ) -{ - std::wstring text; - for ( auto &&aDiff : diffs ) - { - switch ( aDiff.operation ) - { - case INSERT: - text += L"+" + NUtils::toPercentEncoding( aDiff.text, L" !~*'();/?:@&=+$,#" ) + L"\t"; - break; - case DELETE: - text += L"-" + std::to_wstring( aDiff.text.length() ) + L"\t"; - break; - case EQUAL: - text += L"=" + std::to_wstring( aDiff.text.length() ) + L"\t"; - break; - } - } - if ( !text.empty() ) - { - // Strip off trailing tab character. - text = text.substr( 0, text.length() - 1 ); - } - return text; -} - -TDiffVector diff_match_patch::diff_fromDelta( const std::wstring &text1, const std::wstring &delta ) -{ - TDiffVector diffs; - std::size_t pointer = 0; // Cursor in text1 - auto tokens = NUtils::splitString( delta, L"\t", false ); - for ( auto &&token : tokens ) - { - if ( token.empty() ) - { - // Blank tokens are ok (from a trailing \t). - continue; - } - // Each token begins with a one character parameter which specifies the - // operation of this token (delete, insert, equality). - std::wstring param = safeMid( token, 1 ); - switch ( token[ 0 ] ) - { - case '+': - NUtils::replace( param, L"+", L"%2b" ); - param = NUtils::fromPercentEncoding( param ); - diffs.emplace_back( INSERT, param ); - break; - case '-': - // Fall through. 
- case '=': - { - auto n = NUtils::toInt( param ); - if ( n < 0 ) - { - throw std::wstring( L"Negative number in diff_fromDelta: " + param ); - } - std::wstring text; - if ( ( pointer + n ) > text1.length() ) - { - throw std::wstring( L"Delta length (" + std::to_wstring( pointer + n ) + L") larger than source text length (" + std::to_wstring( text1.length() ) + L")." ); - } - - text = safeMid( text1, pointer, n ); - pointer += n; - if ( token[ 0 ] == L'=' ) - { - diffs.emplace_back( EQUAL, text ); - } - else - { - diffs.emplace_back( DELETE, text ); - } - break; - } - default: - throw std::wstring( L"Invalid diff operation in diff_fromDelta: " + token[ 0 ] ); + text = safeMid(text1, pointer, n); + pointer += n; + if (token[0] == L'=') { + diffs.emplace_back(EQUAL, text); + } else { + diffs.emplace_back(DELETE, text); } + break; + } + default: + throw std::wstring(L"Invalid diff operation in diff_fromDelta: " + + token[0]); } - if ( pointer != text1.length() ) - { - throw std::wstring( L"Delta length (" ) + std::to_wstring( pointer ) + L") smaller than source text length (" + std::to_wstring( text1.length() ) + L")"; - } - return diffs; + } + if (pointer != text1.length()) { + throw std::wstring(L"Delta length (") + std::to_wstring(pointer) + + L") smaller than source text length (" + + std::to_wstring(text1.length()) + L")"; + } + return diffs; } -TDiffVector diff_match_patch::diff_fromDelta( const std::string &text1, const std::string &delta ) -{ - return diff_fromDelta( NUtils::to_wstring( text1 ), NUtils::to_wstring( delta ) ); +TDiffVector diff_match_patch::diff_fromDelta(const std::string &text1, + const std::string &delta) { + return diff_fromDelta(NUtils::to_wstring(text1), NUtils::to_wstring(delta)); } // MATCH FUNCTIONS -std::size_t diff_match_patch::match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) -{ - // Check for null inputs not needed since null can't be passed via std::wstring - - loc = std::max( 0ULL, std::min( 
loc, text.length() ) ); - if ( text == pattern ) - { - // Shortcut (potentially not guaranteed by the algorithm) - return 0; - } - else if ( text.empty() ) - { - // Nothing to match. - return -1; - } - else if ( loc + pattern.length() <= text.length() && safeMid( text, loc, pattern.length() ) == pattern ) - { - // Perfect match at the perfect spot! (Includes case of nullptr pattern) - return loc; - } - else - { - // Do a fuzzy compare. - return match_bitap( text, pattern, loc ); - } -} - -std::size_t diff_match_patch::match_main( const std::string &text, const std::string &pattern, std::size_t loc ) -{ - return match_main( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); -} - -std::size_t diff_match_patch::match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) -{ - if ( !( Match_MaxBits == 0 || pattern.length() <= Match_MaxBits ) ) - { - throw "Pattern too long for this application."; - } - - // Initialise the alphabet. - auto &&s = match_alphabet( pattern ); - - // Highest score beyond which we give up. - double score_threshold = Match_Threshold; - // Is there a nearby exact match? (speedup) - auto best_loc = text.find( pattern, loc ); - if ( best_loc != std::string::npos ) - { - score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); - // What about in the other direction? (speedup) - auto start = std::min( loc + pattern.length(), text.length() ); - best_loc = text.rfind( pattern, start ); - if ( best_loc != std::string::npos ) - { - score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); - } - } +std::size_t diff_match_patch::match_main(const std::wstring &text, + const std::wstring &pattern, + std::size_t loc) { + // Check for null inputs not needed since null can't be passed via + // std::wstring - // Initialise the bit arrays. 
- auto matchmask = 1 << ( pattern.length() - 1 ); - best_loc = std::string::npos; - - std::size_t bin_min, bin_mid; - auto bin_max = pattern.length() + text.length(); - std::vector< int64_t > rd; - std::vector< int64_t > last_rd; - for ( int d = 0; d < pattern.length(); d++ ) - { - // Scan for the best match; each iteration allows for one more error. - // Run a binary search to determine how far from 'loc' we can stray at - // this error level. - bin_min = 0; - bin_mid = bin_max; - while ( bin_min < bin_mid ) - { - if ( match_bitapScore( d, loc + bin_mid, loc, pattern ) <= score_threshold ) - { - bin_min = bin_mid; - } - else - { - bin_max = bin_mid; - } - bin_mid = ( bin_max - bin_min ) / 2 + bin_min; - } - // Use the result from this iteration as the maximum for the next. + loc = std::max(0ULL, std::min(loc, text.length())); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.empty()) { + // Nothing to match. + return -1; + } else if (loc + pattern.length() <= text.length() && + safeMid(text, loc, pattern.length()) == pattern) { + // Perfect match at the perfect spot! (Includes case of nullptr pattern) + return loc; + } else { + // Do a fuzzy compare. + return match_bitap(text, pattern, loc); + } +} + +std::size_t diff_match_patch::match_main(const std::string &text, + const std::string &pattern, + std::size_t loc) { + return match_main(NUtils::to_wstring(text), NUtils::to_wstring(pattern), loc); +} + +std::size_t diff_match_patch::match_bitap(const std::wstring &text, + const std::wstring &pattern, + std::size_t loc) { + if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + auto &&s = match_alphabet(pattern); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? 
(speedup) + auto best_loc = text.find(pattern, loc); + if (best_loc != std::string::npos) { + score_threshold = + std::min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); + // What about in the other direction? (speedup) + auto start = std::min(loc + pattern.length(), text.length()); + best_loc = text.rfind(pattern, start); + if (best_loc != std::string::npos) { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + } + } + + // Initialise the bit arrays. + auto matchmask = 1 << (pattern.length() - 1); + best_loc = std::string::npos; + + std::size_t bin_min, bin_mid; + auto bin_max = pattern.length() + text.length(); + std::vector rd; + std::vector last_rd; + for (int d = 0; d < pattern.length(); d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore(d, loc + bin_mid, loc, pattern) <= score_threshold) { + bin_min = bin_mid; + } else { bin_max = bin_mid; - auto start = std::max( 1ULL, ( loc > bin_mid ) ? ( loc - bin_mid + 1 ) : 0 ); - auto finish = std::min( loc + bin_mid, text.length() ) + pattern.length(); - - rd = std::vector< int64_t >( finish + 2, 0 ); - rd[ finish + 1 ] = ( 1 << d ) - 1; - for ( auto j = finish; ( j != -1 ) && ( j >= start ); j-- ) - { - int64_t charMatch; - if ( text.length() <= j - 1 ) - { - // Out of range. - charMatch = 0; - } - else - { - auto pos = s.find( text[ j - 1 ] ); - if ( pos == s.end() ) - charMatch = 0; - else - charMatch = ( *pos ).second; - } - if ( d == 0 ) - { - // First pass: exact match. - rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch; - } - else - { - // Subsequent passes: fuzzy match. 
- rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch | ( ( ( last_rd[ j + 1 ] | last_rd[ j ] ) << 1 ) | 1 ) | last_rd[ j + 1 ]; - } - if ( ( rd[ j ] & matchmask ) != 0 ) - { - double score = match_bitapScore( d, j - 1, loc, pattern ); - // This match will almost certainly be better than any existing - // match. But check anyway. - if ( score <= score_threshold ) - { - // Told you so. - score_threshold = score; - best_loc = j - 1; - if ( best_loc > loc ) - { - // When passing loc, don't exceed our current distance from loc. - start = std::max( 1ULL, ( 2 * loc > best_loc ) ? 2 * loc - best_loc : 1 ); - } - else - { - // Already passed loc, downhill from here on in. - break; - } - } - } - } - if ( match_bitapScore( d + 1, loc, loc, pattern ) > score_threshold ) - { - // No hope for a (better) match at greater error levels. + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + auto start = std::max(1ULL, (loc > bin_mid) ? (loc - bin_mid + 1) : 0); + auto finish = std::min(loc + bin_mid, text.length()) + pattern.length(); + + rd = std::vector(finish + 2, 0); + rd[finish + 1] = (1 << d) - 1; + for (auto j = finish; (j != -1) && (j >= start); j--) { + int64_t charMatch; + if (text.length() <= j - 1) { + // Out of range. + charMatch = 0; + } else { + auto pos = s.find(text[j - 1]); + if (pos == s.end()) + charMatch = 0; + else + charMatch = (*pos).second; + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | + (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = match_bitapScore(d, j - 1, loc, pattern); + // This match will almost certainly be better than any existing + // match. But check anyway. + if (score <= score_threshold) { + // Told you so. 
+ score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = + std::max(1ULL, (2 * loc > best_loc) ? 2 * loc - best_loc : 1); + } else { + // Already passed loc, downhill from here on in. break; + } } - last_rd = std::move( rd ); - } - return best_loc; -} - -std::size_t diff_match_patch::match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ) -{ - return match_bitap( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); -} - -double diff_match_patch::match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ) -{ - const float accuracy = static_cast< float >( e ) / pattern.length(); - const auto proximity = std::abs( loc - x ); - if ( Match_Distance == 0 ) - { - // Dodge divide by zero error. - return proximity == 0 ? accuracy : 1.0; - } - return accuracy + ( proximity / static_cast< float >( Match_Distance ) ); -} - -diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::wstring &pattern ) -{ - TCharPosMap s; - std::size_t i; - for ( i = 0; i < pattern.length(); i++ ) - { - auto c = pattern[ i ]; - s[ c ] = 0; - } - for ( i = 0; i < pattern.length(); i++ ) - { - auto c = pattern[ i ]; - auto pos = s.find( c ); - std::size_t prev = 0; - if ( pos != s.end() ) - prev = ( *pos ).second; - s[ c ] = prev | ( 1ULL << ( pattern.length() - i - 1 ) ); - } - return s; -} - -diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::string &pattern ) -{ - return match_alphabet( NUtils::to_wstring( pattern ) ); + } + } + if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { + // No hope for a (better) match at greater error levels. 
+ break; + } + last_rd = std::move(rd); + } + return best_loc; +} + +std::size_t diff_match_patch::match_bitap(const std::string &text, + const std::string &pattern, + std::size_t loc) { + return match_bitap(NUtils::to_wstring(text), NUtils::to_wstring(pattern), + loc); +} + +double diff_match_patch::match_bitapScore(int64_t e, int64_t x, int64_t loc, + const std::wstring &pattern) { + const float accuracy = static_cast(e) / pattern.length(); + const auto proximity = std::abs(loc - x); + if (Match_Distance == 0) { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + (proximity / static_cast(Match_Distance)); +} + +diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( + const std::wstring &pattern) { + TCharPosMap s; + std::size_t i; + for (i = 0; i < pattern.length(); i++) { + auto c = pattern[i]; + s[c] = 0; + } + for (i = 0; i < pattern.length(); i++) { + auto c = pattern[i]; + auto pos = s.find(c); + std::size_t prev = 0; + if (pos != s.end()) prev = (*pos).second; + s[c] = prev | (1ULL << (pattern.length() - i - 1)); + } + return s; +} + +diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( + const std::string &pattern) { + return match_alphabet(NUtils::to_wstring(pattern)); } // PATCH FUNCTIONS -void diff_match_patch::patch_addContext( Patch &patch, const std::wstring &text ) -{ - if ( text.empty() ) - { - return; - } - std::wstring pattern = safeMid( text, patch.start2, patch.length1 ); - std::size_t padding = 0; - - // Look for the first and last matches of pattern in text. If two different - // matches are found, increase the pattern length. - while ( ( text.find( pattern ) != text.rfind( pattern ) ) && ( pattern.length() < ( Match_MaxBits - Patch_Margin - Patch_Margin ) ) ) - { - padding += Patch_Margin; - pattern = safeMid( text, std::max( 0ULL, ( ( patch.start2 > padding ) ? 
patch.start2 - padding : 0ULL ) ), std::min( text.length(), patch.start2 + patch.length1 + padding ) - std::max( 0ULL, ( patch.start2 > padding ) ? patch.start2 - padding : 0 ) ); - } - // Add one chunk for good luck. +void diff_match_patch::patch_addContext(Patch &patch, + const std::wstring &text) { + if (text.empty()) { + return; + } + std::wstring pattern = safeMid(text, patch.start2, patch.length1); + std::size_t padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while ((text.find(pattern) != text.rfind(pattern)) && + (pattern.length() < (Match_MaxBits - Patch_Margin - Patch_Margin))) { padding += Patch_Margin; - - // Add the prefix. - std::wstring prefix = safeMid( text, std::max( 0ULL, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0ULL ) ), patch.start2 - std::max( 0ULL, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0ULL ) ) ); - if ( !prefix.empty() ) - { - patch.diffs.emplace( patch.diffs.begin(), EQUAL, prefix ); - } - // Add the suffix. - std::wstring suffix = safeMid( text, patch.start2 + patch.length1, std::min( text.length(), patch.start2 + patch.length1 + padding ) - ( patch.start2 + patch.length1 ) ); - if ( !suffix.empty() ) - { - patch.diffs.emplace_back( EQUAL, suffix ); - } - - // Roll back the start points. - patch.start1 -= prefix.length(); - patch.start2 -= prefix.length(); - // Extend the lengths. - patch.length1 += prefix.length() + suffix.length(); - patch.length2 += prefix.length() + suffix.length(); -} - -void diff_match_patch::patch_addContext( Patch &patch, const std::string &text ) -{ - return patch_addContext( patch, NUtils::to_wstring( text ) ); -} - -TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring &text2 ) -{ - // Check for null inputs not needed since null can't be passed via std::wstring - - // No diffs provided, compute our own. 
- TDiffVector diffs = diff_main( text1, text2, true ); - if ( diffs.size() > 2 ) - { - diff_cleanupSemantic( diffs ); - diff_cleanupEfficiency( diffs ); - } - - return patch_make( text1, diffs ); -} - -TPatchVector diff_match_patch::patch_make( const TDiffVector &diffs ) -{ - // No origin string provided, compute our own. - const std::wstring text1 = diff_text1( diffs ); - return patch_make( text1, diffs ); -} - -TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring & /*text2*/, const TDiffVector &diffs ) -{ - // text2 is entirely unused. - return patch_make( text1, diffs ); -} - -TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const TDiffVector &diffs ) -{ - // Check for null inputs not needed since null can't be passed via std::wstring - - TPatchVector patches; - if ( diffs.empty() ) - { - return patches; // Get rid of the nullptr case. - } - Patch patch; - std::size_t char_count1 = 0; // Number of characters into the text1 string. - std::size_t char_count2 = 0; // Number of characters into the text2 string. - // Start with text1 (prepatch_text) and apply the diffs until we arrive at - // text2 (postpatch_text). We recreate the patches one by one to determine - // context info. - std::wstring prepatch_text = text1; - std::wstring postpatch_text = text1; - for ( auto &&aDiff : diffs ) - { - if ( patch.diffs.empty() && aDiff.operation != EQUAL ) - { - // A new patch starts here. 
- patch.start1 = char_count1; - patch.start2 = char_count2; - } - - switch ( aDiff.operation ) - { - case INSERT: - patch.diffs.emplace_back( aDiff ); - patch.length2 += aDiff.text.length(); - postpatch_text = postpatch_text.substr( 0, char_count2 ) + aDiff.text + safeMid( postpatch_text, char_count2 ); - break; - case DELETE: - patch.length1 += aDiff.text.length(); - patch.diffs.emplace_back( aDiff ); - postpatch_text = postpatch_text.substr( 0, char_count2 ) + safeMid( postpatch_text, char_count2 + aDiff.text.length() ); - break; - case EQUAL: - if ( aDiff.text.length() <= 2 * Patch_Margin && !patch.diffs.empty() && !( aDiff == diffs.back() ) ) - { - // Small equality inside a patch. - patch.diffs.emplace_back( aDiff ); - patch.length1 += aDiff.text.length(); - patch.length2 += aDiff.text.length(); - } - - if ( aDiff.text.length() >= 2 * Patch_Margin ) - { - // Time for a new patch. - if ( !patch.diffs.empty() ) - { - patch_addContext( patch, prepatch_text ); - patches.emplace_back( patch ); - patch = Patch(); - // Unlike Unidiff, our patch lists have a rolling context. - // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff - // Update prepatch text & pos to reflect the application of the - // just completed patch. - prepatch_text = postpatch_text; - char_count1 = char_count2; - } - } - break; + pattern = safeMid( + text, + std::max(0ULL, + ((patch.start2 > padding) ? patch.start2 - padding : 0ULL)), + std::min(text.length(), patch.start2 + patch.length1 + padding) - + std::max(0ULL, + (patch.start2 > padding) ? patch.start2 - padding : 0)); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + std::wstring prefix = safeMid( + text, + std::max(0ULL, + ((patch.start2 > padding) ? patch.start2 - padding : 0ULL)), + patch.start2 - + std::max(0ULL, + ((patch.start2 > padding) ? patch.start2 - padding : 0ULL))); + if (!prefix.empty()) { + patch.diffs.emplace(patch.diffs.begin(), EQUAL, prefix); + } + // Add the suffix. 
+ std::wstring suffix = + safeMid(text, patch.start2 + patch.length1, + std::min(text.length(), patch.start2 + patch.length1 + padding) - + (patch.start2 + patch.length1)); + if (!suffix.empty()) { + patch.diffs.emplace_back(EQUAL, suffix); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); +} + +void diff_match_patch::patch_addContext(Patch &patch, const std::string &text) { + return patch_addContext(patch, NUtils::to_wstring(text)); +} + +TPatchVector diff_match_patch::patch_make(const std::wstring &text1, + const std::wstring &text2) { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + // No diffs provided, compute our own. + TDiffVector diffs = diff_main(text1, text2, true); + if (diffs.size() > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } + + return patch_make(text1, diffs); +} + +TPatchVector diff_match_patch::patch_make(const TDiffVector &diffs) { + // No origin string provided, compute our own. + const std::wstring text1 = diff_text1(diffs); + return patch_make(text1, diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::wstring &text1, + const std::wstring & /*text2*/, + const TDiffVector &diffs) { + // text2 is entirely unused. + return patch_make(text1, diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::wstring &text1, + const TDiffVector &diffs) { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + TPatchVector patches; + if (diffs.empty()) { + return patches; // Get rid of the nullptr case. + } + Patch patch; + std::size_t char_count1 = 0; // Number of characters into the text1 string. + std::size_t char_count2 = 0; // Number of characters into the text2 string. 
+ // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + std::wstring prepatch_text = text1; + std::wstring postpatch_text = text1; + for (auto &&aDiff : diffs) { + if (patch.diffs.empty() && aDiff.operation != EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case INSERT: + patch.diffs.emplace_back(aDiff); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.substr(0, char_count2) + aDiff.text + + safeMid(postpatch_text, char_count2); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.emplace_back(aDiff); + postpatch_text = + postpatch_text.substr(0, char_count2) + + safeMid(postpatch_text, char_count2 + aDiff.text.length()); + break; + case EQUAL: + if (aDiff.text.length() <= 2 * Patch_Margin && !patch.diffs.empty() && + !(aDiff == diffs.back())) { + // Small equality inside a patch. + patch.diffs.emplace_back(aDiff); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); } - // Update the current character count. - if ( aDiff.operation != INSERT ) - { - char_count1 += aDiff.text.length(); + if (aDiff.text.length() >= 2 * Patch_Margin) { + // Time for a new patch. + if (!patch.diffs.empty()) { + patch_addContext(patch, prepatch_text); + patches.emplace_back(patch); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } } - if ( aDiff.operation != DELETE ) - { - char_count2 += aDiff.text.length(); + break; + } + + // Update the current character count. 
+ if (aDiff.operation != INSERT) { + char_count1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + char_count2 += aDiff.text.length(); + } + } + // Pick up the leftover patch if not empty. + if (!patch.diffs.empty()) { + patch_addContext(patch, prepatch_text); + patches.emplace_back(patch); + } + + return patches; +} + +TPatchVector diff_match_patch::patch_make(const std::string &text1, + const TDiffVector &diffs) { + return patch_make(NUtils::to_wstring(text1), diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::string &text1, + const std::string &text2, + const TDiffVector &diffs) { + return patch_make(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::string &text1, + const std::string &text2) { + return patch_make(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); +} + +TPatchVector diff_match_patch::patch_deepCopy(const TPatchVector &patches) { + TPatchVector patchesCopy; + for (auto &&aPatch : patches) { + Patch patchCopy = Patch(); + for (auto &&aDiff : aPatch.diffs) { + patchCopy.diffs.emplace_back(aDiff.operation, aDiff.text); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.emplace_back(patchCopy); + } + return patchesCopy; +} + +std::pair > diff_match_patch::patch_apply( + TPatchVector patches, std::wstring text) { + if (patches.empty()) { + return {text, std::vector(0)}; + } + + // Deep copy the patches so that no changes are made to originals. + patches = patch_deepCopy(patches); + + std::wstring nullPadding = patch_addPadding(patches); + text = nullPadding + text + nullPadding; + patch_splitMax(patches); + + std::size_t x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. 
If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + uint64_t delta = 0; + std::vector results(patches.size()); + for (auto &&aPatch : patches) { + auto expected_loc = aPatch.start2 + delta; + std::wstring text1 = diff_text1(aPatch.diffs); + std::size_t start_loc; + std::size_t end_loc = std::string::npos; + if (text1.length() > Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = + match_main(text, text1.substr(0, Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, text1.substr(text1.length() - Match_MaxBits), + expected_loc + text1.length() - Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; } - } - // Pick up the leftover patch if not empty. - if ( !patch.diffs.empty() ) - { - patch_addContext( patch, prepatch_text ); - patches.emplace_back( patch ); - } - - return patches; -} - -TPatchVector diff_match_patch::patch_make( const std::string &text1, const TDiffVector &diffs ) -{ - return patch_make( NUtils::to_wstring( text1 ), diffs ); -} - -TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ) -{ - return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), diffs ); -} - -TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2 ) -{ - return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); -} - -TPatchVector diff_match_patch::patch_deepCopy( const TPatchVector &patches ) -{ - TPatchVector patchesCopy; - for ( auto &&aPatch : patches ) - { - Patch patchCopy = Patch(); - for ( auto &&aDiff : aPatch.diffs ) - { - patchCopy.diffs.emplace_back( aDiff.operation, aDiff.text ); - } - patchCopy.start1 = 
aPatch.start1; - patchCopy.start2 = aPatch.start2; - patchCopy.length1 = aPatch.length1; - patchCopy.length2 = aPatch.length2; - patchesCopy.emplace_back( patchCopy ); - } - return patchesCopy; -} - -std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::wstring text ) -{ - if ( patches.empty() ) - { - return { text, std::vector< bool >( 0 ) }; - } - - // Deep copy the patches so that no changes are made to originals. - patches = patch_deepCopy( patches ); - - std::wstring nullPadding = patch_addPadding( patches ); - text = nullPadding + text + nullPadding; - patch_splitMax( patches ); - - std::size_t x = 0; - // delta keeps track of the offset between the expected and actual location - // of the previous patch. If there are patches expected at positions 10 and - // 20, but the first patch was found at 12, delta is 2 and the second patch - // has an effective expected position of 22. - uint64_t delta = 0; - std::vector< bool > results( patches.size() ); - for ( auto &&aPatch : patches ) - { - auto expected_loc = aPatch.start2 + delta; - std::wstring text1 = diff_text1( aPatch.diffs ); - std::size_t start_loc; - std::size_t end_loc = std::string::npos; - if ( text1.length() > Match_MaxBits ) - { - // patch_splitMax will only provide an oversized pattern in the case of - // a monster delete. - start_loc = match_main( text, text1.substr( 0, Match_MaxBits ), expected_loc ); - if ( start_loc != -1 ) - { - end_loc = match_main( text, text1.substr( text1.length() - Match_MaxBits ), expected_loc + text1.length() - Match_MaxBits ); - if ( end_loc == -1 || start_loc >= end_loc ) - { - // Can't find valid trailing context. Drop this patch. - start_loc = -1; - } + } + } else { + start_loc = match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. 
+ delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + std::wstring text2; + if (end_loc == -1) { + text2 = safeMid(text, start_loc, text1.length()); + } else { + text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); + } + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text = text.substr(0, start_loc) + diff_text2(aPatch.diffs) + + safeMid(text, start_loc + text1.length()); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + TDiffVector diffs = diff_main(text1, text2, false); + if (text1.length() > Match_MaxBits && + diff_levenshtein(diffs) / static_cast(text1.length()) > + Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. + results[x] = false; + } else { + diff_cleanupSemanticLossless(diffs); + std::size_t index1 = 0; + for (auto &&aDiff : aPatch.diffs) { + if (aDiff.operation != EQUAL) { + auto index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == INSERT) { + // Insertion + text = text.substr(0, start_loc + index2) + aDiff.text + + safeMid(text, start_loc + index2); + } else if (aDiff.operation == DELETE) { + // Deletion + text = + text.substr(0, start_loc + index2) + + safeMid(text, start_loc + + diff_xIndex( + diffs, index1 + aDiff.text.length())); + } } - } - else - { - start_loc = match_main( text, text1, expected_loc ); - } - if ( start_loc == -1 ) - { - // No match found. :( - results[ x ] = false; - // Subtract the delta for this failed patch from subsequent patches. - delta -= aPatch.length2 - aPatch.length1; - } - else - { - // Found a match. 
:) - results[ x ] = true; - delta = start_loc - expected_loc; - std::wstring text2; - if ( end_loc == -1 ) - { - text2 = safeMid( text, start_loc, text1.length() ); - } - else - { - text2 = safeMid( text, start_loc, end_loc + Match_MaxBits - start_loc ); - } - if ( text1 == text2 ) - { - // Perfect match, just shove the replacement text in. - text = text.substr( 0, start_loc ) + diff_text2( aPatch.diffs ) + safeMid( text, start_loc + text1.length() ); - } - else - { - // Imperfect match. Run a diff to get a framework of equivalent - // indices. - TDiffVector diffs = diff_main( text1, text2, false ); - if ( text1.length() > Match_MaxBits && diff_levenshtein( diffs ) / static_cast< float >( text1.length() ) > Patch_DeleteThreshold ) - { - // The end points match, but the content is unacceptably bad. - results[ x ] = false; - } - else - { - diff_cleanupSemanticLossless( diffs ); - std::size_t index1 = 0; - for ( auto &&aDiff : aPatch.diffs ) - { - if ( aDiff.operation != EQUAL ) - { - auto index2 = diff_xIndex( diffs, index1 ); - if ( aDiff.operation == INSERT ) - { - // Insertion - text = text.substr( 0, start_loc + index2 ) + aDiff.text + safeMid( text, start_loc + index2 ); - } - else if ( aDiff.operation == DELETE ) - { - // Deletion - text = text.substr( 0, start_loc + index2 ) + safeMid( text, start_loc + diff_xIndex( diffs, index1 + aDiff.text.length() ) ); - } - } - if ( aDiff.operation != DELETE ) - { - index1 += aDiff.text.length(); - } - } - } + if (aDiff.operation != DELETE) { + index1 += aDiff.text.length(); } + } } - x++; - } - // Strip the padding off. 
- text = safeMid( text, nullPadding.length(), text.length() - 2 * nullPadding.length() ); - return { text, results }; -} - -std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::string text ) -{ - return patch_apply( patches, NUtils::to_wstring( text ) ); -} - -std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) -{ - auto paddingLength = Patch_Margin; - std::wstring nullPadding; - for ( char x = 1; x <= paddingLength; x++ ) - { - nullPadding += NUtils::to_wstring( x ); - } - - // Bump all the patches forward. - for ( auto &&aPatch : patches ) - { - aPatch.start1 += paddingLength; - aPatch.start2 += paddingLength; - } - - // Add some padding on start of first diff. - //auto && patch = patches.front(); - //TDiffVector & diffs = patch.diffs; - if ( patches.front().diffs.empty() || patches.front().diffs.front().operation != EQUAL ) - { - // Add nullPadding equality. - patches.front().diffs.emplace( patches.front().diffs.begin(), EQUAL, nullPadding ); - patches.front().start1 -= paddingLength; // Should be 0. - patches.front().start2 -= paddingLength; // Should be 0. - patches.front().length1 += paddingLength; - patches.front().length2 += paddingLength; - } - else if ( paddingLength > patches.front().diffs.front().text.length() ) - { - // Grow first equality. - auto &&firstDiff = patches.front().diffs.front(); - auto extraLength = paddingLength - firstDiff.text.length(); - firstDiff.text = nullPadding.substr( firstDiff.text.length() ) + firstDiff.text; - patches.front().start1 -= extraLength; - patches.front().start2 -= extraLength; - patches.front().length1 += extraLength; - patches.front().length2 += extraLength; - } - - // Add some padding on end of last diff. - //patch = patches.back(); - //diffs = patch.diffs; - if ( ( patches.back().diffs.size() == 0 ) || patches.back().diffs.back().operation != EQUAL ) - { - // Add nullPadding equality. 
- patches.back().diffs.emplace_back( EQUAL, nullPadding ); - patches.back().length1 += paddingLength; - patches.back().length2 += paddingLength; - } - else if ( paddingLength > patches.back().diffs.back().text.length() ) - { - // Grow last equality. - //Diff &lastDiff = patches.back().diffs.back(); - auto extraLength = paddingLength - patches.back().diffs.back().text.length(); - patches.back().diffs.back().text += nullPadding.substr( 0, extraLength ); - patches.back().length1 += extraLength; - patches.back().length2 += extraLength; - } - - return nullPadding; -} - -void diff_match_patch::patch_splitMax( TPatchVector &patches ) -{ - auto patch_size = Match_MaxBits; - for ( int x = 0; x < patches.size(); x++ ) - { - if ( patches[ x ].length1 <= patch_size ) - { - continue; + } + } + x++; + } + // Strip the padding off. + text = safeMid(text, nullPadding.length(), + text.length() - 2 * nullPadding.length()); + return {text, results}; +} + +std::pair > diff_match_patch::patch_apply( + TPatchVector patches, std::string text) { + return patch_apply(patches, NUtils::to_wstring(text)); +} + +std::wstring diff_match_patch::patch_addPadding(TPatchVector &patches) { + auto paddingLength = Patch_Margin; + std::wstring nullPadding; + for (char x = 1; x <= paddingLength; x++) { + nullPadding += NUtils::to_wstring(x); + } + + // Bump all the patches forward. + for (auto &&aPatch : patches) { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + // auto && patch = patches.front(); + // TDiffVector & diffs = patch.diffs; + if (patches.front().diffs.empty() || + patches.front().diffs.front().operation != EQUAL) { + // Add nullPadding equality. + patches.front().diffs.emplace(patches.front().diffs.begin(), EQUAL, + nullPadding); + patches.front().start1 -= paddingLength; // Should be 0. + patches.front().start2 -= paddingLength; // Should be 0. 
+ patches.front().length1 += paddingLength; + patches.front().length2 += paddingLength; + } else if (paddingLength > patches.front().diffs.front().text.length()) { + // Grow first equality. + auto &&firstDiff = patches.front().diffs.front(); + auto extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = + nullPadding.substr(firstDiff.text.length()) + firstDiff.text; + patches.front().start1 -= extraLength; + patches.front().start2 -= extraLength; + patches.front().length1 += extraLength; + patches.front().length2 += extraLength; + } + + // Add some padding on end of last diff. + // patch = patches.back(); + // diffs = patch.diffs; + if ((patches.back().diffs.size() == 0) || + patches.back().diffs.back().operation != EQUAL) { + // Add nullPadding equality. + patches.back().diffs.emplace_back(EQUAL, nullPadding); + patches.back().length1 += paddingLength; + patches.back().length2 += paddingLength; + } else if (paddingLength > patches.back().diffs.back().text.length()) { + // Grow last equality. + // Diff &lastDiff = patches.back().diffs.back(); + auto extraLength = + paddingLength - patches.back().diffs.back().text.length(); + patches.back().diffs.back().text += nullPadding.substr(0, extraLength); + patches.back().length1 += extraLength; + patches.back().length2 += extraLength; + } + + return nullPadding; +} + +void diff_match_patch::patch_splitMax(TPatchVector &patches) { + auto patch_size = Match_MaxBits; + for (int x = 0; x < patches.size(); x++) { + if (patches[x].length1 <= patch_size) { + continue; + } + Patch bigpatch = patches[x]; + // Remove the big old patch. + NUtils::Splice(patches, x--, 1); + auto start1 = bigpatch.start1; + auto start2 = bigpatch.start2; + std::wstring precontext; + while (!bigpatch.diffs.empty()) { + // Create one of several smaller patches. 
+ Patch patch; + bool empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if (precontext.length() != 0) { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.emplace_back(EQUAL, precontext); + } + while (!bigpatch.diffs.empty() && + (patch.length1 < (patch_size - Patch_Margin))) { + auto diff_type = bigpatch.diffs[0].operation; + auto diff_text = bigpatch.diffs[0].text; + if (diff_type == INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.push_back(bigpatch.diffs.front()); + bigpatch.diffs.erase(bigpatch.diffs.begin()); + empty = false; + } else if ((diff_type == DELETE) && (patch.diffs.size() == 1) && + (patch.diffs.front().operation == EQUAL) && + (diff_text.length() > 2 * patch_size)) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.emplace_back(diff_type, diff_text); + bigpatch.diffs.erase(bigpatch.diffs.begin()); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substr( + 0, std::min(diff_text.length(), + (patch_size > (patch.length1 + Patch_Margin)) + ? (patch_size - patch.length1 - Patch_Margin) + : (-1 * 1ULL))); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if (diff_type == EQUAL) { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } else { + empty = false; + } + patch.diffs.emplace_back(diff_type, diff_text); + if (diff_text == bigpatch.diffs[0].text) { + bigpatch.diffs.erase(bigpatch.diffs.begin()); + } else { + bigpatch.diffs[0].text = + bigpatch.diffs[0].text.substr(diff_text.length()); + } } - Patch bigpatch = patches[ x ]; - // Remove the big old patch. 
- NUtils::Splice( patches, x--, 1 ); - auto start1 = bigpatch.start1; - auto start2 = bigpatch.start2; - std::wstring precontext; - while ( !bigpatch.diffs.empty() ) - { - // Create one of several smaller patches. - Patch patch; - bool empty = true; - patch.start1 = start1 - precontext.length(); - patch.start2 = start2 - precontext.length(); - if ( precontext.length() != 0 ) - { - patch.length1 = patch.length2 = precontext.length(); - patch.diffs.emplace_back( EQUAL, precontext ); - } - while ( !bigpatch.diffs.empty() && ( patch.length1 < ( patch_size - Patch_Margin ) ) ) - { - auto diff_type = bigpatch.diffs[ 0 ].operation; - auto diff_text = bigpatch.diffs[ 0 ].text; - if ( diff_type == INSERT ) - { - // Insertions are harmless. - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - patch.diffs.push_back( bigpatch.diffs.front() ); - bigpatch.diffs.erase( bigpatch.diffs.begin() ); - empty = false; - } - else if ( ( diff_type == DELETE ) && ( patch.diffs.size() == 1 ) && ( patch.diffs.front().operation == EQUAL ) && ( diff_text.length() > 2 * patch_size ) ) - { - // This is a large deletion. Let it pass in one chunk. - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - empty = false; - patch.diffs.emplace_back( diff_type, diff_text ); - bigpatch.diffs.erase( bigpatch.diffs.begin() ); - } - else - { - // Deletion or equality. Only take as much as we can stomach. - diff_text = diff_text.substr( 0, std::min( diff_text.length(), ( patch_size > ( patch.length1 + Patch_Margin ) ) ? 
( patch_size - patch.length1 - Patch_Margin ) : ( -1 * 1ULL ) ) ); - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - if ( diff_type == EQUAL ) - { - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - } - else - { - empty = false; - } - patch.diffs.emplace_back( diff_type, diff_text ); - if ( diff_text == bigpatch.diffs[ 0 ].text ) - { - bigpatch.diffs.erase( bigpatch.diffs.begin() ); - } - else - { - bigpatch.diffs[ 0 ].text = bigpatch.diffs[ 0 ].text.substr( diff_text.length() ); - } - } - } - // Compute the head context for the next patch. - precontext = diff_text2( patch.diffs ); - precontext = precontext.substr( std::max( 0ULL, ( precontext.length() > Patch_Margin ) ? ( precontext.length() - Patch_Margin ) : 0 ) ); - - std::wstring postcontext; - // Append the end context for this patch. - if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) - { - postcontext = diff_text1( bigpatch.diffs ).substr( 0, Patch_Margin ); - } - else - { - postcontext = diff_text1( bigpatch.diffs ); - } - - if ( postcontext.length() != 0 ) - { - patch.length1 += postcontext.length(); - patch.length2 += postcontext.length(); - if ( ( patch.diffs.size() != 0 ) && ( patch.diffs[ patch.diffs.size() - 1 ].operation == EQUAL ) ) - { - patch.diffs[ patch.diffs.size() - 1 ].text += postcontext; - } - else - { - patch.diffs.emplace_back( EQUAL, postcontext ); - } - } - if ( !empty ) - { - NUtils::Splice( patches, ++x, 0ULL, patch ); - } + } + // Compute the head context for the next patch. + precontext = diff_text2(patch.diffs); + precontext = precontext.substr( + std::max(0ULL, (precontext.length() > Patch_Margin) + ? (precontext.length() - Patch_Margin) + : 0)); + + std::wstring postcontext; + // Append the end context for this patch. 
+ if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs).substr(0, Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + + if (postcontext.length() != 0) { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ((patch.diffs.size() != 0) && + (patch.diffs[patch.diffs.size() - 1].operation == EQUAL)) { + patch.diffs[patch.diffs.size() - 1].text += postcontext; + } else { + patch.diffs.emplace_back(EQUAL, postcontext); } + } + if (!empty) { + NUtils::Splice(patches, ++x, 0ULL, patch); + } } + } } -std::wstring diff_match_patch::patch_toText( const TPatchVector &patches ) -{ - std::wstring text; - for ( auto &&aPatch : patches ) - { - text += aPatch.toString(); - } - return text; +std::wstring diff_match_patch::patch_toText(const TPatchVector &patches) { + std::wstring text; + for (auto &&aPatch : patches) { + text += aPatch.toString(); + } + return text; } -TPatchVector diff_match_patch::patch_fromText( const std::wstring &textline ) -{ - TPatchVector patches; - if ( textline.empty() ) - { - return patches; - } - auto text = NUtils::splitString( textline, L"\n", true ); - int textPointer = 0; - std::wstring line; - while ( textPointer < text.size() ) - { - patches.push_back( text[ textPointer ] ); - auto &patch = patches.back(); - textPointer++; - - while ( textPointer < text.size() ) - { - if ( text[ textPointer ].empty() ) - { - ++textPointer; - continue; - } - - auto sign = text[ textPointer ][ 0 ]; - - line = text[ textPointer ].substr( 1 ); - NUtils::replace( line, L"+", L"%2b" ); - line = NUtils::fromPercentEncoding( line ); - if ( sign == '-' ) - { - // Deletion. - patch.diffs.emplace_back( DELETE, line ); - } - else if ( sign == '+' ) - { - // Insertion. - patch.diffs.emplace_back( INSERT, line ); - } - else if ( sign == ' ' ) - { - // Minor equality. - patch.diffs.emplace_back( EQUAL, line ); - } - else if ( sign == '@' ) - { - // Start of next patch. 
- break; - } - else - { - // WTF? - throw std::wstring( std::wstring( L"Invalid patch mode '" ) + sign + std::wstring( L" in: " ) + line ); - return {}; - } - textPointer++; - } - } +TPatchVector diff_match_patch::patch_fromText(const std::wstring &textline) { + TPatchVector patches; + if (textline.empty()) { return patches; + } + auto text = NUtils::splitString(textline, L"\n", true); + int textPointer = 0; + std::wstring line; + while (textPointer < text.size()) { + patches.push_back(text[textPointer]); + auto &patch = patches.back(); + textPointer++; + + while (textPointer < text.size()) { + if (text[textPointer].empty()) { + ++textPointer; + continue; + } + + auto sign = text[textPointer][0]; + + line = text[textPointer].substr(1); + NUtils::replace(line, L"+", L"%2b"); + line = NUtils::fromPercentEncoding(line); + if (sign == '-') { + // Deletion. + patch.diffs.emplace_back(DELETE, line); + } else if (sign == '+') { + // Insertion. + patch.diffs.emplace_back(INSERT, line); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.emplace_back(EQUAL, line); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? 
+ throw std::wstring(std::wstring(L"Invalid patch mode '") + sign + + std::wstring(L" in: ") + line); + return {}; + } + textPointer++; + } + } + return patches; } -TPatchVector diff_match_patch::patch_fromText( const std::string &textline ) -{ - return patch_fromText( NUtils::to_wstring( textline ) ); +TPatchVector diff_match_patch::patch_fromText(const std::string &textline) { + return patch_fromText(NUtils::to_wstring(textline)); } -std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos ) -{ - return safeMid( str, pos, std::string::npos ); +std::wstring diff_match_patch::safeMid(const std::wstring &str, + std::size_t pos) { + return safeMid(str, pos, std::string::npos); } -std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) -{ - return ( pos == str.length() ) ? std::wstring() : str.substr( pos, len ); +std::wstring diff_match_patch::safeMid(const std::wstring &str, std::size_t pos, + std::size_t len) { + return (pos == str.length()) ? 
std::wstring() : str.substr(pos, len); } -std::wstring NUtils::to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty ) -{ - std::wstring retVal; - if ( std::holds_alternative< std::wstring >( variant ) ) - retVal = std::get< std::wstring >( variant ); +std::wstring NUtils::to_wstring(const diff_match_patch::TVariant &variant, + bool doubleQuoteEmpty) { + std::wstring retVal; + if (std::holds_alternative(variant)) + retVal = std::get(variant); - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; + if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; - return retVal; + return retVal; } -std::wstring NUtils::to_wstring( const Patch &patch, bool doubleQuoteEmpty ) -{ - auto retVal = patch.toString(); - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - return retVal; +std::wstring NUtils::to_wstring(const Patch &patch, bool doubleQuoteEmpty) { + auto retVal = patch.toString(); + if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; + return retVal; } -std::wstring NUtils::to_wstring( const Diff &diff, bool doubleQuoteEmpty ) -{ - auto retVal = diff.toString(); - if ( doubleQuoteEmpty && retVal.empty() ) - return LR"("")"; - return retVal; +std::wstring NUtils::to_wstring(const Diff &diff, bool doubleQuoteEmpty) { + auto retVal = diff.toString(); + if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; + return retVal; } diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h index 08f4582b..47361095 100644 --- a/cpp17/diff_match_patch.h +++ b/cpp17/diff_match_patch.h @@ -20,15 +20,15 @@ #ifndef DIFF_MATCH_PATCH_H #define DIFF_MATCH_PATCH_H -#include -#include -#include -#include +#include #include +#include +#include #include -#include +#include +#include #ifdef USE_GTEST - #include "gtest/gtest.h" +#include "gtest/gtest.h" #endif /* @@ -39,7 +39,8 @@ * @author fraser@google.com (Neil Fraser) * * Qt/C++ port by mikeslemmer@gmail.com (Mike Slemmer): - * Qt->C++17 with native STL use only, port by 
scott@towel42.com (Scott Aron Bloom) + * Qt->C++17 with native STL use only, port by scott@towel42.com (Scott Aron + Bloom) * C++17 was intentionally chosen for variant support * * Code known to compile with C++17 @@ -65,132 +66,124 @@ */ /**- -* The data structure representing a diff is a Linked list of Diff objects: -* {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), -* Diff(Operation.EQUAL, " world.")} -* which means: delete "Hello", add "Goodbye" and keep " world." -*/ -enum Operation -{ - DELETE, - INSERT, - EQUAL -}; + * The data structure representing a diff is a Linked list of Diff objects: + * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), + * Diff(Operation.EQUAL, " world.")} + * which means: delete "Hello", add "Goodbye" and keep " world." + */ +enum Operation { DELETE, INSERT, EQUAL }; /** -* Class representing one diff operation. -*/ -class Diff -{ -public: - Operation operation{ DELETE }; - // One of: INSERT, DELETE or EQUAL. - std::wstring text; - // The text associated with this diff operation. - - /** + * Class representing one diff operation. + */ +class Diff { + public: + Operation operation{DELETE}; + // One of: INSERT, DELETE or EQUAL. + std::wstring text; + // The text associated with this diff operation. + + /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL. * @param text The text being applied. 
*/ - Diff( Operation _operation, const std::wstring &_text ); - Diff( Operation _operation, const wchar_t *_text ); - Diff( Operation _operation, const std::string &_text ); - Diff( Operation _operation, const char *_text ); - Diff(); - inline bool isNull() const; - std::wstring toString() const; - bool operator==( const Diff &d ) const; - bool operator!=( const Diff &d ) const; - - static std::wstring strOperation( Operation op ); + Diff(Operation _operation, const std::wstring &_text); + Diff(Operation _operation, const wchar_t *_text); + Diff(Operation _operation, const std::string &_text); + Diff(Operation _operation, const char *_text); + Diff(); + inline bool isNull() const; + std::wstring toString() const; + bool operator==(const Diff &d) const; + bool operator!=(const Diff &d) const; + + static std::wstring strOperation(Operation op); }; -using TDiffVector = std::vector< Diff >; +using TDiffVector = std::vector; /** -* Class representing one patch operation. -*/ -class Patch -{ -public: - TDiffVector diffs; - std::size_t length1{ 0 }; - std::size_t length2{ 0 }; - std::size_t start1{ 0 }; - std::size_t start2{ 0 }; - - /** + * Class representing one patch operation. + */ +class Patch { + public: + TDiffVector diffs; + std::size_t length1{0}; + std::size_t length2{0}; + std::size_t start1{0}; + std::size_t start2{0}; + + /** * Constructor. Initializes with an empty list of diffs. 
*/ - Patch(); - Patch( std::wstring &text ); // modifies text, and removes the text used - bool isNull() const; - std::wstring toString() const; + Patch(); + Patch(std::wstring &text); // modifies text, and removes the text used + bool isNull() const; + std::wstring toString() const; -private: - std::wstring getPatchHeader() const; - std::wstring getCoordinateString( std::size_t start, std::size_t length ) const; + private: + std::wstring getPatchHeader() const; + std::wstring getCoordinateString(std::size_t start, std::size_t length) const; }; -using TPatchVector = std::vector< Patch >; +using TPatchVector = std::vector; /** * Class containing the diff, match and patch methods. * Also contains the behaviour settings. */ -class diff_match_patch -{ - friend class diff_match_patch_test; +class diff_match_patch { + friend class diff_match_patch_test; #ifdef USE_GTEST - FRIEND_TEST( diff_match_patch_test, testDiffCommonOverlap ); - FRIEND_TEST( diff_match_patch_test, testDiffHalfmatch ); - FRIEND_TEST( diff_match_patch_test, testDiffLinesToChars ); - FRIEND_TEST( diff_match_patch_test, testDiffCharsToLines ); - FRIEND_TEST( diff_match_patch_test, testDiffBisect ); - FRIEND_TEST( diff_match_patch_test, testMatchAlphabet ); - FRIEND_TEST( diff_match_patch_test, testMatchBitap ); - FRIEND_TEST( diff_match_patch_test, testPatchAddContext ); + FRIEND_TEST(diff_match_patch_test, testDiffCommonOverlap); + FRIEND_TEST(diff_match_patch_test, testDiffHalfmatch); + FRIEND_TEST(diff_match_patch_test, testDiffLinesToChars); + FRIEND_TEST(diff_match_patch_test, testDiffCharsToLines); + FRIEND_TEST(diff_match_patch_test, testDiffBisect); + FRIEND_TEST(diff_match_patch_test, testMatchAlphabet); + FRIEND_TEST(diff_match_patch_test, testMatchBitap); + FRIEND_TEST(diff_match_patch_test, testPatchAddContext); #endif -public: - // Defaults. - // Set these on your diff_match_patch instance to override the defaults. - - // Number of seconds to map a diff before giving up (0 for infinity). 
- float Diff_Timeout{ 1.0f }; - // Cost of an empty edit operation in terms of edit characters. - short Diff_EditCost{ 4 }; - // At what point is no match declared (0.0 = perfection, 1.0 = very loose). - float Match_Threshold{ 0.5f }; - // How far to search for a match (0 = exact location, 1000+ = broad match). - // A match this many characters away from the expected location will add - // 1.0 to the score (0.0 is a perfect match). - int64_t Match_Distance{ 1000 }; - // When deleting a large block of text (over ~64 characters), how close does - // the contents have to match the expected contents. (0.0 = perfection, - // 1.0 = very loose). Note that Match_Threshold controls how closely the - // end points of a delete need to match. - float Patch_DeleteThreshold{ 0.5f }; - // Chunk size for context length. - short Patch_Margin{ 4 }; - - short Match_MaxBits{ 32 }; // unit tests are based on 32 bits - -private: - // Define some regex patterns for matching boundaries. - static std::wregex BLANKLINEEND; - static std::wregex BLANKLINESTART; - -public: - using TStringVector = std::vector< std::wstring >; - using TVariant = std::variant< std::wstring, TStringVector >; - using TVariantVector = std::vector< TVariant >; - using TCharPosMap = std::map< wchar_t, std::size_t >; - - diff_match_patch(); - - // DIFF FUNCTIONS - - /** + public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout{1.0f}; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost{4}; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold{0.5f}; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). 
+ int64_t Match_Distance{1000}; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold{0.5f}; + // Chunk size for context length. + short Patch_Margin{4}; + + short Match_MaxBits{32}; // unit tests are based on 32 bits + + private: + // Define some regex patterns for matching boundaries. + static std::wregex BLANKLINEEND; + static std::wregex BLANKLINESTART; + + public: + using TStringVector = std::vector; + using TVariant = std::variant; + using TVariantVector = std::vector; + using TCharPosMap = std::map; + + diff_match_patch(); + + // DIFF FUNCTIONS + + /** * Find the differences between two texts. * Run a faster slightly less optimal diff. * This method allows the 'checklines' of diff_main() to be optional. @@ -199,10 +192,10 @@ class diff_match_patch * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ - TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2 ); - TDiffVector diff_main( const std::string &text1, const std::string &text2 ); + TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2); + TDiffVector diff_main(const std::string &text1, const std::string &text2); - /** + /** * Find the differences between two texts. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. @@ -211,10 +204,12 @@ class diff_match_patch * If true, then run a faster slightly less optimal diff. * @return Linked List of Diff objects. 
*/ - TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ); - TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines ); + TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2, + bool checklines); + TDiffVector diff_main(const std::string &text1, const std::string &text2, + bool checklines); - /** + /** * Find the differences between two texts. Simplifies the problem by * stripping any common prefix or suffix off the texts before diffing. * @param text1 Old string to be diffed. @@ -226,11 +221,13 @@ class diff_match_patch * internally for recursive calls. Users should set DiffTimeout instead. * @return Linked List of Diff objects. */ -private: - TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); - TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ); + private: + TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2, + bool checklines, clock_t deadline); + TDiffVector diff_main(const std::string &text1, const std::string &text2, + bool checklines, clock_t deadline); - /** + /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param text1 Old string to be diffed. @@ -241,11 +238,13 @@ class diff_match_patch * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. 
*/ -private: - TDiffVector diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); - TDiffVector diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ); + private: + TDiffVector diff_compute(const std::wstring &text1, const std::wstring &text2, + bool checklines, clock_t deadline); + TDiffVector diff_compute(const std::string &text1, const std::string &text2, + bool checklines, clock_t deadline); - /** + /** * Do a quick line-level diff on both strings, then rediff the parts for * greater accuracy. * This speedup can produce non-minimal diffs. @@ -254,11 +253,13 @@ class diff_match_patch * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. */ -private: - TDiffVector diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ); - TDiffVector diff_lineMode( std::string text1, std::string text2, clock_t deadline ); + private: + TDiffVector diff_lineMode(std::wstring text1, std::wstring text2, + clock_t deadline); + TDiffVector diff_lineMode(std::string text1, std::string text2, + clock_t deadline); - /** + /** * Find the 'middle snake' of a diff, split the problem in two * and return the recursively constructed diff. * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. @@ -266,11 +267,13 @@ class diff_match_patch * @param text2 New string to be diffed. * @return Linked List of Diff objects. 
*/ -protected: - TDiffVector diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ); - TDiffVector diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ); + protected: + TDiffVector diff_bisect(const std::wstring &text1, const std::wstring &text2, + clock_t deadline); + TDiffVector diff_bisect(const std::string &text1, const std::string &text2, + clock_t deadline); - /** + /** * Given the location of the 'middle snake', split the diff in two parts * and recurse. * @param text1 Old string to be diffed. @@ -280,11 +283,15 @@ class diff_match_patch * @param deadline Time at which to bail if not yet complete. * @return LinkedList of Diff objects. */ -private: - TDiffVector diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ); - TDiffVector diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ); + private: + TDiffVector diff_bisectSplit(const std::wstring &text1, + const std::wstring &text2, std::size_t x, + std::size_t y, clock_t deadline); + TDiffVector diff_bisectSplit(const std::string &text1, + const std::string &text2, std::size_t x, + std::size_t y, clock_t deadline); - /** + /** * Split two texts into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text1 First string. @@ -293,11 +300,15 @@ class diff_match_patch * encoded text2 and the List of unique strings. The zeroth element * of the List of unique strings is intentionally blank. 
*/ -protected: - std::vector< TVariant > diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ); // return elems 0 and 1 are std::wstring, elem 2 is TStringVector - std::vector< TVariant > diff_linesToChars( const std::string &text1, const std::string &text2 ); + protected: + std::vector diff_linesToChars( + const std::wstring &text1, + const std::wstring &text2); // return elems 0 and 1 are std::wstring, + // elem 2 is TStringVector + std::vector diff_linesToChars(const std::string &text1, + const std::string &text2); - /** + /** * Split a text into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text String to encode. @@ -305,50 +316,58 @@ class diff_match_patch * @param lineHash Map of strings to indices. * @return Encoded string. */ -private: - std::wstring diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ); + private: + std::wstring diff_linesToCharsMunge( + const std::wstring &text, TStringVector &lineArray, + std::unordered_map &lineHash); - /** + /** * Rehydrate the text in a diff from a string of line hashes to real lines of * text. * @param diffs LinkedList of Diff objects. * @param lineArray List of unique strings. */ -private: - void diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray ); + private: + void diff_charsToLines(TDiffVector &diffs, const TStringVector &lineArray); - /** + /** * Determine the common prefix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the start of each string. 
*/ -public: - std::size_t diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ); - std::size_t diff_commonPrefix( const std::string &text1, const std::string &text2 ); + public: + std::size_t diff_commonPrefix(const std::wstring &text1, + const std::wstring &text2); + std::size_t diff_commonPrefix(const std::string &text1, + const std::string &text2); - /** + /** * Determine the common suffix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of each string. */ -public: - std::size_t diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ); - std::size_t diff_commonSuffix( const std::string &text1, const std::string &text2 ); + public: + std::size_t diff_commonSuffix(const std::wstring &text1, + const std::wstring &text2); + std::size_t diff_commonSuffix(const std::string &text1, + const std::string &text2); - /** + /** * Determine if the suffix of one string is the prefix of another. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of the first * string and the start of the second string. */ -protected: - std::size_t diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ); - std::size_t diff_commonOverlap( const std::string &text1, const std::string &text2 ); + protected: + std::size_t diff_commonOverlap(const std::wstring &text1, + const std::wstring &text2); + std::size_t diff_commonOverlap(const std::string &text1, + const std::string &text2); - /** + /** * Do the two texts share a substring which is at least half the length of * the longer text? * This speedup can produce non-minimal diffs. @@ -358,11 +377,13 @@ class diff_match_patch * suffix of text1, the prefix of text2, the suffix of text2 and the * common middle. Or null if there was no match. 
*/ -protected: - TStringVector diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ); - TStringVector diff_halfMatch( const std::string &text1, const std::string &text2 ); + protected: + TStringVector diff_halfMatch(const std::wstring &text1, + const std::wstring &text2); + TStringVector diff_halfMatch(const std::string &text1, + const std::string &text2); - /** + /** * Does a substring of shorttext exist within longtext such that the * substring is at least half the length of longtext? * @param longtext Longer string. @@ -372,27 +393,29 @@ class diff_match_patch * suffix of longtext, the prefix of shorttext, the suffix of shorttext * and the common middle. Or null if there was no match. */ -private: - TStringVector diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ); - TStringVector diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ); + private: + TStringVector diff_halfMatchI(const std::wstring &longtext, + const std::wstring &shorttext, std::size_t i); + TStringVector diff_halfMatchI(const std::string &longtext, + const std::string &shorttext, std::size_t i); - /** + /** * Reduce the number of edits by eliminating semantically trivial equalities. * @param diffs LinkedList of Diff objects. */ -public: - void diff_cleanupSemantic( TDiffVector &diffs ); + public: + void diff_cleanupSemantic(TDiffVector &diffs); - /** + /** * Look for single edits surrounded on both sides by equalities * which can be shifted sideways to align the edit to a word boundary. * e.g: The cat came. -> The cat came. * @param diffs LinkedList of Diff objects. */ -public: - void diff_cleanupSemanticLossless( TDiffVector &diffs ); + public: + void diff_cleanupSemanticLossless(TDiffVector &diffs); - /** + /** * Given two strings, compute a score representing whether the internal * boundary falls on logical boundaries. * Scores range from 6 (best) to 0 (worst). 
@@ -400,26 +423,28 @@ class diff_match_patch * @param two Second string. * @return The score. */ -private: - int64_t diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ); - int64_t diff_cleanupSemanticScore( const std::string &one, const std::string &two ); + private: + int64_t diff_cleanupSemanticScore(const std::wstring &one, + const std::wstring &two); + int64_t diff_cleanupSemanticScore(const std::string &one, + const std::string &two); - /** + /** * Reduce the number of edits by eliminating operationally trivial equalities. * @param diffs LinkedList of Diff objects. */ -public: - void diff_cleanupEfficiency( TDiffVector &diffs ); + public: + void diff_cleanupEfficiency(TDiffVector &diffs); - /** + /** * Reorder and merge like edit sections. Merge equalities. * Any edit section can move as long as it doesn't cross an equality. * @param diffs LinkedList of Diff objects. */ -public: - void diff_cleanupMerge( TDiffVector &diffs ); + public: + void diff_cleanupMerge(TDiffVector &diffs); - /** + /** * loc is a location in text1, compute and return the equivalent location in * text2. * e.g. "The cat" vs "The big cat", 1->1, 5->8 @@ -427,43 +452,43 @@ class diff_match_patch * @param loc Location within text1. * @return Location within text2. */ -public: - std::size_t diff_xIndex( const TDiffVector &diffs, std::size_t loc ); + public: + std::size_t diff_xIndex(const TDiffVector &diffs, std::size_t loc); - /** + /** * Convert a Diff list into a pretty HTML report. * @param diffs LinkedList of Diff objects. * @return HTML representation. */ -public: - std::wstring diff_prettyHtml( const TDiffVector &diffs ); + public: + std::wstring diff_prettyHtml(const TDiffVector &diffs); - /** + /** * Compute and return the source text (all equalities and deletions). * @param diffs LinkedList of Diff objects. * @return Source text. 
*/ -public: - std::wstring diff_text1( const TDiffVector &diffs ); + public: + std::wstring diff_text1(const TDiffVector &diffs); - /** + /** * Compute and return the destination text (all equalities and insertions). * @param diffs LinkedList of Diff objects. * @return Destination text. */ -public: - std::wstring diff_text2( const TDiffVector &diffs ); + public: + std::wstring diff_text2(const TDiffVector &diffs); - /** + /** * Compute the Levenshtein distance; the number of inserted, deleted or * substituted characters. * @param diffs LinkedList of Diff objects. * @return Number of changes. */ -public: - std::size_t diff_levenshtein( const TDiffVector &diffs ); + public: + std::size_t diff_levenshtein(const TDiffVector &diffs); - /** + /** * Crush the diff into an encoded string which describes the operations * required to transform text1 into text2. * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. @@ -471,10 +496,10 @@ class diff_match_patch * @param diffs Array of diff tuples. * @return Delta text. */ -public: - std::wstring diff_toDelta( const TDiffVector &diffs ); + public: + std::wstring diff_toDelta(const TDiffVector &diffs); - /** + /** * Given the original text1, and an encoded string which describes the * operations required to transform text1 into text2, compute the full diff. * @param text1 Source string for the diff. @@ -482,13 +507,15 @@ class diff_match_patch * @return Array of diff tuples or null if invalid. * @throws std::wstring If invalid input. */ -public: - TDiffVector diff_fromDelta( const std::wstring &text1, const std::wstring &delta ); - TDiffVector diff_fromDelta( const std::string &text1, const std::string &delta ); + public: + TDiffVector diff_fromDelta(const std::wstring &text1, + const std::wstring &delta); + TDiffVector diff_fromDelta(const std::string &text1, + const std::string &delta); - // MATCH FUNCTIONS + // MATCH FUNCTIONS - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc'. 
* Returns -1 if no match found. * @param text The text to search. @@ -496,11 +523,13 @@ class diff_match_patch * @param loc The location to search around. * @return Best match index or -1. */ -public: - std::size_t match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); - std::size_t match_main( const std::string &text, const std::string &pattern, std::size_t loc ); + public: + std::size_t match_main(const std::wstring &text, const std::wstring &pattern, + std::size_t loc); + std::size_t match_main(const std::string &text, const std::string &pattern, + std::size_t loc); - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc' using the * Bitap algorithm. Returns -1 if no match found. * @param text The text to search. @@ -508,11 +537,13 @@ class diff_match_patch * @param loc The location to search around. * @return Best match index or -1. */ -protected: - std::size_t match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); - std::size_t match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ); + protected: + std::size_t match_bitap(const std::wstring &text, const std::wstring &pattern, + std::size_t loc); + std::size_t match_bitap(const std::string &text, const std::string &pattern, + std::size_t loc); - /** + /** * Compute and return the score for a match with e errors and x location. * @param e Number of errors in match. * @param x Location of match. @@ -520,83 +551,87 @@ class diff_match_patch * @param pattern Pattern being sought. * @return Overall score for match (0.0 = good, 1.0 = bad). */ -private: - double match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ); + private: + double match_bitapScore(int64_t e, int64_t x, int64_t loc, + const std::wstring &pattern); - /** + /** * Initialise the alphabet for the Bitap algorithm. * @param pattern The text to encode. * @return Hash of character locations. 
*/ -protected: - TCharPosMap match_alphabet( const std::wstring &pattern ); - TCharPosMap match_alphabet( const std::string &pattern ); + protected: + TCharPosMap match_alphabet(const std::wstring &pattern); + TCharPosMap match_alphabet(const std::string &pattern); - // PATCH FUNCTIONS + // PATCH FUNCTIONS - /** + /** * Increase the context until it is unique, * but don't let the pattern expand beyond Match_MaxBits. * @param patch The patch to grow. * @param text Source text. */ -protected: - void patch_addContext( Patch &patch, const std::wstring &text ); - void patch_addContext( Patch &patch, const std::string &text ); + protected: + void patch_addContext(Patch &patch, const std::wstring &text); + void patch_addContext(Patch &patch, const std::string &text); - /** + /** * Compute a list of patches to turn text1 into text2. * A set of diffs will be computed. * @param text1 Old text. * @param text2 New text. * @return LinkedList of Patch objects. */ -public: - TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2 ); - TPatchVector patch_make( const std::string &text1, const std::string &text2 ); + public: + TPatchVector patch_make(const std::wstring &text1, const std::wstring &text2); + TPatchVector patch_make(const std::string &text1, const std::string &text2); - /** + /** * Compute a list of patches to turn text1 into text2. * text1 will be derived from the provided diffs. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. */ -public: - TPatchVector patch_make( const TDiffVector &diffs ); + public: + TPatchVector patch_make(const TDiffVector &diffs); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is ignored, diffs are the delta between text1 and text2. * @param text1 Old text. * @param text2 Ignored. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. 
- * @deprecated Prefer patch_make(const std::wstring &text1, const std::list< Diff > &diffs). + * @deprecated Prefer patch_make(const std::wstring &text1, const std::list< + * Diff > &diffs). */ -public: - TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2, const TDiffVector &diffs ); - TPatchVector patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ); + public: + TPatchVector patch_make(const std::wstring &text1, const std::wstring &text2, + const TDiffVector &diffs); + TPatchVector patch_make(const std::string &text1, const std::string &text2, + const TDiffVector &diffs); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is not provided, diffs are the delta between text1 and text2. * @param text1 Old text. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. */ -public: - TPatchVector patch_make( const std::wstring &text1, const TDiffVector &diffs ); - TPatchVector patch_make( const std::string &text1, const TDiffVector &diffs ); + public: + TPatchVector patch_make(const std::wstring &text1, const TDiffVector &diffs); + TPatchVector patch_make(const std::string &text1, const TDiffVector &diffs); - /** + /** * Given an array of patches, return another array that is identical. * @param patches Array of patch objects. * @return Array of patch objects. */ -public: - TPatchVector patch_deepCopy( const TPatchVector &patches ); + public: + TPatchVector patch_deepCopy(const TPatchVector &patches); - /** + /** * Merge a set of patches onto the text. Return a patched text, as well * as an array of true/false values indicating which patches were applied. * @param patches Array of patch objects. @@ -604,73 +639,76 @@ class diff_match_patch * @return Two element Object array, containing the new text and an array of * boolean values. 
*/ -public: - std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::wstring text ); - std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::string text ); + public: + std::pair > patch_apply(TPatchVector patches, + std::wstring text); + std::pair > patch_apply(TPatchVector patches, + std::string text); - /** + /** * Add some padding on text start and end so that edges can match something. * Intended to be called only from within patch_apply. * @param patches Array of patch objects. * @return The padding string added to each side. */ -public: - std::wstring patch_addPadding( TPatchVector &patches ); + public: + std::wstring patch_addPadding(TPatchVector &patches); - /** + /** * Look through the patches and break up any which are longer than the * maximum limit of the match algorithm. * Intended to be called only from within patch_apply. * @param patches LinkedList of Patch objects. */ -public: - void patch_splitMax( TPatchVector &patches ); + public: + void patch_splitMax(TPatchVector &patches); - /** + /** * Take a list of patches and return a textual representation. * @param patches List of Patch objects. * @return Text representation of patches. */ -public: - std::wstring patch_toText( const TPatchVector &patches ); + public: + std::wstring patch_toText(const TPatchVector &patches); - /** + /** * Parse a textual representation of patches and return a List of Patch * objects. * @param textline Text representation of patches. * @return List of Patch objects. * @throws std::wstring If invalid input. */ -public: - TPatchVector patch_fromText( const std::wstring &textline ); - TPatchVector patch_fromText( const std::string &textline ); + public: + TPatchVector patch_fromText(const std::wstring &textline); + TPatchVector patch_fromText(const std::string &textline); - /** + /** * A safer version of std::wstring.mid(pos). This one returns "" instead of * null when the postion equals the string length. 
* @param str String to take a substring from. * @param pos Position to start the substring from. * @return Substring. */ -private: - static std::wstring safeMid( const std::wstring &str, std::size_t pos ); + private: + static std::wstring safeMid(const std::wstring &str, std::size_t pos); - /** - * A safer version of std::wstring.mid(pos, len). This one returns "" instead of - * null when the postion equals the string length. + /** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead + * of null when the postion equals the string length. * @param str String to take a substring from. * @param pos Position to start the substring from. * @param len Length of substring. * @return Substring. */ -private: - static std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); + private: + static std::wstring safeMid(const std::wstring &str, std::size_t pos, + std::size_t len); }; -namespace NUtils -{ - std::wstring to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty = false ); - std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty = false ); - std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty = false ); -} -#endif // DIFF_MATCH_PATCH_H +namespace NUtils { +std::wstring to_wstring(const diff_match_patch::TVariant &variant, + bool doubleQuoteEmpty = false); +std::wstring to_wstring(const Diff &diff, bool doubleQuoteEmpty = false); +std::wstring to_wstring(const Patch &patch, bool doubleQuoteEmpty = false); +} // namespace NUtils +#endif // DIFF_MATCH_PATCH_H diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index 1ee02f55..2533c160 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -17,952 +17,1316 @@ */ #include "diff_match_patch.h" -#include "diff_match_patch_utils.h" + #include "diff_match_patch_test.h" +#include "diff_match_patch_utils.h" #ifdef USE_GTEST - #include "gtest/gtest.h" +#include "gtest/gtest.h" 
#endif -#include #include +#include -int main( int argc, char **argv ) -{ +int main(int argc, char **argv) { #ifdef USE_GTEST - ::testing::InitGoogleTest( &argc, argv ); - int retVal = RUN_ALL_TESTS(); + ::testing::InitGoogleTest(&argc, argv); + int retVal = RUN_ALL_TESTS(); #else - diff_match_patch_test dmp_test; - std::cerr << "Starting diff_match_patch unit tests.\n"; - int retVal = dmp_test.run_all_tests(); - std::cerr << "Done.\n"; + diff_match_patch_test dmp_test; + std::cerr << "Starting diff_match_patch unit tests.\n"; + int retVal = dmp_test.run_all_tests(); + std::cerr << "Done.\n"; #endif - return retVal; + return retVal; } -static wchar_t kZero{ 0 }; -static wchar_t kOne{ 1 }; -static wchar_t kTwo{ 2 }; +static wchar_t kZero{0}; +static wchar_t kOne{1}; +static wchar_t kTwo{2}; -diff_match_patch_test::diff_match_patch_test() -{ -} +diff_match_patch_test::diff_match_patch_test() {} #ifndef USE_GTEST -bool diff_match_patch_test::runTest( std::function< void() > test ) -{ - bool retVal = false; - try - { - test(); - numPassedTests++; - retVal = true; - } - catch ( std::string msg ) - { - std::cerr << "Test failed: " << msg << "\n"; - numFailedTests++; - retVal = false; - } - return retVal; -} - -int diff_match_patch_test::run_all_tests() -{ - auto startTime = std::chrono::high_resolution_clock::now(); - - runTest( std::bind( &diff_match_patch_test::testDiffCommonPrefix, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffCommonSuffix, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffCommonOverlap, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffHalfmatch, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffLinesToChars, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffCharsToLines, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffCleanupMerge, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffCleanupSemanticLossless, this ) ); - runTest( std::bind( 
&diff_match_patch_test::testDiffCleanupSemantic, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffCleanupEfficiency, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffPrettyHtml, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffText, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffDelta, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffXIndex, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffLevenshtein, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffBisect, this ) ); - runTest( std::bind( &diff_match_patch_test::testDiffMain, this ) ); - - runTest( std::bind( &diff_match_patch_test::testMatchAlphabet, this ) ); - runTest( std::bind( &diff_match_patch_test::testMatchBitap, this ) ); - runTest( std::bind( &diff_match_patch_test::testMatchMain, this ) ); - - runTest( std::bind( &diff_match_patch_test::testPatchObj, this ) ); - runTest( std::bind( &diff_match_patch_test::testPatchFromText, this ) ); - runTest( std::bind( &diff_match_patch_test::testPatchToText, this ) ); - runTest( std::bind( &diff_match_patch_test::testPatchAddContext, this ) ); - runTest( std::bind( &diff_match_patch_test::testPatchMake, this ) ); - runTest( std::bind( &diff_match_patch_test::testPatchSplitMax, this ) ); - runTest( std::bind( &diff_match_patch_test::testPatchAddPadding, this ) ); - runTest( std::bind( &diff_match_patch_test::testPatchApply, this ) ); - if ( numFailedTests == 0 ) - std::cout << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; - else - std::cerr << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; - auto endTime = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast< std::chrono::milliseconds >( endTime - startTime ).count(); - std::wcout << "Total time: " << elapsed << " ms\n"; - return ( numFailedTests == 0 ) ? 
0 : 1; +bool diff_match_patch_test::runTest(std::function test) { + bool retVal = false; + try { + test(); + numPassedTests++; + retVal = true; + } catch (std::string msg) { + std::cerr << "Test failed: " << msg << "\n"; + numFailedTests++; + retVal = false; + } + return retVal; +} + +int diff_match_patch_test::run_all_tests() { + auto startTime = std::chrono::high_resolution_clock::now(); + + runTest(std::bind(&diff_match_patch_test::testDiffCommonPrefix, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCommonSuffix, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCommonOverlap, this)); + runTest(std::bind(&diff_match_patch_test::testDiffHalfmatch, this)); + runTest(std::bind(&diff_match_patch_test::testDiffLinesToChars, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCharsToLines, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCleanupMerge, this)); + runTest( + std::bind(&diff_match_patch_test::testDiffCleanupSemanticLossless, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCleanupSemantic, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCleanupEfficiency, this)); + runTest(std::bind(&diff_match_patch_test::testDiffPrettyHtml, this)); + runTest(std::bind(&diff_match_patch_test::testDiffText, this)); + runTest(std::bind(&diff_match_patch_test::testDiffDelta, this)); + runTest(std::bind(&diff_match_patch_test::testDiffXIndex, this)); + runTest(std::bind(&diff_match_patch_test::testDiffLevenshtein, this)); + runTest(std::bind(&diff_match_patch_test::testDiffBisect, this)); + runTest(std::bind(&diff_match_patch_test::testDiffMain, this)); + + runTest(std::bind(&diff_match_patch_test::testMatchAlphabet, this)); + runTest(std::bind(&diff_match_patch_test::testMatchBitap, this)); + runTest(std::bind(&diff_match_patch_test::testMatchMain, this)); + + runTest(std::bind(&diff_match_patch_test::testPatchObj, this)); + runTest(std::bind(&diff_match_patch_test::testPatchFromText, this)); + 
runTest(std::bind(&diff_match_patch_test::testPatchToText, this)); + runTest(std::bind(&diff_match_patch_test::testPatchAddContext, this)); + runTest(std::bind(&diff_match_patch_test::testPatchMake, this)); + runTest(std::bind(&diff_match_patch_test::testPatchSplitMax, this)); + runTest(std::bind(&diff_match_patch_test::testPatchAddPadding, this)); + runTest(std::bind(&diff_match_patch_test::testPatchApply, this)); + if (numFailedTests == 0) + std::cout << numPassedTests << " Tests Passed\n" + << numFailedTests << " Tests Failed\n"; + else + std::cerr << numPassedTests << " Tests Passed\n" + << numFailedTests << " Tests Failed\n"; + auto endTime = std::chrono::high_resolution_clock::now(); + auto elapsed = + std::chrono::duration_cast(endTime - startTime) + .count(); + std::wcout << "Total time: " << elapsed << " ms\n"; + return (numFailedTests == 0) ? 0 : 1; } #endif // DIFF TEST FUNCTIONS -TEST_F( diff_match_patch_test, testDiffCommonPrefix ) -{ - // Detect any common prefix. - assertEquals( "diff_commonPrefix: nullptr case.", 0, dmp.diff_commonPrefix( "abc", "xyz" ) ); +TEST_F(diff_match_patch_test, testDiffCommonPrefix) { + // Detect any common prefix. + assertEquals("diff_commonPrefix: nullptr case.", 0, + dmp.diff_commonPrefix("abc", "xyz")); - assertEquals( "diff_commonPrefix: Non-nullptr case.", 4, dmp.diff_commonPrefix( "1234abcdef", "1234xyz" ) ); + assertEquals("diff_commonPrefix: Non-nullptr case.", 4, + dmp.diff_commonPrefix("1234abcdef", "1234xyz")); - assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( "1234", "1234xyz" ) ); + assertEquals("diff_commonPrefix: Whole case.", 4, + dmp.diff_commonPrefix("1234", "1234xyz")); } -TEST_F( diff_match_patch_test, testDiffCommonSuffix ) -{ - // Detect any common suffix. - assertEquals( "diff_commonSuffix: nullptr case.", 0, dmp.diff_commonSuffix( "abc", "xyz" ) ); +TEST_F(diff_match_patch_test, testDiffCommonSuffix) { + // Detect any common suffix. 
+ assertEquals("diff_commonSuffix: nullptr case.", 0, + dmp.diff_commonSuffix("abc", "xyz")); - assertEquals( "diff_commonSuffix: Non-nullptr case.", 4, dmp.diff_commonSuffix( "abcdef1234", "xyz1234" ) ); + assertEquals("diff_commonSuffix: Non-nullptr case.", 4, + dmp.diff_commonSuffix("abcdef1234", "xyz1234")); - assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( "1234", "xyz1234" ) ); + assertEquals("diff_commonSuffix: Whole case.", 4, + dmp.diff_commonSuffix("1234", "xyz1234")); } -TEST_F( diff_match_patch_test, testDiffCommonOverlap ) -{ - // Detect any suffix/prefix overlap. - assertEquals( "diff_commonOverlap: nullptr case.", 0, dmp.diff_commonOverlap( "", "abcd" ) ); +TEST_F(diff_match_patch_test, testDiffCommonOverlap) { + // Detect any suffix/prefix overlap. + assertEquals("diff_commonOverlap: nullptr case.", 0, + dmp.diff_commonOverlap("", "abcd")); - assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( "abc", "abcd" ) ); + assertEquals("diff_commonOverlap: Whole case.", 3, + dmp.diff_commonOverlap("abc", "abcd")); - assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( "123456", "abcd" ) ); + assertEquals("diff_commonOverlap: No overlap.", 0, + dmp.diff_commonOverlap("123456", "abcd")); - assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( "123456xxx", "xxxabcd" ) ); + assertEquals("diff_commonOverlap: Overlap.", 3, + dmp.diff_commonOverlap("123456xxx", "xxxabcd")); - // Some overly clever languages (C#) may treat ligatures as equal to their - // component letters. E.g. U+FB01 == 'fi' - assertEquals( "diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap( L"fi", std::wstring( L"\ufb01i" ) ) ); + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. 
U+FB01 == 'fi' + assertEquals("diff_commonOverlap: Unicode.", 0, + dmp.diff_commonOverlap(L"fi", std::wstring(L"\ufb01i"))); } -TEST_F( diff_match_patch_test, testDiffHalfmatch ) -{ - // Detect a halfmatch. - dmp.Diff_Timeout = 1; - assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( "1234567890", "abcdef" ) ); - - assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( "12345", "23" ) ); - - assertEquals( "diff_halfMatch: Single Match #1.", TStringVector( { L"12", L"90", L"a", L"z", L"345678" } ), dmp.diff_halfMatch( "1234567890", "a345678z" ) ); - - assertEquals( "diff_halfMatch: Single Match #2.", TStringVector( { L"a", L"z", L"12", L"90", L"345678" } ), dmp.diff_halfMatch( "a345678z", "1234567890" ) ); - - assertEquals( "diff_halfMatch: Single Match #3.", TStringVector( { L"abc", L"z", L"1234", L"0", L"56789" } ), dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); - - assertEquals( "diff_halfMatch: Single Match #4.", TStringVector( { L"a", L"xyz", L"1", L"7890", L"23456" } ), dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); - - assertEquals( "diff_halfMatch: Multiple Matches #1.", TStringVector( { L"12123", L"123121", L"a", L"z", L"1234123451234" } ), dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); - - assertEquals( "diff_halfMatch: Multiple Matches #2.", TStringVector( { L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-=" } ), dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); - - assertEquals( "diff_halfMatch: Multiple Matches #3.", TStringVector( { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" } ), dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); - - // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy - assertEquals( "diff_halfMatch: Non-optimal halfmatch.", TStringVector( { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" } ), dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); - - dmp.Diff_Timeout = 0; - assertEmpty( 
"diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); -} - -TEST_F( diff_match_patch_test, testDiffLinesToChars ) -{ - // Convert lines down to characters. - TStringVector tmpVector = TStringVector( { L"", L"alpha\n", L"beta\n" } ); - TVariantVector tmpVarList; - tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 1 } ) ); //(("\u0001\u0002\u0001")); - tmpVarList.emplace_back( NUtils::to_wstring( { 2, 1, 2 } ) ); // (("\u0002\u0001\u0002")); - tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n" ) ); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.emplace_back( L"" ); - tmpVector.emplace_back( L"alpha\r\n" ); - tmpVector.emplace_back( L"beta\r\n" ); - tmpVector.emplace_back( L"\r\n" ); - tmpVarList.emplace_back( std::wstring() ); - tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 3, 3 } ) ); // (("\u0001\u0002\u0003\u0003")); - tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "", "alpha\r\nbeta\r\n\r\n\r\n" ) ); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.emplace_back( L"" ); - tmpVector.emplace_back( L"a" ); - tmpVector.emplace_back( L"b" ); - tmpVarList.emplace_back( NUtils::to_wstring( 1 ) ); // (("\u0001")); - tmpVarList.emplace_back( NUtils::to_wstring( 2 ) ); // (("\u0002")); - tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "a", "b" ) ); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - tmpVarList.clear(); - std::wstring lines; - std::wstring chars; - for ( int x = 1; x < n + 1; x++ ) - { - tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); - lines += std::to_wstring( x ) + L"\n"; - chars += NUtils::to_wstring( x ); - } - assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); - assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); - tmpVector.emplace( tmpVector.begin(), L"" ); - tmpVarList.emplace_back( chars ); - tmpVarList.emplace_back( std::wstring() ); - tmpVarList.emplace_back( tmpVector ); - assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, {} ) ); -} - -TEST_F( diff_match_patch_test, testDiffCharsToLines ) -{ - // First check that Diff equality works. - assertTrue( "diff_charsToLines:", Diff( EQUAL, "a" ) == Diff( EQUAL, "a" ) ); - - assertEquals( "diff_charsToLines:", Diff( EQUAL, "a" ), Diff( EQUAL, "a" ) ); - - // Convert chars up to lines. - TDiffVector diffs; - diffs.emplace_back( EQUAL, NUtils::to_wstring( { 1, 2, 1 } ) ); // ("\u0001\u0002\u0001"); - diffs.emplace_back( INSERT, NUtils::to_wstring( { 2, 1, 2 } ) ); // ("\u0002\u0001\u0002"); - TStringVector tmpVector; - tmpVector.emplace_back( L"" ); - tmpVector.emplace_back( L"alpha\n" ); - tmpVector.emplace_back( L"beta\n" ); - dmp.diff_charsToLines( diffs, tmpVector ); - assertEquals( "diff_charsToLines:", TDiffVector( { Diff( EQUAL, "alpha\nbeta\nalpha\n" ), Diff( INSERT, "beta\nalpha\nbeta\n" ) } ), diffs ); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - std::vector< TVariant > tmpVarList; - std::wstring lines; - std::wstring chars; - for ( int x = 1; x < n + 1; x++ ) - { - tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); - lines += std::to_wstring( x ) + L"\n"; - chars += NUtils::to_wstring( x ); - } - assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); - assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); - tmpVector.emplace( tmpVector.begin(), L"" ); - diffs = { Diff( DELETE, chars ) }; - dmp.diff_charsToLines( diffs, tmpVector ); - assertEquals( "diff_charsToLines: More than 256.", TDiffVector( { Diff( DELETE, lines ) } ), diffs ); -} - -TEST_F( diff_match_patch_test, testDiffCleanupMerge ) -{ - // Cleanup a messy diff. - TDiffVector diffs; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: nullptr case.", TDiffVector(), diffs ); - - diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: No change case.", TDiffVector( { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) } ), diffs ); - - diffs = { Diff( EQUAL, "a" ), Diff( EQUAL, "b" ), Diff( EQUAL, "c" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge equalities.", TDiffVector( { Diff( EQUAL, "abc" ) } ), diffs ); - - diffs = { Diff( DELETE, "a" ), Diff( DELETE, "b" ), Diff( DELETE, "c" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge deletions.", TDiffVector( { Diff( DELETE, "abc" ) } ), diffs ); - - diffs = { Diff( INSERT, "a" ), Diff( INSERT, "b" ), Diff( INSERT, "c" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Merge insertions.", TDiffVector( { Diff( INSERT, "abc" ) } ), diffs ); - - diffs = { Diff( DELETE, "a" ), Diff( INSERT, "b" ), Diff( DELETE, "c" ), Diff( INSERT, "d" ), Diff( EQUAL, "e" ), Diff( EQUAL, "f" ) }; - dmp.diff_cleanupMerge( diffs 
); - assertEquals( "diff_cleanupMerge: Merge interweave.", TDiffVector( { Diff( DELETE, "ac" ), Diff( INSERT, "bd" ), Diff( EQUAL, "ef" ) } ), diffs ); - - diffs = { Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", TDiffVector( { Diff( EQUAL, "a" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "c" ) } ), diffs ); - - diffs = { Diff( EQUAL, "x" ), Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ), Diff( EQUAL, "y" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", TDiffVector( { Diff( EQUAL, "xa" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "cy" ) } ), diffs ); - - diffs = { Diff( EQUAL, "a" ), Diff( INSERT, "ba" ), Diff( EQUAL, "c" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit left.", TDiffVector( { Diff( INSERT, "ab" ), Diff( EQUAL, "ac" ) } ), diffs ); - - diffs = { Diff( EQUAL, "c" ), Diff( INSERT, "ab" ), Diff( EQUAL, "a" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit right.", TDiffVector( { Diff( EQUAL, "ca" ), Diff( INSERT, "ba" ) } ), diffs ); - - diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( EQUAL, "c" ), Diff( DELETE, "ac" ), Diff( EQUAL, "x" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit left recursive.", TDiffVector( { Diff( DELETE, "abc" ), Diff( EQUAL, "acx" ) } ), diffs ); - - diffs = { Diff( EQUAL, "x" ), Diff( DELETE, "ca" ), Diff( EQUAL, "c" ), Diff( DELETE, "b" ), Diff( EQUAL, "a" ) }; - dmp.diff_cleanupMerge( diffs ); - assertEquals( "diff_cleanupMerge: Slide edit right recursive.", TDiffVector( { Diff( EQUAL, "xca" ), Diff( DELETE, "cba" ) } ), diffs ); -} - -TEST_F( diff_match_patch_test, testDiffCleanupSemanticLossless ) -{ - // Slide diffs to match logical boundaries. 
- auto diffs = TDiffVector(); - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); - - diffs = { Diff( EQUAL, "AAA\r\n\r\nBBB" ), Diff( INSERT, "\r\nDDD\r\n\r\nBBB" ), Diff( EQUAL, "\r\nEEE" ) }; - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemanticLossless: Blank lines.", TDiffVector( { Diff( EQUAL, "AAA\r\n\r\n" ), Diff( INSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, "BBB\r\nEEE" ) } ), diffs ); - - diffs = { Diff( EQUAL, "AAA\r\nBBB" ), Diff( INSERT, " DDD\r\nBBB" ), Diff( EQUAL, " EEE" ) }; - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", TDiffVector( { Diff( EQUAL, "AAA\r\n" ), Diff( INSERT, "BBB DDD\r\n" ), Diff( EQUAL, "BBB EEE" ) } ), diffs ); - - diffs = { Diff( EQUAL, "The c" ), Diff( INSERT, "ow and the c" ), Diff( EQUAL, "at." ) }; - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EQUAL, "The " ), Diff( INSERT, "cow and the " ), Diff( EQUAL, "cat." ) } ), diffs ); - - diffs = { Diff( EQUAL, "The-c" ), Diff( INSERT, "ow-and-the-c" ), Diff( EQUAL, "at." ) }; - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", TDiffVector( { Diff( EQUAL, "The-" ), Diff( INSERT, "cow-and-the-" ), Diff( EQUAL, "cat." ) } ), diffs ); - - diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "a" ), Diff( EQUAL, "ax" ) }; - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Hitting the start.", TDiffVector( { Diff( DELETE, "a" ), Diff( EQUAL, "aax" ) } ), diffs ); - - diffs = { Diff( EQUAL, "xa" ), Diff( DELETE, "a" ), Diff( EQUAL, "a" ) }; - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Hitting the end.", TDiffVector( { Diff( EQUAL, "xaa" ), Diff( DELETE, "a" ) } ), diffs ); - - diffs = { Diff( EQUAL, "The xxx. 
The " ), Diff( INSERT, "zzz. The " ), Diff( EQUAL, "yyy." ) }; - dmp.diff_cleanupSemanticLossless( diffs ); - assertEquals( "diff_cleanupSemantic: Sentence boundaries.", TDiffVector( { Diff( EQUAL, "The xxx." ), Diff( INSERT, " The zzz." ), Diff( EQUAL, " The yyy." ) } ), diffs ); -} - -TEST_F( diff_match_patch_test, testDiffCleanupSemantic ) -{ - // Cleanup semantically trivial equalities. - auto diffs = TDiffVector(); - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); - - diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No elimination #1.", TDiffVector( { Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) } ), diffs ); - - diffs = { Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No elimination #2.", TDiffVector( { Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) } ), diffs ); - - diffs = { Diff( DELETE, "a" ), Diff( EQUAL, "b" ), Diff( DELETE, "c" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Simple elimination.", TDiffVector( { Diff( DELETE, "abc" ), Diff( INSERT, "b" ) } ), diffs ); - - diffs = { Diff( DELETE, "ab" ), Diff( EQUAL, "cd" ), Diff( DELETE, "e" ), Diff( EQUAL, "f" ), Diff( INSERT, "g" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Backpass elimination.", TDiffVector( { Diff( DELETE, "abcdef" ), Diff( INSERT, "cdfg" ) } ), diffs ); - - diffs = { Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ), Diff( EQUAL, "_" ), Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( 
"diff_cleanupSemantic: Multiple elimination.", TDiffVector( { Diff( DELETE, "AB_AB" ), Diff( INSERT, "1A2_1A2" ) } ), diffs ); - - diffs = { Diff( EQUAL, "The c" ), Diff( DELETE, "ow and the c" ), Diff( EQUAL, "at." ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EQUAL, "The " ), Diff( DELETE, "cow and the " ), Diff( EQUAL, "cat." ) } ), diffs ); - - diffs = { Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: No overlap elimination.", TDiffVector( { Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) } ), diffs ); - - diffs = { Diff( DELETE, "abcxxx" ), Diff( INSERT, "xxxdef" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Overlap elimination.", TDiffVector( { Diff( DELETE, "abc" ), Diff( EQUAL, "xxx" ), Diff( INSERT, "def" ) } ), diffs ); - - diffs = { Diff( DELETE, "xxxabc" ), Diff( INSERT, "defxxx" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", TDiffVector( { Diff( INSERT, "def" ), Diff( EQUAL, "xxx" ), Diff( DELETE, "abc" ) } ), diffs ); - - diffs = { Diff( DELETE, "abcd1212" ), Diff( INSERT, "1212efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A3" ), Diff( INSERT, "3BC" ) }; - dmp.diff_cleanupSemantic( diffs ); - assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", TDiffVector( { Diff( DELETE, "abcd" ), Diff( EQUAL, "1212" ), Diff( INSERT, "efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A" ), Diff( EQUAL, "3" ), Diff( INSERT, "BC" ) } ), diffs ); +TEST_F(diff_match_patch_test, testDiffHalfmatch) { + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty("diff_halfMatch: No match #1.", + dmp.diff_halfMatch("1234567890", "abcdef")); + + assertEmpty("diff_halfMatch: No match #2.", + dmp.diff_halfMatch("12345", "23")); + + assertEquals("diff_halfMatch: Single Match #1.", + TStringVector({L"12", L"90", L"a", L"z", L"345678"}), + dmp.diff_halfMatch("1234567890", "a345678z")); + + assertEquals("diff_halfMatch: Single Match #2.", + TStringVector({L"a", L"z", L"12", L"90", L"345678"}), + dmp.diff_halfMatch("a345678z", "1234567890")); + + assertEquals("diff_halfMatch: Single Match #3.", + TStringVector({L"abc", L"z", L"1234", L"0", L"56789"}), + dmp.diff_halfMatch("abc56789z", "1234567890")); + + assertEquals("diff_halfMatch: Single Match #4.", + TStringVector({L"a", L"xyz", L"1", L"7890", L"23456"}), + dmp.diff_halfMatch("a23456xyz", "1234567890")); + + assertEquals( + "diff_halfMatch: Multiple Matches #1.", + TStringVector({L"12123", L"123121", L"a", L"z", L"1234123451234"}), + dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + + assertEquals( + "diff_halfMatch: Multiple Matches #2.", + TStringVector({L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-="}), + dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + + assertEquals( + "diff_halfMatch: Multiple Matches #3.", + TStringVector({L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y"}), + dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not + // -qHillo+x=HelloHe-w+Hulloy + assertEquals("diff_halfMatch: Non-optimal halfmatch.", + TStringVector({L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe"}), + dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + + dmp.Diff_Timeout = 0; + assertEmpty("diff_halfMatch: Optimal no halfmatch.", + dmp.diff_halfMatch(L"qHilloHelloHew", L"xHelloHeHulloy")); } -TEST_F( diff_match_patch_test, testDiffCleanupEfficiency ) -{ - // Cleanup operationally trivial equalities. 
- dmp.Diff_EditCost = 4; - auto diffs = TDiffVector(); - dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: nullptr case.", TDiffVector(), diffs ); - - diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; - dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: No elimination.", TDiffVector( { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) } ), diffs ); - - diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; - dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", TDiffVector( { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xyz34" ) } ), diffs ); - - diffs = { Diff( INSERT, "12" ), Diff( EQUAL, "x" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; - dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", TDiffVector( { Diff( DELETE, "xcd" ), Diff( INSERT, "12x34" ) } ), diffs ); - - diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xy" ), Diff( INSERT, "34" ), Diff( EQUAL, "z" ), Diff( DELETE, "cd" ), Diff( INSERT, "56" ) }; - dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: Backpass elimination.", TDiffVector( { Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xy34z56" ) } ), diffs ); - - dmp.Diff_EditCost = 5; - diffs = { Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) }; - dmp.diff_cleanupEfficiency( diffs ); - assertEquals( "diff_cleanupEfficiency: High cost elimination.", TDiffVector( { Diff( DELETE, "abwxyzcd" ), Diff( INSERT, "12wxyz34" ) } ), diffs ); - dmp.Diff_EditCost = 4; +TEST_F(diff_match_patch_test, testDiffLinesToChars) { + // Convert lines down to characters. 
+ TStringVector tmpVector = TStringVector({L"", L"alpha\n", L"beta\n"}); + TVariantVector tmpVarList; + tmpVarList.emplace_back( + NUtils::to_wstring({1, 2, 1})); //(("\u0001\u0002\u0001")); + tmpVarList.emplace_back( + NUtils::to_wstring({2, 1, 2})); // (("\u0002\u0001\u0002")); + tmpVarList.emplace_back(tmpVector); + assertEquals( + "diff_linesToChars:", tmpVarList, + dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back(L""); + tmpVector.emplace_back(L"alpha\r\n"); + tmpVector.emplace_back(L"beta\r\n"); + tmpVector.emplace_back(L"\r\n"); + tmpVarList.emplace_back(std::wstring()); + tmpVarList.emplace_back( + NUtils::to_wstring({1, 2, 3, 3})); // (("\u0001\u0002\u0003\u0003")); + tmpVarList.emplace_back(tmpVector); + assertEquals("diff_linesToChars:", tmpVarList, + dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back(L""); + tmpVector.emplace_back(L"a"); + tmpVector.emplace_back(L"b"); + tmpVarList.emplace_back(NUtils::to_wstring(1)); // (("\u0001")); + tmpVarList.emplace_back(NUtils::to_wstring(2)); // (("\u0002")); + tmpVarList.emplace_back(tmpVector); + assertEquals("diff_linesToChars:", tmpVarList, + dmp.diff_linesToChars("a", "b")); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + std::wstring lines; + std::wstring chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.emplace_back(std::to_wstring(x) + L"\n"); + lines += std::to_wstring(x) + L"\n"; + chars += NUtils::to_wstring(x); + } + assertEquals("diff_linesToChars: More than 256 (setup).", n, + tmpVector.size()); + assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.emplace(tmpVector.begin(), L""); + tmpVarList.emplace_back(chars); + tmpVarList.emplace_back(std::wstring()); + tmpVarList.emplace_back(tmpVector); + assertEquals("diff_linesToChars: More than 256.", tmpVarList, + dmp.diff_linesToChars(lines, {})); } -TEST_F( diff_match_patch_test, testDiffPrettyHtml ) -{ - // Pretty print. - auto diffs = TDiffVector( { Diff( EQUAL, "a\n" ), Diff( DELETE, "b" ), Diff( INSERT, "c&d" ) } ); - assertEquals( "diff_prettyHtml:", L"
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); +TEST_F(diff_match_patch_test, testDiffCharsToLines) { + // First check that Diff equality works. + assertTrue("diff_charsToLines:", Diff(EQUAL, "a") == Diff(EQUAL, "a")); + + assertEquals("diff_charsToLines:", Diff(EQUAL, "a"), Diff(EQUAL, "a")); + + // Convert chars up to lines. + TDiffVector diffs; + diffs.emplace_back(EQUAL, + NUtils::to_wstring({1, 2, 1})); // ("\u0001\u0002\u0001"); + diffs.emplace_back(INSERT, + NUtils::to_wstring({2, 1, 2})); // ("\u0002\u0001\u0002"); + TStringVector tmpVector; + tmpVector.emplace_back(L""); + tmpVector.emplace_back(L"alpha\n"); + tmpVector.emplace_back(L"beta\n"); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines:", + TDiffVector({Diff(EQUAL, "alpha\nbeta\nalpha\n"), + Diff(INSERT, "beta\nalpha\nbeta\n")}), + diffs); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.clear(); + std::vector tmpVarList; + std::wstring lines; + std::wstring chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.emplace_back(std::to_wstring(x) + L"\n"); + lines += std::to_wstring(x) + L"\n"; + chars += NUtils::to_wstring(x); + } + assertEquals("diff_linesToChars: More than 256 (setup).", n, + tmpVector.size()); + assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.emplace(tmpVector.begin(), L""); + diffs = {Diff(DELETE, chars)}; + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines: More than 256.", + TDiffVector({Diff(DELETE, lines)}), diffs); } -TEST_F( diff_match_patch_test, testDiffText ) -{ - // Compute the source and destination texts. 
- auto diffs = { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ) }; - assertEquals( "diff_text1:", L"jumps over the lazy", dmp.diff_text1( diffs ) ); - assertEquals( "diff_text2:", L"jumped over a lazy", dmp.diff_text2( diffs ) ); +TEST_F(diff_match_patch_test, testDiffCleanupMerge) { + // Cleanup a messy diff. + TDiffVector diffs; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals( + "diff_cleanupMerge: No change case.", + TDiffVector({Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")}), + diffs); + + diffs = {Diff(EQUAL, "a"), Diff(EQUAL, "b"), Diff(EQUAL, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge equalities.", + TDiffVector({Diff(EQUAL, "abc")}), diffs); + + diffs = {Diff(DELETE, "a"), Diff(DELETE, "b"), Diff(DELETE, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge deletions.", + TDiffVector({Diff(DELETE, "abc")}), diffs); + + diffs = {Diff(INSERT, "a"), Diff(INSERT, "b"), Diff(INSERT, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge insertions.", + TDiffVector({Diff(INSERT, "abc")}), diffs); + + diffs = {Diff(DELETE, "a"), Diff(INSERT, "b"), Diff(DELETE, "c"), + Diff(INSERT, "d"), Diff(EQUAL, "e"), Diff(EQUAL, "f")}; + dmp.diff_cleanupMerge(diffs); + assertEquals( + "diff_cleanupMerge: Merge interweave.", + TDiffVector({Diff(DELETE, "ac"), Diff(INSERT, "bd"), Diff(EQUAL, "ef")}), + diffs); + + diffs = {Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Prefix and suffix detection.", + TDiffVector({Diff(EQUAL, "a"), Diff(DELETE, "d"), + Diff(INSERT, "b"), Diff(EQUAL, "c")}), + diffs); + + diffs = 
{Diff(EQUAL, "x"), Diff(DELETE, "a"), Diff(INSERT, "abc"), + Diff(DELETE, "dc"), Diff(EQUAL, "y")}; + dmp.diff_cleanupMerge(diffs); + assertEquals( + "diff_cleanupMerge: Prefix and suffix detection with equalities.", + TDiffVector({Diff(EQUAL, "xa"), Diff(DELETE, "d"), Diff(INSERT, "b"), + Diff(EQUAL, "cy")}), + diffs); + + diffs = {Diff(EQUAL, "a"), Diff(INSERT, "ba"), Diff(EQUAL, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left.", + TDiffVector({Diff(INSERT, "ab"), Diff(EQUAL, "ac")}), diffs); + + diffs = {Diff(EQUAL, "c"), Diff(INSERT, "ab"), Diff(EQUAL, "a")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right.", + TDiffVector({Diff(EQUAL, "ca"), Diff(INSERT, "ba")}), diffs); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(EQUAL, "c"), + Diff(DELETE, "ac"), Diff(EQUAL, "x")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left recursive.", + TDiffVector({Diff(DELETE, "abc"), Diff(EQUAL, "acx")}), diffs); + + diffs = {Diff(EQUAL, "x"), Diff(DELETE, "ca"), Diff(EQUAL, "c"), + Diff(DELETE, "b"), Diff(EQUAL, "a")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right recursive.", + TDiffVector({Diff(EQUAL, "xca"), Diff(DELETE, "cba")}), diffs); } -TEST_F( diff_match_patch_test, testDiffDelta ) -{ - // Convert a diff into delta string. - auto diffs = TDiffVector( { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ), Diff( INSERT, "old dog" ) } ); - std::wstring text1 = dmp.diff_text1( diffs ); - assertEquals( "diff_text1: Base text.", L"jumps over the lazy", text1 ); - - std::wstring delta = dmp.diff_toDelta( diffs ); - std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; - assertEquals( "diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); - - // Convert delta string into a diff. 
- assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); - - // Generates error (19 < 20). - assertThrow( "diff_fromDelta: Too long.", dmp.diff_fromDelta( text1 + L"x", delta ), std::wstring ); - - // Generates error (19 > 18). - assertThrow( "diff_fromDelta: Too short.", dmp.diff_fromDelta( text1.substr( 1 ), delta ), std::wstring ); - - // Generates error (%c3%xy invalid Unicode). - assertThrow( "diff_fromDelta: Invalid character.", dmp.diff_fromDelta( "", "+%c3%xy" ), std::wstring ); - - // Test deltas with special characters. - diffs = { Diff( EQUAL, std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %" ) ), Diff( DELETE, std::wstring( L"\u0681 " ) + kOne + std::wstring( L" \n ^" ) ), Diff( INSERT, std::wstring( L"\u0682 " ) + kTwo + std::wstring( L" \\ |" ) ) }; - - text1 = dmp.diff_text1( diffs ); - golden = std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %\u0681 " ) + kOne + std::wstring( L" \n ^" ); - assertEquals( "diff_text1: Unicode text", golden, text1 ); - - delta = dmp.diff_toDelta( diffs ); - assertEquals( "diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta ); - - assertEquals( "diff_fromDelta: Unicode", diffs, dmp.diff_fromDelta( text1, delta ) ); - - // Verify pool of unchanged characters. - diffs = { Diff( INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # " ) }; - std::wstring text2 = dmp.diff_text2( diffs ); - assertEquals( "diff_text2: Unchanged characters.", L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); - - delta = dmp.diff_toDelta( diffs ); - assertEquals( "diff_toDelta: Unchanged characters.", L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); - - // Convert delta string into a diff. - assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( {}, delta ) ); +TEST_F(diff_match_patch_test, testDiffCleanupSemanticLossless) { + // Slide diffs to match logical boundaries. 
+ auto diffs = TDiffVector(); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(EQUAL, "AAA\r\n\r\nBBB"), Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), + Diff(EQUAL, "\r\nEEE")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Blank lines.", + TDiffVector({Diff(EQUAL, "AAA\r\n\r\n"), + Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), + Diff(EQUAL, "BBB\r\nEEE")}), + diffs); + + diffs = {Diff(EQUAL, "AAA\r\nBBB"), Diff(INSERT, " DDD\r\nBBB"), + Diff(EQUAL, " EEE")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Line boundaries.", + TDiffVector({Diff(EQUAL, "AAA\r\n"), Diff(INSERT, "BBB DDD\r\n"), + Diff(EQUAL, "BBB EEE")}), + diffs); + + diffs = {Diff(EQUAL, "The c"), Diff(INSERT, "ow and the c"), + Diff(EQUAL, "at.")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Word boundaries.", + TDiffVector({Diff(EQUAL, "The "), Diff(INSERT, "cow and the "), + Diff(EQUAL, "cat.")}), + diffs); + + diffs = {Diff(EQUAL, "The-c"), Diff(INSERT, "ow-and-the-c"), + Diff(EQUAL, "at.")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Alphanumeric boundaries.", + TDiffVector({Diff(EQUAL, "The-"), Diff(INSERT, "cow-and-the-"), + Diff(EQUAL, "cat.")}), + diffs); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "a"), Diff(EQUAL, "ax")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Hitting the start.", + TDiffVector({Diff(DELETE, "a"), Diff(EQUAL, "aax")}), diffs); + + diffs = {Diff(EQUAL, "xa"), Diff(DELETE, "a"), Diff(EQUAL, "a")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Hitting the end.", + TDiffVector({Diff(EQUAL, "xaa"), Diff(DELETE, "a")}), diffs); + + diffs = {Diff(EQUAL, "The xxx. The "), Diff(INSERT, "zzz. 
The "), + Diff(EQUAL, "yyy.")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Sentence boundaries.", + TDiffVector({Diff(EQUAL, "The xxx."), Diff(INSERT, " The zzz."), + Diff(EQUAL, " The yyy.")}), + diffs); } -TEST_F( diff_match_patch_test, testDiffXIndex ) -{ - // Translate a location in text1 to text2. - auto diffs = TDiffVector( { Diff( DELETE, "a" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) } ); - assertEquals( "diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex( diffs, 2 ) ); - - diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "1234" ), Diff( EQUAL, "xyz" ) }; - assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); +TEST_F(diff_match_patch_test, testDiffCleanupSemantic) { + // Cleanup semantically trivial equalities. + auto diffs = TDiffVector(); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), + Diff(DELETE, "e")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #1.", + TDiffVector({Diff(DELETE, "ab"), Diff(INSERT, "cd"), + Diff(EQUAL, "12"), Diff(DELETE, "e")}), + diffs); + + diffs = {Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), + Diff(DELETE, "wxyz")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #2.", + TDiffVector({Diff(DELETE, "abc"), Diff(INSERT, "ABC"), + Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")}), + diffs); + + diffs = {Diff(DELETE, "a"), Diff(EQUAL, "b"), Diff(DELETE, "c")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Simple elimination.", + TDiffVector({Diff(DELETE, "abc"), Diff(INSERT, "b")}), diffs); + + diffs = {Diff(DELETE, "ab"), Diff(EQUAL, "cd"), Diff(DELETE, "e"), + Diff(EQUAL, "f"), Diff(INSERT, "g")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Backpass elimination.", + 
TDiffVector({Diff(DELETE, "abcdef"), Diff(INSERT, "cdfg")}), + diffs); + + diffs = {Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), + Diff(INSERT, "2"), Diff(EQUAL, "_"), Diff(INSERT, "1"), + Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Multiple elimination.", + TDiffVector({Diff(DELETE, "AB_AB"), Diff(INSERT, "1A2_1A2")}), + diffs); + + diffs = {Diff(EQUAL, "The c"), Diff(DELETE, "ow and the c"), + Diff(EQUAL, "at.")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Word boundaries.", + TDiffVector({Diff(EQUAL, "The "), Diff(DELETE, "cow and the "), + Diff(EQUAL, "cat.")}), + diffs); + + diffs = {Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No overlap elimination.", + TDiffVector({Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")}), + diffs); + + diffs = {Diff(DELETE, "abcxxx"), Diff(INSERT, "xxxdef")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Overlap elimination.", + TDiffVector({Diff(DELETE, "abc"), Diff(EQUAL, "xxx"), + Diff(INSERT, "def")}), + diffs); + + diffs = {Diff(DELETE, "xxxabc"), Diff(INSERT, "defxxx")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", + TDiffVector({Diff(INSERT, "def"), Diff(EQUAL, "xxx"), + Diff(DELETE, "abc")}), + diffs); + + diffs = {Diff(DELETE, "abcd1212"), Diff(INSERT, "1212efghi"), + Diff(EQUAL, "----"), Diff(DELETE, "A3"), Diff(INSERT, "3BC")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals( + "diff_cleanupSemantic: Two overlap eliminations.", + TDiffVector({Diff(DELETE, "abcd"), Diff(EQUAL, "1212"), + Diff(INSERT, "efghi"), Diff(EQUAL, "----"), + Diff(DELETE, "A"), Diff(EQUAL, "3"), Diff(INSERT, "BC")}), + diffs); } -TEST_F( diff_match_patch_test, testDiffLevenshtein ) -{ - auto diffs = TDiffVector( { Diff( DELETE, "abc" ), Diff( INSERT, "1234" ), Diff( 
EQUAL, "xyz" ) } ); - assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); - - diffs = { Diff( EQUAL, "xyz" ), Diff( DELETE, "abc" ), Diff( INSERT, "1234" ) }; - assertEquals( "diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein( diffs ) ); - - diffs = { Diff( DELETE, "abc" ), Diff( EQUAL, "xyz" ), Diff( INSERT, "1234" ) }; - assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); +TEST_F(diff_match_patch_test, testDiffCleanupEfficiency) { + // Cleanup operationally trivial equalities. + dmp.Diff_EditCost = 4; + auto diffs = TDiffVector(); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals( + "diff_cleanupEfficiency: No elimination.", + TDiffVector({Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}), + diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Four-edit elimination.", + TDiffVector({Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xyz34")}), + diffs); + + diffs = {Diff(INSERT, "12"), Diff(EQUAL, "x"), Diff(DELETE, "cd"), + Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Three-edit elimination.", + TDiffVector({Diff(DELETE, "xcd"), Diff(INSERT, "12x34")}), + diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xy"), + Diff(INSERT, "34"), Diff(EQUAL, "z"), Diff(DELETE, "cd"), + Diff(INSERT, "56")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals( + "diff_cleanupEfficiency: Backpass elimination.", + TDiffVector({Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xy34z56")}), diffs); + + 
dmp.Diff_EditCost = 5; + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals( + "diff_cleanupEfficiency: High cost elimination.", + TDiffVector({Diff(DELETE, "abwxyzcd"), Diff(INSERT, "12wxyz34")}), diffs); + dmp.Diff_EditCost = 4; } -TEST_F( diff_match_patch_test, testDiffBisect ) -{ - // Normal. - std::wstring a = L"cat"; - std::wstring b = L"map"; - // Since the resulting diff hasn't been normalized, it would be ok if - // the insertion and deletion pairs are swapped. - // If the order changes, tweak this test as required. - auto diffs = TDiffVector( { Diff( DELETE, "c" ), Diff( INSERT, "m" ), Diff( EQUAL, "a" ), Diff( DELETE, "t" ), Diff( INSERT, "p" ) } ); - auto results = dmp.diff_bisect( a, b, std::numeric_limits< clock_t >::max() ); - assertEquals( "diff_bisect: Normal.", diffs, results ); - - // Timeout. - diffs = { Diff( DELETE, "cat" ), Diff( INSERT, "map" ) }; - assertEquals( "diff_bisect: Timeout.", diffs, dmp.diff_bisect( a, b, 0 ) ); +TEST_F(diff_match_patch_test, testDiffPrettyHtml) { + // Pretty print. + auto diffs = TDiffVector( + {Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")}); + assertEquals("diff_prettyHtml:", + L"
<B>b</B>c&d", + dmp.diff_prettyHtml(diffs)); } -TEST_F( diff_match_patch_test, testDiffMain ) -{ - // Perform a trivial diff. - auto diffs = TDiffVector(); - assertEquals( "diff_main: nullptr case.", diffs, dmp.diff_main( "", "", false ) ); - - diffs = { Diff( DELETE, "abc" ) }; - assertEquals( "diff_main: RHS side nullptr case.", diffs, dmp.diff_main( "abc", "", false ) ); - - diffs = { Diff( INSERT, "abc" ) }; - assertEquals( "diff_main: LHS side nullptr case.", diffs, dmp.diff_main( "", "abc", false ) ); - - diffs = { Diff( EQUAL, "abc" ) }; - assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( "abc", "abc", false ) ); - - diffs = { Diff( EQUAL, "ab" ), Diff( INSERT, "123" ), Diff( EQUAL, "c" ) }; - assertEquals( "diff_main: Simple insertion.", diffs, dmp.diff_main( "abc", "ab123c", false ) ); - - diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "bc" ) }; - assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( "a123bc", "abc", false ) ); +TEST_F(diff_match_patch_test, testDiffText) { + // Compute the source and destination texts. + auto diffs = {Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), + Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), + Diff(EQUAL, " lazy")}; + assertEquals("diff_text1:", L"jumps over the lazy", dmp.diff_text1(diffs)); + assertEquals("diff_text2:", L"jumped over a lazy", dmp.diff_text2(diffs)); +} - diffs = { Diff( EQUAL, "a" ), Diff( INSERT, "123" ), Diff( EQUAL, "b" ), Diff( INSERT, "456" ), Diff( EQUAL, "c" ) }; - assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( "abc", "a123b456c", false ) ); +TEST_F(diff_match_patch_test, testDiffDelta) { + // Convert a diff into delta string. 
+ auto diffs = TDiffVector({Diff(EQUAL, "jump"), Diff(DELETE, "s"), + Diff(INSERT, "ed"), Diff(EQUAL, " over "), + Diff(DELETE, "the"), Diff(INSERT, "a"), + Diff(EQUAL, " lazy"), Diff(INSERT, "old dog")}); + std::wstring text1 = dmp.diff_text1(diffs); + assertEquals("diff_text1: Base text.", L"jumps over the lazy", text1); + + std::wstring delta = dmp.diff_toDelta(diffs); + std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; + assertEquals("diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", + delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Normal.", diffs, + dmp.diff_fromDelta(text1, delta)); + + // Generates error (19 < 20). + assertThrow("diff_fromDelta: Too long.", + dmp.diff_fromDelta(text1 + L"x", delta), std::wstring); + + // Generates error (19 > 18). + assertThrow("diff_fromDelta: Too short.", + dmp.diff_fromDelta(text1.substr(1), delta), std::wstring); + + // Generates error (%c3%xy invalid Unicode). + assertThrow("diff_fromDelta: Invalid character.", + dmp.diff_fromDelta("", "+%c3%xy"), std::wstring); + + // Test deltas with special characters. + diffs = { + Diff(EQUAL, std::wstring(L"\u0680 ") + kZero + std::wstring(L" \t %")), + Diff(DELETE, std::wstring(L"\u0681 ") + kOne + std::wstring(L" \n ^")), + Diff(INSERT, std::wstring(L"\u0682 ") + kTwo + std::wstring(L" \\ |"))}; + + text1 = dmp.diff_text1(diffs); + golden = std::wstring(L"\u0680 ") + kZero + std::wstring(L" \t %\u0681 ") + + kOne + std::wstring(L" \n ^"); + assertEquals("diff_text1: Unicode text", golden, text1); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta); + + assertEquals("diff_fromDelta: Unicode", diffs, + dmp.diff_fromDelta(text1, delta)); + + // Verify pool of unchanged characters. + diffs = { + Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? 
: @ & = + $ , # ")}; + std::wstring text2 = dmp.diff_text2(diffs); + assertEquals("diff_text2: Unchanged characters.", + L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unchanged characters.", + L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", + delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Unchanged characters.", diffs, + dmp.diff_fromDelta({}, delta)); +} - diffs = { Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "b" ), Diff( DELETE, "456" ), Diff( EQUAL, "c" ) }; - assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( "a123b456c", "abc", false ) ); +TEST_F(diff_match_patch_test, testDiffXIndex) { + // Translate a location in text1 to text2. + auto diffs = TDiffVector( + {Diff(DELETE, "a"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")}); + assertEquals("diff_xIndex: Translation on equality.", 5, + dmp.diff_xIndex(diffs, 2)); - // Perform a real diff. - // Switch off the timeout. - dmp.Diff_Timeout = 0; - diffs = { Diff( DELETE, "a" ), Diff( INSERT, "b" ) }; - assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( "a", "b", false ) ); + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "1234"), Diff(EQUAL, "xyz")}; + assertEquals("diff_xIndex: Translation on deletion.", 1, + dmp.diff_xIndex(diffs, 3)); +} - diffs = { Diff( DELETE, "Apple" ), Diff( INSERT, "Banana" ), Diff( EQUAL, "s are a" ), Diff( INSERT, "lso" ), Diff( EQUAL, " fruit." 
) }; - assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( "Apples are a fruit.", "Bananas are also fruit.", false ) ); +TEST_F(diff_match_patch_test, testDiffLevenshtein) { + auto diffs = TDiffVector( + {Diff(DELETE, "abc"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")}); + assertEquals("diff_levenshtein: Trailing equality.", 4, + dmp.diff_levenshtein(diffs)); - diffs = { Diff( DELETE, "a" ), Diff( INSERT, L"\u0680" ), Diff( EQUAL, "x" ), Diff( DELETE, "\t" ), Diff( INSERT, NUtils::to_wstring( kZero ) ) }; - assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( L"ax\t", std::wstring( L"\u0680x" ) + kZero, false ) ); + diffs = {Diff(EQUAL, "xyz"), Diff(DELETE, "abc"), Diff(INSERT, "1234")}; + assertEquals("diff_levenshtein: Leading equality.", 4, + dmp.diff_levenshtein(diffs)); - diffs = { Diff( DELETE, "1" ), Diff( EQUAL, "a" ), Diff( DELETE, "y" ), Diff( EQUAL, "b" ), Diff( DELETE, "2" ), Diff( INSERT, "xab" ) }; - assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( "1ayb2", "abxab", false ) ); + diffs = {Diff(DELETE, "abc"), Diff(EQUAL, "xyz"), Diff(INSERT, "1234")}; + assertEquals("diff_levenshtein: Middle equality.", 7, + dmp.diff_levenshtein(diffs)); +} - diffs = { Diff( INSERT, "xaxcx" ), Diff( EQUAL, "abc" ), Diff( DELETE, "y" ) }; - assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( "abcy", "xaxcxabc", false ) ); +TEST_F(diff_match_patch_test, testDiffBisect) { + // Normal. + std::wstring a = L"cat"; + std::wstring b = L"map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + auto diffs = + TDiffVector({Diff(DELETE, "c"), Diff(INSERT, "m"), Diff(EQUAL, "a"), + Diff(DELETE, "t"), Diff(INSERT, "p")}); + auto results = dmp.diff_bisect(a, b, std::numeric_limits::max()); + assertEquals("diff_bisect: Normal.", diffs, results); + + // Timeout. 
+ diffs = {Diff(DELETE, "cat"), Diff(INSERT, "map")}; + assertEquals("diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); +} - diffs = { Diff( DELETE, "ABCD" ), Diff( EQUAL, "a" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "bcd" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "efghijklmnopqrs" ), Diff( DELETE, "EFGHIJKLMNOefg" ) }; - assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false ) ); - - diffs = { Diff( INSERT, " " ), Diff( EQUAL, "a" ), Diff( INSERT, "nd" ), Diff( EQUAL, " [[Pennsylvania]]" ), Diff( DELETE, " and [[New" ) }; - assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( "a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false ) ); - - dmp.Diff_Timeout = 0.1f; // 100ms - // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. - std::wstring a = L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; - std::wstring b = L"I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; - // Increase the text lengths by 1024 times to ensure a timeout. - for ( int x = 0; x < 10; x++ ) - { - a = a + a; - b = b + b; - } - clock_t startTime = clock(); - dmp.diff_main( a, b ); - clock_t endTime = clock(); - // Test that we took at least the timeout period. - assertTrue( "diff_main: Timeout min.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC ) <= ( endTime - startTime ) ); - // Test that we didn't take forever (be forgiving). - // Theoretically this test could fail very occasionally if the - // OS task swaps or locks up for a second at the wrong moment. - // Java seems to overrun by ~80% (compared with 10% for other languages). 
- // Therefore use an upper limit of 0.5s instead of 0.2s. - assertTrue( "diff_main: Timeout max.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 ) > ( endTime - startTime ) ); - dmp.Diff_Timeout = 0; - - // Test the linemode speedup. - // Must be long to pass the 100 char cutoff. - a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; - assertEquals( "diff_main: Simple line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); - - a = L"1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; - b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; - assertEquals( "diff_main: Single line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); - - a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; - TStringVector texts_linemode = diff_rebuildtexts( dmp.diff_main( a, b, true ) ); - TStringVector texts_textmode = diff_rebuildtexts( dmp.diff_main( a, b, false ) ); - assertEquals( "diff_main: Overlap line-mode.", texts_textmode, texts_linemode ); +TEST_F(diff_match_patch_test, testDiffMain) { + // Perform a trivial diff. 
+ auto diffs = TDiffVector(); + assertEquals("diff_main: nullptr case.", diffs, dmp.diff_main("", "", false)); + + diffs = {Diff(DELETE, "abc")}; + assertEquals("diff_main: RHS side nullptr case.", diffs, + dmp.diff_main("abc", "", false)); + + diffs = {Diff(INSERT, "abc")}; + assertEquals("diff_main: LHS side nullptr case.", diffs, + dmp.diff_main("", "abc", false)); + + diffs = {Diff(EQUAL, "abc")}; + assertEquals("diff_main: Equality.", diffs, + dmp.diff_main("abc", "abc", false)); + + diffs = {Diff(EQUAL, "ab"), Diff(INSERT, "123"), Diff(EQUAL, "c")}; + assertEquals("diff_main: Simple insertion.", diffs, + dmp.diff_main("abc", "ab123c", false)); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "bc")}; + assertEquals("diff_main: Simple deletion.", diffs, + dmp.diff_main("a123bc", "abc", false)); + + diffs = {Diff(EQUAL, "a"), Diff(INSERT, "123"), Diff(EQUAL, "b"), + Diff(INSERT, "456"), Diff(EQUAL, "c")}; + assertEquals("diff_main: Two insertions.", diffs, + dmp.diff_main("abc", "a123b456c", false)); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "b"), + Diff(DELETE, "456"), Diff(EQUAL, "c")}; + assertEquals("diff_main: Two deletions.", diffs, + dmp.diff_main("a123b456c", "abc", false)); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0; + diffs = {Diff(DELETE, "a"), Diff(INSERT, "b")}; + assertEquals("diff_main: Simple case #1.", diffs, + dmp.diff_main("a", "b", false)); + + diffs = {Diff(DELETE, "Apple"), Diff(INSERT, "Banana"), + Diff(EQUAL, "s are a"), Diff(INSERT, "lso"), Diff(EQUAL, " fruit.")}; + assertEquals( + "diff_main: Simple case #2.", diffs, + dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); + + diffs = {Diff(DELETE, "a"), Diff(INSERT, L"\u0680"), Diff(EQUAL, "x"), + Diff(DELETE, "\t"), Diff(INSERT, NUtils::to_wstring(kZero))}; + assertEquals("diff_main: Simple case #3.", diffs, + dmp.diff_main(L"ax\t", std::wstring(L"\u0680x") + kZero, false)); + + diffs = {Diff(DELETE, "1"), Diff(EQUAL, "a"), Diff(DELETE, "y"), + Diff(EQUAL, "b"), Diff(DELETE, "2"), Diff(INSERT, "xab")}; + assertEquals("diff_main: Overlap #1.", diffs, + dmp.diff_main("1ayb2", "abxab", false)); + + diffs = {Diff(INSERT, "xaxcx"), Diff(EQUAL, "abc"), Diff(DELETE, "y")}; + assertEquals("diff_main: Overlap #2.", diffs, + dmp.diff_main("abcy", "xaxcxabc", false)); + + diffs = {Diff(DELETE, "ABCD"), + Diff(EQUAL, "a"), + Diff(DELETE, "="), + Diff(INSERT, "-"), + Diff(EQUAL, "bcd"), + Diff(DELETE, "="), + Diff(INSERT, "-"), + Diff(EQUAL, "efghijklmnopqrs"), + Diff(DELETE, "EFGHIJKLMNOefg")}; + assertEquals("diff_main: Overlap #3.", diffs, + dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", + "a-bcd-efghijklmnopqrs", false)); + + diffs = {Diff(INSERT, " "), Diff(EQUAL, "a"), Diff(INSERT, "nd"), + Diff(EQUAL, " [[Pennsylvania]]"), Diff(DELETE, " and [[New")}; + assertEquals("diff_main: Large equality.", diffs, + dmp.diff_main("a [[Pennsylvania]] and [[New", + " and [[Pennsylvania]]", false)); + + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the + // text lengths. 
+ std::wstring a = + L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the " + L"wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + std::wstring b = + L"I am the very model of a modern major general,\nI've information " + L"vegetable, animal, and mineral,\nI know the kings of England, and I " + L"quote the fights historical,\nFrom Marathon to Waterloo, in order " + L"categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main(a, b); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue("diff_main: Timeout min.", + (dmp.Diff_Timeout * CLOCKS_PER_SEC) <= (endTime - startTime)); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue("diff_main: Timeout max.", + (dmp.Diff_Timeout * CLOCKS_PER_SEC * 2) > (endTime - startTime)); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij" + L"\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghi" + L"j\nabcdefghij\n"; + assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), + dmp.diff_main(a, b, false)); + + a = L"12345678901234567890123456789012345678901234567890123456789012345678901" + L"23456789012345678901234567890123456789012345678901234567890"; + b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghija" + L"bcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), + dmp.diff_main(a, b, false)); + + a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890" + L"\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n123456789" + L"0\nabcdefghij\n"; + TStringVector texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); + TStringVector texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); + assertEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); } // MATCH TEST FUNCTIONS -TEST_F( diff_match_patch_test, testMatchAlphabet ) -{ - // Initialise the bitmasks for Bitap. 
- TCharPosMap bitmask; - bitmask[ 'a' ] = 4; - bitmask[ 'b' ] = 2; - bitmask[ 'c' ] = 1; - assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( "abc" ) ); - - bitmask = TCharPosMap(); - bitmask[ 'a' ] = 37; - bitmask[ 'b' ] = 18; - bitmask[ 'c' ] = 8; - assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( "abcaba" ) ); +TEST_F(diff_match_patch_test, testMatchAlphabet) { + // Initialise the bitmasks for Bitap. + TCharPosMap bitmask; + bitmask['a'] = 4; + bitmask['b'] = 2; + bitmask['c'] = 1; + assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); + + bitmask = TCharPosMap(); + bitmask['a'] = 37; + bitmask['b'] = 18; + bitmask['c'] = 8; + assertEquals("match_alphabet: Duplicates.", bitmask, + dmp.match_alphabet("abcaba")); } -TEST_F( diff_match_patch_test, testMatchBitap ) -{ - // Bitap algorithm. - dmp.Match_Distance = 100; - dmp.Match_Threshold = 0.5f; - assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 5 ) ); +TEST_F(diff_match_patch_test, testMatchBitap) { + // Bitap algorithm. 
+ dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Exact match #1.", 5, + dmp.match_bitap("abcdefghijk", "fgh", 5)); - assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 0 ) ); + assertEquals("match_bitap: Exact match #2.", 5, + dmp.match_bitap("abcdefghijk", "fgh", 0)); - assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( "abcdefghijk", "efxhi", 0 ) ); + assertEquals("match_bitap: Fuzzy match #1.", 4, + dmp.match_bitap("abcdefghijk", "efxhi", 0)); - assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( "abcdefghijk", "cdefxyhijk", 5 ) ); + assertEquals("match_bitap: Fuzzy match #2.", 2, + dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); - assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( "abcdefghijk", "bxy", 1 ) ); + assertEquals("match_bitap: Fuzzy match #3.", -1, + dmp.match_bitap("abcdefghijk", "bxy", 1)); - assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( "123456789xx0", "3456789x0", 2 ) ); + assertEquals("match_bitap: Overflow.", 2, + dmp.match_bitap("123456789xx0", "3456789x0", 2)); - assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( "abcdef", "xxabc", 4 ) ); + assertEquals("match_bitap: Before start match.", 0, + dmp.match_bitap("abcdef", "xxabc", 4)); - assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( "abcdef", "defyy", 4 ) ); + assertEquals("match_bitap: Beyond end match.", 3, + dmp.match_bitap("abcdef", "defyy", 4)); - assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( "abcdef", "xabcdefy", 0 ) ); + assertEquals("match_bitap: Oversized pattern.", 0, + dmp.match_bitap("abcdef", "xabcdefy", 0)); - dmp.Match_Threshold = 0.4f; - assertEquals( "match_bitap: Threshold #1.", 4, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); + dmp.Match_Threshold = 0.4f; + assertEquals("match_bitap: Threshold #1.", 4, + dmp.match_bitap("abcdefghijk", "efxyhi", 1)); - dmp.Match_Threshold = 
0.3f; - assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); + dmp.Match_Threshold = 0.3f; + assertEquals("match_bitap: Threshold #2.", -1, + dmp.match_bitap("abcdefghijk", "efxyhi", 1)); - dmp.Match_Threshold = 0.0f; - assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( "abcdefghijk", "bcdef", 1 ) ); + dmp.Match_Threshold = 0.0f; + assertEquals("match_bitap: Threshold #3.", 1, + dmp.match_bitap("abcdefghijk", "bcdef", 1)); - dmp.Match_Threshold = 0.5f; - assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( "abcdexyzabcde", "abccde", 3 ) ); + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Multiple select #1.", 0, + dmp.match_bitap("abcdexyzabcde", "abccde", 3)); - assertEquals( "match_bitap: Multiple select #2.", 8, dmp.match_bitap( "abcdexyzabcde", "abccde", 5 ) ); + assertEquals("match_bitap: Multiple select #2.", 8, + dmp.match_bitap("abcdexyzabcde", "abccde", 5)); - dmp.Match_Distance = 10; // Strict location. - assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); + dmp.Match_Distance = 10; // Strict location. + assertEquals("match_bitap: Distance test #1.", -1, + dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); - assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1 ) ); + assertEquals("match_bitap: Distance test #2.", 0, + dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); - dmp.Match_Distance = 1000; // Loose location. - assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); + dmp.Match_Distance = 1000; // Loose location. + assertEquals("match_bitap: Distance test #3.", 0, + dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); } -TEST_F( diff_match_patch_test, testMatchMain ) -{ - // Full match. 
- assertEquals( "match_main: Equality.", 0, dmp.match_main( "abcdef", "abcdef", 1000 ) ); +TEST_F(diff_match_patch_test, testMatchMain) { + // Full match. + assertEquals("match_main: Equality.", 0, + dmp.match_main("abcdef", "abcdef", 1000)); - assertEquals( "match_main: nullptr text.", -1, dmp.match_main( "", "abcdef", 1 ) ); + assertEquals("match_main: nullptr text.", -1, + dmp.match_main("", "abcdef", 1)); - assertEquals( "match_main: nullptr pattern.", 3, dmp.match_main( "abcdef", "", 3 ) ); + assertEquals("match_main: nullptr pattern.", 3, + dmp.match_main("abcdef", "", 3)); - assertEquals( "match_main: Exact match.", 3, dmp.match_main( "abcdef", "de", 3 ) ); + assertEquals("match_main: Exact match.", 3, + dmp.match_main("abcdef", "de", 3)); - dmp.Match_Threshold = 0.7f; - assertEquals( "match_main: Complex match.", 4, dmp.match_main( "I am the very model of a modern major general.", " that berry ", 5 ) ); - dmp.Match_Threshold = 0.5f; + dmp.Match_Threshold = 0.7f; + assertEquals("match_main: Complex match.", 4, + dmp.match_main("I am the very model of a modern major general.", + " that berry ", 5)); + dmp.Match_Threshold = 0.5f; } // PATCH TEST FUNCTIONS -TEST_F( diff_match_patch_test, testPatchObj ) -{ - // Patch Object. - Patch p; - p.start1 = 20; - p.start2 = 21; - p.length1 = 18; - p.length2 = 17; - p.diffs = { Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, "\nlaz" ) }; - std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals( "patch: toString.", strp, p.toString() ); +TEST_F(diff_match_patch_test, testPatchObj) { + // Patch Object. 
+ Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = {Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), + Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), + Diff(EQUAL, "\nlaz")}; + std::wstring strp = + L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("patch: toString.", strp, p.toString()); } -TEST_F( diff_match_patch_test, testPatchFromText ) -{ - assertTrue( "patch_fromText: #0.", dmp.patch_fromText( "" ).empty() ); +TEST_F(diff_match_patch_test, testPatchFromText) { + assertTrue("patch_fromText: #0.", dmp.patch_fromText("").empty()); - std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp )[ 0 ].toString() ); + std::wstring strp = + L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("patch_fromText: #1.", strp, + dmp.patch_fromText(strp)[0].toString()); - assertEquals( "patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); + assertEquals("patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", + dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n")[0].toString()); - assertEquals( "patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); + assertEquals("patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", + dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n")[0].toString()); - assertEquals( "patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); + assertEquals("patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", + dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n")[0].toString()); - // Generates error. - assertThrow( "patch_fromText: #5.", dmp.patch_fromText( "Bad\nPatch\n" ), std::wstring ); + // Generates error. 
+ assertThrow("patch_fromText: #5.", dmp.patch_fromText("Bad\nPatch\n"), + std::wstring); } -TEST_F( diff_match_patch_test, testPatchToText ) -{ - std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - auto patches = dmp.patch_fromText( strp ); - assertEquals( "patch_toText: Single", strp, dmp.patch_toText( patches ) ); +TEST_F(diff_match_patch_test, testPatchToText) { + std::wstring strp = + L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + auto patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Single", strp, dmp.patch_toText(patches)); + + strp = + L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n " + L"tes\n"; + patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Dua", strp, dmp.patch_toText(patches)); +} - strp = L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; - patches = dmp.patch_fromText( strp ); - assertEquals( "patch_toText: Dua", strp, dmp.patch_toText( patches ) ); +TEST_F(diff_match_patch_test, testPatchAddContext) { + dmp.Patch_Margin = 4; + auto p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_addContext: Simple case.", + L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", + p.toString()); + + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough trailing context.", + L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", + p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough leading context.", + L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext( + p, "The quick brown 
fox jumps. The quick brown fox crashes."); + assertEquals("patch_addContext: Ambiguity.", + L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", + p.toString()); } -TEST_F( diff_match_patch_test, testPatchAddContext ) -{ - dmp.Patch_Margin = 4; - auto p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; - dmp.patch_addContext( p, "The quick brown fox jumps over the lazy dog." ); - assertEquals( "patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); +TEST_F(diff_match_patch_test, testPatchMake) { + TPatchVector patches; + patches = dmp.patch_make("", ""); + assertEquals("patch_make: nullptr case", L"", dmp.patch_toText(patches)); + + std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; + std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; + std::wstring expectedPatch = + L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n " + L"jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to + // rolling context. 
+ patches = dmp.patch_make(text2, text1); + assertEquals("patch_make: Text2+Text1 inputs", expectedPatch, + dmp.patch_toText(patches)); + + expectedPatch = + L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n " + L"jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Text1+Text2 inputs", expectedPatch, + dmp.patch_toText(patches)); + + auto diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + assertEquals("patch_make: Diff input", expectedPatch, + dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, diffs); + assertEquals("patch_make: Text1+Diff inputs", expectedPatch, + dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, text2, diffs); + assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated)", + expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); + assertEquals("patch_toText: Character encoding.", + L"@@ -1,21 +1,21 " + L"@@\n-%601234567890-=%5B%5D%5C;',./" + L"\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", + dmp.patch_toText(patches)); + + diffs = {Diff(DELETE, "`1234567890-=[]\\;',./"), + Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")}; + assertEquals( + "patch_fromText: Character decoding.", diffs, + dmp.patch_fromText("@@ -1,21 +1,21 " + "@@\n-%601234567890-=%5B%5D%5C;',./" + "\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0] + .diffs); + + text1 = {}; + for (int x = 0; x < 100; x++) { + text1 += L"abcdef"; + } + text2 = text1 + L"123"; + expectedPatch = + L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Long string with repeats.", expectedPatch, + dmp.patch_toText(patches)); +} - p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; - dmp.patch_addContext( p, "The quick brown fox jumps." 
); - assertEquals( "patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); - - p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; - dmp.patch_addContext( p, "The quick brown fox jumps." ); - assertEquals( "patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); - - p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; - dmp.patch_addContext( p, "The quick brown fox jumps. The quick brown fox crashes." ); - assertEquals( "patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString() ); -} - -TEST_F( diff_match_patch_test, testPatchMake ) -{ - TPatchVector patches; - patches = dmp.patch_make( "", "" ); - assertEquals( "patch_make: nullptr case", L"", dmp.patch_toText( patches ) ); - - std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; - std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; - std::wstring expectedPatch = L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; - // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. - patches = dmp.patch_make( text2, text1 ); - assertEquals( "patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText( patches ) ); +TEST_F(diff_match_patch_test, testPatchSplitMax) { + // Confirm Match_MaxBits is 32. 
+ TPatchVector patches; + patches = dmp.patch_make( + "abcdefghijklmnopqrstuvwxyz01234567890", + "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #1.", + L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n " + L"ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n " + L"uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n " + L"zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make( + "abcdef123456789012345678901234567890123456789012345678901234567890123456" + "7890uvwxyz", + "abcdefuvwxyz"); + std::wstring oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); + + patches = dmp.patch_make( + "1234567890123456789012345678901234567890123456789012345678901234567890", + "abc"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #3.", + L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ " + L"-29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ " + L"-57,14 +1,3 @@\n-78901234567890\n+abc\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make( + "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : " + "0 , t : 1", + "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : " + "0 , t : 1"); + dmp.patch_splitMax(patches); + assertEquals( + "patch_splitMax: #4.", + L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ " + L"-29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", + dmp.patch_toText(patches)); +} - expectedPatch = L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - patches = dmp.patch_make( text1, text2 ); - assertEquals( "patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText( patches ) ); +TEST_F(diff_match_patch_test, testPatchAddPadding) { + TPatchVector patches; 
+ patches = dmp.patch_make("", "test"); + assertEquals("patch_addPadding: Both edges ful", L"@@ -0,0 +1,4 @@\n+test\n", + dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges full.", + L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make("XY", "XtestY"); + assertEquals("patch_addPadding: Both edges partial.", + L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges partial.", + L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); + assertEquals("patch_addPadding: Both edges none.", + L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", + dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges none.", + L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", + dmp.patch_toText(patches)); +} + +TEST_F(diff_match_patch_test, testPatchApply) { + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + TPatchVector patches; + patches = dmp.patch_make("", ""); + auto results = dmp.patch_apply(patches, "Hello world."); + auto &&boolArray = results.second; + + std::wstring resultStr = + results.first + L"\t" + std::to_wstring(boolArray.size()); + assertEquals("patch_apply: nullptr case.", L"Hello world.\t0", resultStr); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", + "That quick brown fox jumped over a lazy dog."); + assertEquals("patch_apply: Exact match.", + L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 " + L"@@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", + dmp.patch_toText(patches)); + + results = + dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + + 
assertEquals("patch_apply: Exact match.", + L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", + resultStr); + + results = dmp.patch_apply(patches, + "The quick red rabbit jumps over the tired tiger."); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Partial match.", + L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", + resultStr); + + results = dmp.patch_apply(patches, + "I am the very model of a modern major general."); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Failed match.", + L"I am the very model of a modern major general.\tfalse\tfalse", + resultStr); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy"); + results = dmp.patch_apply(patches, + "x123456789012345678901234567890-----++++++++++----" + "-123456789012345678901234567890y"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", + resultStr); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy"); + results = dmp.patch_apply(patches, + "x12345678901234567890---------------++++++++++----" + "-----------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Big delete, large change 1.", + L"xabc12345678901234567890---------------++++++++++-------------" + L"--12345678901234567890y\tfalse\ttrue", + resultStr); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy"); + results = dmp.patch_apply(patches, + "x12345678901234567890---------------++++++++++----" + 
"-----------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", + resultStr); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = + dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", + "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------" + "1234567YYYYYYYYYY890"); + results = dmp.patch_apply( + patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Compensate for failed patch.", + L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------" + L"1234567YYYYYYYYYY890\tfalse\ttrue", + resultStr); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make("", "test"); + std::wstring patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, ""); + assertEquals("patch_apply: No side effects.", patchStr, + dmp.patch_toText(patches)); + + patches = + dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); + patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_apply: No side effects with major delete.", patchStr, + dmp.patch_toText(patches)); + + patches = dmp.patch_make("", "test"); + results = dmp.patch_apply(patches, ""); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0], false); + assertEquals("patch_apply: Edge exact match.", L"test\ttrue", resultStr); + + patches = dmp.patch_make("XY", "XtestY"); + results = dmp.patch_apply(patches, "XY"); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0], false); + assertEquals("patch_apply: Near edge exact match.", L"XtestY\ttrue", + resultStr); + + 
patches = dmp.patch_make("y", "y123"); + results = dmp.patch_apply(patches, "x"); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0]); + assertEquals("patch_apply: Edge partial match.", L"x123\ttrue", resultStr); +} - auto diffs = dmp.diff_main( text1, text2, false ); - patches = dmp.patch_make( diffs ); - assertEquals( "patch_make: Diff input", expectedPatch, dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( text1, diffs ); - assertEquals( "patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( text1, text2, diffs ); - assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?" ); - assertEquals( "patch_toText: Character encoding.", L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); - - diffs = { Diff( DELETE, "`1234567890-=[]\\;',./" ), Diff( INSERT, "~!@#$%^&*()_+{}|:\"<>?" ) }; - assertEquals( "patch_fromText: Character decoding.", diffs, dmp.patch_fromText( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" )[ 0 ].diffs ); - - text1 = {}; - for ( int x = 0; x < 100; x++ ) - { - text1 += L"abcdef"; - } - text2 = text1 + L"123"; - expectedPatch = L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; - patches = dmp.patch_make( text1, text2 ); - assertEquals( "patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText( patches ) ); -} - -TEST_F( diff_match_patch_test, testPatchSplitMax ) -{ - // Confirm Match_MaxBits is 32. 
- TPatchVector patches; - patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); - dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #1.", L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz" ); - std::wstring oldToText = dmp.patch_toText( patches ); - dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #2.", oldToText, dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc" ); - dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #3.", L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1" ); - dmp.patch_splitMax( patches ); - assertEquals( "patch_splitMax: #4.", L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); -} - -TEST_F( diff_match_patch_test, testPatchAddPadding ) -{ - TPatchVector patches; - patches = dmp.patch_make( "", "test" ); - assertEquals( "patch_addPadding: Both edges ful", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); - dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges full.", L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", 
dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "XY", "XtestY" ); - assertEquals( "patch_addPadding: Both edges partial.", L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); - dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges partial.", L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "XXXXYYYY", "XXXXtestYYYY" ); - assertEquals( "patch_addPadding: Both edges none.", L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); - dmp.patch_addPadding( patches ); - assertEquals( "patch_addPadding: Both edges none.", L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); -} - -TEST_F( diff_match_patch_test, testPatchApply ) -{ - dmp.Match_Distance = 1000; - dmp.Match_Threshold = 0.5f; - dmp.Patch_DeleteThreshold = 0.5f; - TPatchVector patches; - patches = dmp.patch_make( "", "" ); - auto results = dmp.patch_apply( patches, "Hello world." ); - auto &&boolArray = results.second; - - std::wstring resultStr = results.first + L"\t" + std::to_wstring( boolArray.size() ); - assertEquals( "patch_apply: nullptr case.", L"Hello world.\t0", resultStr ); - - patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog." ); - assertEquals( "patch_apply: Exact match.", L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_toText( patches ) ); - - results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring( boolArray ); - - assertEquals( "patch_apply: Exact match.", L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); - - results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." 
); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Partial match.", L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); - - results = dmp.patch_apply( patches, "I am the very model of a modern major general." ); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Failed match.", L"I am the very model of a modern major general.\tfalse\tfalse", resultStr ); - - patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); - results = dmp.patch_apply( patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", resultStr ); - - patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); - results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Big delete, large change 1.", L"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); - - dmp.Patch_DeleteThreshold = 0.6f; - patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); - results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", resultStr ); - dmp.Patch_DeleteThreshold = 0.5f; - - dmp.Match_Threshold = 
0.0f; - dmp.Match_Distance = 0; - patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890" ); - results = dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring( boolArray ); - assertEquals( "patch_apply: Compensate for failed patch.", L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); - dmp.Match_Threshold = 0.5f; - dmp.Match_Distance = 1000; - - patches = dmp.patch_make( "", "test" ); - std::wstring patchStr = dmp.patch_toText( patches ); - dmp.patch_apply( patches, "" ); - assertEquals( "patch_apply: No side effects.", patchStr, dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "Woof" ); - patchStr = dmp.patch_toText( patches ); - dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." 
); - assertEquals( "patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText( patches ) ); - - patches = dmp.patch_make( "", "test" ); - results = dmp.patch_apply( patches, "" ); - boolArray = results.second; - resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); - assertEquals( "patch_apply: Edge exact match.", L"test\ttrue", resultStr ); - - patches = dmp.patch_make( "XY", "XtestY" ); - results = dmp.patch_apply( patches, "XY" ); - boolArray = results.second; - resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); - assertEquals( "patch_apply: Near edge exact match.", L"XtestY\ttrue", resultStr ); - - patches = dmp.patch_make( "y", "y123" ); - results = dmp.patch_apply( patches, "x" ); - boolArray = results.second; - resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ] ); - assertEquals( "patch_apply: Edge partial match.", L"x123\ttrue", resultStr ); -} - -TEST_F( diff_match_patch_test, fromGitHubExamples ) -{ - auto lhs = L"I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical."; - auto rhs = L"I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head."; - auto diffs = dmp.diff_main( lhs, rhs ); - dmp.diff_cleanupSemantic( diffs ); - auto html = dmp.diff_prettyHtml( diffs ); - auto delta = dmp.diff_toDelta( diffs ); - auto htmlGolden = LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categoricalcartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have 
read, From wicked puns and stupid jokes to anvils that drop on your head.)"; - assertEquals( "gitHubDemos", htmlGolden, html ); - auto deltaGolden = L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head\t=1"; - assertEquals( "gitHubDemos", deltaGolden, delta ); - - auto patches = dmp.patch_make( lhs, rhs ); - auto patch = dmp.patch_toText( patches ); - auto patchGolden = L"@@ -22,187 +22,198 @@\n f a \n-modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical\n+cartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head\n .\n"; - assertEquals( "gitHubDemos", patchGolden, patch ); +TEST_F(diff_match_patch_test, fromGitHubExamples) { + auto lhs = + L"I am the very model of a modern Major-General, I've information " + L"vegetable, animal, and mineral, I know the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical."; + auto rhs = + L"I am the very model of a cartoon individual, My animation's comical, " + L"unusual, and whimsical, I'm quite adept at funny gags, comedic theory " + L"I have read, From wicked puns and stupid jokes to anvils that drop on " + L"your head."; + auto diffs = dmp.diff_main(lhs, rhs); + dmp.diff_cleanupSemantic(diffs); + auto html = dmp.diff_prettyHtml(diffs); + auto delta = dmp.diff_toDelta(diffs); + auto htmlGolden = + LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categoricalcartoon individual, My animation's comical, unusual, 
and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head.)"; + assertEquals("gitHubDemos", htmlGolden, html); + auto deltaGolden = + L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\t=1"; + assertEquals("gitHubDemos", deltaGolden, delta); + + auto patches = dmp.patch_make(lhs, rhs); + auto patch = dmp.patch_toText(patches); + auto patchGolden = + L"@@ -22,187 +22,198 @@\n f a \n-modern Major-General, I've information " + L"vegetable, animal, and mineral, I know the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical\n+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\n " + L".\n"; + assertEquals("gitHubDemos", patchGolden, patch); } diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index 35110051..0ac1848b 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -20,148 +20,145 @@ #define DIFF_MATCH_PATCH_TEST_H #ifdef USE_GTEST - #include "gtest/gtest.h" - #define assertEquals( msg, GOLDEN, COMPUTED ) EXPECT_EQ( GOLDEN, COMPUTED ) << msg - #define assertEmpty( msg, COMPUTED ) EXPECT_TRUE( COMPUTED.empty() ) << msg - #define assertTrue( msg, COMPUTED ) EXPECT_TRUE( COMPUTED ) << msg - #define assertFalse( msg, COMPUTED ) EXPECT_FALSE( COMPUTED ) << msg - #define PUBLIC_TESTING : public testing::Test - #define assertThrow( msg, STATEMENT, EXCEPTION_TYPE ) EXPECT_THROW( STATEMENT, EXCEPTION_TYPE ) << msg +#include "gtest/gtest.h" +#define assertEquals(msg, GOLDEN, COMPUTED) EXPECT_EQ(GOLDEN, COMPUTED) << msg +#define assertEmpty(msg, COMPUTED) 
EXPECT_TRUE(COMPUTED.empty()) << msg +#define assertTrue(msg, COMPUTED) EXPECT_TRUE(COMPUTED) << msg +#define assertFalse(msg, COMPUTED) EXPECT_FALSE(COMPUTED) << msg +#define PUBLIC_TESTING : public testing::Test +#define assertThrow(msg, STATEMENT, EXCEPTION_TYPE) \ + EXPECT_THROW(STATEMENT, EXCEPTION_TYPE) << msg #else - #include - #define PUBLIC_TESTING - #define TEST_F( className, funcName ) void diff_match_patch_test::funcName() +#include +#define PUBLIC_TESTING +#define TEST_F(className, funcName) void diff_match_patch_test::funcName() #endif -class diff_match_patch_test PUBLIC_TESTING -{ -public: - using TStringVector = diff_match_patch::TStringVector; - using TCharPosMap = diff_match_patch::TCharPosMap; - using TVariant = diff_match_patch::TVariant; - using TVariantVector = diff_match_patch::TVariantVector; +class diff_match_patch_test PUBLIC_TESTING { + public: + using TStringVector = diff_match_patch::TStringVector; + using TCharPosMap = diff_match_patch::TCharPosMap; + using TVariant = diff_match_patch::TVariant; + using TVariantVector = diff_match_patch::TVariantVector; - diff_match_patch_test(); + diff_match_patch_test(); #ifndef USE_GTEST -public: - int run_all_tests(); - - // DIFF TEST FUNCTIONS - void testDiffCommonPrefix(); - void testDiffCommonSuffix(); - void testDiffCommonOverlap(); - void testDiffHalfmatch(); - void testDiffLinesToChars(); - void testDiffCharsToLines(); - void testDiffCleanupMerge(); - void testDiffCleanupSemanticLossless(); - void testDiffCleanupSemantic(); - void testDiffCleanupEfficiency(); - void testDiffPrettyHtml(); - void testDiffText(); - void testDiffDelta(); - void testDiffXIndex(); - void testDiffLevenshtein(); - void testDiffBisect(); - void testDiffMain(); - - // MATCH TEST FUNCTIONS - void testMatchAlphabet(); - void testMatchBitap(); - void testMatchMain(); - - // PATCH TEST FUNCTIONS - void testPatchObj(); - void testPatchFromText(); - void testPatchToText(); - void testPatchAddContext(); - void 
testPatchMake(); - void testPatchSplitMax(); - void testPatchAddPadding(); - void testPatchApply(); - -private: - bool runTest( std::function< void() > test ); - std::size_t numPassedTests{ 0 }; - std::size_t numFailedTests{ 0 }; - - // Define equality. - template< typename T > - void assertEquals( const std::string &strCase, const T &lhs, const T &rhs ) - { - bool failed = ( lhs.size() != rhs.size() ); - if ( !failed ) - { - for ( auto ii = 0ULL; !failed && ( ii < lhs.size() ); ++ii ) - { - auto &&t1 = lhs[ ii ]; - auto &&t2 = rhs[ ii ]; - failed = t1 != t2; - } - } - - if ( failed ) - { - // Build human readable description of both lists. - auto lhsString = NUtils::to_wstring( lhs, true ); - auto rhsString = NUtils::to_wstring( rhs, true ); - reportFailure( strCase, lhsString, rhsString ); - return; - } - reportPassed( strCase ); + public: + int run_all_tests(); + + // DIFF TEST FUNCTIONS + void testDiffCommonPrefix(); + void testDiffCommonSuffix(); + void testDiffCommonOverlap(); + void testDiffHalfmatch(); + void testDiffLinesToChars(); + void testDiffCharsToLines(); + void testDiffCleanupMerge(); + void testDiffCleanupSemanticLossless(); + void testDiffCleanupSemantic(); + void testDiffCleanupEfficiency(); + void testDiffPrettyHtml(); + void testDiffText(); + void testDiffDelta(); + void testDiffXIndex(); + void testDiffLevenshtein(); + void testDiffBisect(); + void testDiffMain(); + + // MATCH TEST FUNCTIONS + void testMatchAlphabet(); + void testMatchBitap(); + void testMatchMain(); + + // PATCH TEST FUNCTIONS + void testPatchObj(); + void testPatchFromText(); + void testPatchToText(); + void testPatchAddContext(); + void testPatchMake(); + void testPatchSplitMax(); + void testPatchAddPadding(); + void testPatchApply(); + + private: + bool runTest(std::function test); + std::size_t numPassedTests{0}; + std::size_t numFailedTests{0}; + + // Define equality. 
+ template + void assertEquals(const std::string &strCase, const T &lhs, const T &rhs) { + bool failed = (lhs.size() != rhs.size()); + if (!failed) { + for (auto ii = 0ULL; !failed && (ii < lhs.size()); ++ii) { + auto &&t1 = lhs[ii]; + auto &&t2 = rhs[ii]; + failed = t1 != t2; + } } - void assertEquals( const std::string &strCase, bool lhs, bool rhs ); - void assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ); - void assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ); - void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ); - void assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ); - void assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ); - void assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ); - - void assertTrue( const std::string &strCase, bool value ); - void assertFalse( const std::string &strCase, bool value ); - void assertEmpty( const std::string &strCase, const TStringVector &list ); - - void reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ); - void reportPassed( const std::string &strCase ); - - #define assertThrow( msg, COMMAND, EXCEPTION_TYPE ) \ - { \ - bool exceptionTriggered = false; \ - try \ - { \ - COMMAND;\ - assertFalse( msg, true ); \ - } \ - catch ( const EXCEPTION_TYPE &ex ) \ - { \ - exceptionTriggered = true; \ - } \ - assertTrue( std::string( msg ) + std::string( " - Exception triggered" ), exceptionTriggered ); \ - } + if (failed) { + // Build human readable description of both lists. 
+ auto lhsString = NUtils::to_wstring(lhs, true); + auto rhsString = NUtils::to_wstring(rhs, true); + reportFailure(strCase, lhsString, rhsString); + return; + } + reportPassed(strCase); + } + + void assertEquals(const std::string &strCase, bool lhs, bool rhs); + void assertEquals(const std::string &strCase, std::size_t n1, std::size_t n2); + void assertEquals(const std::string &strCase, const std::wstring &s1, + const std::wstring &s2); + void assertEquals(const std::string &strCase, const std::string &s1, + const std::string &s2); + void assertEquals(const std::string &strCase, const Diff &d1, const Diff &d2); + void assertEquals(const std::string &strCase, const TVariant &var1, + const TVariant &var2); + void assertEquals(const std::string &strCase, const TCharPosMap &m1, + const TCharPosMap &m2); + + void assertTrue(const std::string &strCase, bool value); + void assertFalse(const std::string &strCase, bool value); + void assertEmpty(const std::string &strCase, const TStringVector &list); + + void reportFailure(const std::string &strCase, const std::wstring &expected, + const std::wstring &actual); + void reportPassed(const std::string &strCase); + +#define assertThrow(msg, COMMAND, EXCEPTION_TYPE) \ + { \ + bool exceptionTriggered = false; \ + try { \ + COMMAND; \ + assertFalse(msg, true); \ + } catch (const EXCEPTION_TYPE &ex) { \ + exceptionTriggered = true; \ + } \ + assertTrue(std::string(msg) + std::string(" - Exception triggered"), \ + exceptionTriggered); \ + } #endif -public: - bool equals( const TVariant &var1, const TVariant &var2 ); - - template< typename T > - bool equals( const T &lhs, const T &rhs ) - { - bool equal = ( lhs.size() == rhs.size() ); - for ( auto ii = 0ULL; equal && ( ii < lhs.size() ); ++ii ) - { - auto &&t1 = lhs[ ii ]; - auto &&t2 = rhs[ ii ]; - equal = t1 == t2; - } - return equal; + public: + bool equals(const TVariant &var1, const TVariant &var2); + + template + bool equals(const T &lhs, const T &rhs) { + bool equal = 
(lhs.size() == rhs.size()); + for (auto ii = 0ULL; equal && (ii < lhs.size()); ++ii) { + auto &&t1 = lhs[ii]; + auto &&t2 = rhs[ii]; + equal = t1 == t2; } - diff_match_patch dmp; + return equal; + } + diff_match_patch dmp; - // Construct the two texts which made up the diff originally. - TStringVector diff_rebuildtexts( const TDiffVector &diffs ); + // Construct the two texts which made up the diff originally. + TStringVector diff_rebuildtexts(const TDiffVector &diffs); }; -#endif // DIFF_MATCH_PATCH_TEST_H +#endif // DIFF_MATCH_PATCH_TEST_H diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp index 3665956f..4e01f687 100644 --- a/cpp17/diff_match_patch_test_assertEquals.cpp +++ b/cpp17/diff_match_patch_test_assertEquals.cpp @@ -16,139 +16,137 @@ * limitations under the License. */ +#include + #include "diff_match_patch.h" -#include "diff_match_patch_utils.h" #include "diff_match_patch_test.h" - -#include +#include "diff_match_patch_utils.h" #ifndef USE_GTEST -void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) -{ - std::cout << "FAILED : " + strCase + "\n"; - std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n"; - numFailedTests++; - //throw strCase; +void diff_match_patch_test::reportFailure(const std::string &strCase, + const std::wstring &expected, + const std::wstring &actual) { + std::cout << "FAILED : " + strCase + "\n"; + std::wcerr << " Expected: " << expected << "\n Actual: " << actual + << "\n"; + numFailedTests++; + // throw strCase; } -void diff_match_patch_test::reportPassed( const std::string &strCase ) -{ - std::cout << "PASSED: " + strCase + "\n"; +void diff_match_patch_test::reportPassed(const std::string &strCase) { + std::cout << "PASSED: " + strCase + "\n"; } -void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ) -{ - if ( n1 != n2 ) - { - 
reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) ); - } - reportPassed( strCase ); +void diff_match_patch_test::assertEquals(const std::string &strCase, + std::size_t n1, std::size_t n2) { + if (n1 != n2) { + reportFailure(strCase, std::to_wstring(n1), std::to_wstring(n2)); + } + reportPassed(strCase); } -void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ) -{ - if ( s1 != s2 ) - { - reportFailure( strCase, s1, s2 ); - } - reportPassed( strCase ); +void diff_match_patch_test::assertEquals(const std::string &strCase, + const std::wstring &s1, + const std::wstring &s2) { + if (s1 != s2) { + reportFailure(strCase, s1, s2); + } + reportPassed(strCase); } -void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) -{ - return assertEquals( strCase, NUtils::to_wstring( s1 ), NUtils::to_wstring( s2 ) ); +void diff_match_patch_test::assertEquals(const std::string &strCase, + const std::string &s1, + const std::string &s2) { + return assertEquals(strCase, NUtils::to_wstring(s1), NUtils::to_wstring(s2)); } -void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) -{ - if ( d1 != d2 ) - { - reportFailure( strCase, d1.toString(), d2.toString() ); - } - reportPassed( strCase ); +void diff_match_patch_test::assertEquals(const std::string &strCase, + const Diff &d1, const Diff &d2) { + if (d1 != d2) { + reportFailure(strCase, d1.toString(), d2.toString()); + } + reportPassed(strCase); } -void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ) -{ - if ( var1 != var2 ) - { - reportFailure( strCase, NUtils::to_wstring( var1 ), NUtils::to_wstring( var2 ) ); - } - reportPassed( strCase ); +void diff_match_patch_test::assertEquals(const std::string &strCase, + const TVariant &var1, + const TVariant &var2) { + if (var1 != var2) { + 
reportFailure(strCase, NUtils::to_wstring(var1), NUtils::to_wstring(var2)); + } + reportPassed(strCase); } -void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ) -{ - for ( auto &&ii : m1 ) - { - auto rhs = m2.find( ii.first ); - if ( rhs == m2.end() ) - { - reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); - } +void diff_match_patch_test::assertEquals(const std::string &strCase, + const TCharPosMap &m1, + const TCharPosMap &m2) { + for (auto &&ii : m1) { + auto rhs = m2.find(ii.first); + if (rhs == m2.end()) { + reportFailure(strCase, + L"(" + NUtils::to_wstring(ii.first) + L"," + + std::to_wstring(ii.second) + L")", + L""); } - - for ( auto &&ii : m2 ) - { - auto rhs = m1.find( ii.first ); - if ( rhs == m1.end() ) - { - reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); - } + } + + for (auto &&ii : m2) { + auto rhs = m1.find(ii.first); + if (rhs == m1.end()) { + reportFailure(strCase, + L"(" + NUtils::to_wstring(ii.first) + L"," + + std::to_wstring(ii.second) + L")", + L""); } + } - reportPassed( strCase ); + reportPassed(strCase); } -void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs ) -{ - if ( lhs != rhs ) - { - reportFailure( strCase, NUtils::to_wstring( lhs, false ), NUtils::to_wstring( rhs, false ) ); - } - reportPassed( strCase ); +void diff_match_patch_test::assertEquals(const std::string &strCase, bool lhs, + bool rhs) { + if (lhs != rhs) { + reportFailure(strCase, NUtils::to_wstring(lhs, false), + NUtils::to_wstring(rhs, false)); + } + reportPassed(strCase); } -void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) -{ - if ( !value ) - { - reportFailure( strCase, NUtils::to_wstring( true, false ), NUtils::to_wstring( false, false ) ); - } - reportPassed( strCase ); +void 
diff_match_patch_test::assertTrue(const std::string &strCase, bool value) { + if (!value) { + reportFailure(strCase, NUtils::to_wstring(true, false), + NUtils::to_wstring(false, false)); + } + reportPassed(strCase); } -void diff_match_patch_test::assertFalse( const std::string &strCase, bool value ) -{ - if ( value ) - { - reportFailure( strCase, NUtils::to_wstring( false, false ), NUtils::to_wstring( true, false ) ); - } - reportPassed( strCase ); +void diff_match_patch_test::assertFalse(const std::string &strCase, + bool value) { + if (value) { + reportFailure(strCase, NUtils::to_wstring(false, false), + NUtils::to_wstring(true, false)); + } + reportPassed(strCase); } -void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) -{ - if ( !list.empty() ) - { - throw strCase; - } +void diff_match_patch_test::assertEmpty(const std::string &strCase, + const TStringVector &list) { + if (!list.empty()) { + throw strCase; + } } #endif // Construct the two texts which made up the diff originally. 
-diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) -{ - TStringVector text( 2, std::wstring() ); - for ( auto &&myDiff : diffs ) - { - if ( myDiff.operation != INSERT ) - { - text[ 0 ] += myDiff.text; - } - if ( myDiff.operation != DELETE ) - { - text[ 1 ] += myDiff.text; - } +diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( + const TDiffVector &diffs) { + TStringVector text(2, std::wstring()); + for (auto &&myDiff : diffs) { + if (myDiff.operation != INSERT) { + text[0] += myDiff.text; + } + if (myDiff.operation != DELETE) { + text[1] += myDiff.text; } - return text; + } + return text; } diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp index 9470adee..967fd1eb 100644 --- a/cpp17/diff_match_patch_utils.cpp +++ b/cpp17/diff_match_patch_utils.cpp @@ -18,187 +18,157 @@ #include "diff_match_patch_utils.h" -//#include -//#include -//#include -//#include -//#include -//#include -//#include +// #include +// #include +// #include +// #include +// #include +// #include +// #include #include -namespace NUtils -{ - std::wstring safeMid( const std::wstring &str, std::size_t pos ) - { - return safeMid( str, pos, std::string::npos ); +namespace NUtils { +std::wstring safeMid(const std::wstring &str, std::size_t pos) { + return safeMid(str, pos, std::string::npos); +} + +std::wstring safeMid(const std::wstring &str, std::size_t pos, + std::size_t len) { + return (pos == str.length()) ? 
std::wstring() : str.substr(pos, len); +} + +void replace(std::wstring &inString, const std::wstring &from, + const std::wstring &to) { + std::size_t pos = inString.find(from); + while (pos != std::wstring::npos) { + inString.replace(pos, from.length(), to); + pos = inString.find(from, pos + 1); + } +} + +wchar_t toHexUpper(wchar_t value) { return L"0123456789ABCDEF"[value & 0xF]; } + +std::wstring toPercentEncoding(wchar_t c, const std::wstring &exclude, + const std::wstring &include) { + std::wstring retVal; + + if (((c >= 0x61 && c <= 0x7A) // ALPHA + || (c >= 0x41 && c <= 0x5A) // ALPHA + || (c >= 0x30 && c <= 0x39) // DIGIT + || c == 0x2D // - + || c == 0x2E // . + || c == 0x5F // _ + || c == 0x7E // ~ + || (exclude.find(c) != std::string::npos)) && + (include.find(c) == std::string::npos)) { + retVal = std::wstring(1, c); + } else { + retVal = L'%'; + retVal += toHexUpper((c & 0xf0) >> 4); + retVal += toHexUpper(c & 0xf); + } + return retVal; +} + +std::wstring toPercentEncoding( + const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, + const std::wstring &include /*= std::wstring() */) { + if (input.empty()) return {}; + std::wstring retVal; + retVal.reserve(input.length() * 3); + + static_assert(sizeof(wchar_t) <= 4, "wchar_t is greater that 32 bit"); + + std::wstring_convert > utf8_conv; + for (auto &&c : input) { + auto currStr = std::wstring(1, c); + auto asBytes = utf8_conv.to_bytes(currStr); + for (auto &&ii : asBytes) { + if (ii) retVal += toPercentEncoding(ii, exclude, include); } - - std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) - { - return ( pos == str.length() ) ? 
std::wstring() : str.substr( pos, len ); - } - - void replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ) - { - std::size_t pos = inString.find( from ); - while ( pos != std::wstring::npos ) - { - inString.replace( pos, from.length(), to ); - pos = inString.find( from, pos + 1 ); - } - } - - wchar_t toHexUpper( wchar_t value ) - { - return L"0123456789ABCDEF"[ value & 0xF ]; - } - - std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ) - { - std::wstring retVal; - - if ( ( ( c >= 0x61 && c <= 0x7A ) // ALPHA - || ( c >= 0x41 && c <= 0x5A ) // ALPHA - || ( c >= 0x30 && c <= 0x39 ) // DIGIT - || c == 0x2D // - - || c == 0x2E // . - || c == 0x5F // _ - || c == 0x7E // ~ - || ( exclude.find( c ) != std::string::npos ) ) - && ( include.find( c ) == std::string::npos ) ) - { - retVal = std::wstring( 1, c ); - } - else - { - retVal = L'%'; - retVal += toHexUpper( ( c & 0xf0 ) >> 4 ); - retVal += toHexUpper( c & 0xf ); - } - return retVal; - } - - std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, const std::wstring &include /*= std::wstring() */ ) - { - if ( input.empty() ) - return {}; - std::wstring retVal; - retVal.reserve( input.length() * 3 ); - - static_assert( sizeof( wchar_t ) <= 4, "wchar_t is greater that 32 bit" ); - - std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; - for ( auto &&c : input ) - { - auto currStr = std::wstring( 1, c ); - auto asBytes = utf8_conv.to_bytes( currStr ); - for ( auto &&ii : asBytes ) - { - if ( ii ) - retVal += toPercentEncoding( ii, exclude, include ); - } - } - return retVal; - } - - wchar_t getValue( wchar_t ch ) - { - if ( ch >= '0' && ch <= '9' ) - ch -= '0'; - else if ( ch >= 'a' && ch <= 'f' ) - ch = ch - 'a' + 10; - else if ( ch >= 'A' && ch <= 'F' ) - ch = ch - 'A' + 10; - else - throw std::wstring( L"Invalid Character %" ) + ch; - - return ch; + } + return retVal; +} + 
+wchar_t getValue(wchar_t ch) { + if (ch >= '0' && ch <= '9') + ch -= '0'; + else if (ch >= 'a' && ch <= 'f') + ch = ch - 'a' + 10; + else if (ch >= 'A' && ch <= 'F') + ch = ch - 'A' + 10; + else + throw std::wstring(L"Invalid Character %") + ch; + + return ch; +} + +std::wstring fromPercentEncoding(const std::wstring &input) { + if (input.empty()) return {}; + std::string retVal; + retVal.reserve(input.length()); + for (auto ii = 0ULL; ii < input.length(); ++ii) { + auto c = input[ii]; + if (c == L'%' && (ii + 2) < input.length()) { + auto a = input[++ii]; + auto b = input[++ii]; + a = getValue(a); + b = getValue(b); + a = a << 4; + auto value = a | b; + retVal += std::string(1, value); + } else if (c == '+') + retVal += ' '; + else { + retVal += c; } - - std::wstring fromPercentEncoding( const std::wstring &input ) - { - if ( input.empty() ) - return {}; - std::string retVal; - retVal.reserve( input.length() ); - for ( auto ii = 0ULL; ii < input.length(); ++ii ) - { - auto c = input[ ii ]; - if ( c == L'%' && ( ii + 2 ) < input.length() ) - { - auto a = input[ ++ii ]; - auto b = input[ ++ii ]; - a = getValue( a ); - b = getValue( b ); - a = a << 4; - auto value = a | b; - retVal += std::string( 1, value ); - } - else if ( c == '+' ) - retVal += ' '; - else - { - retVal += c; - } - } - std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; - auto asBytes = utf8_conv.from_bytes( retVal ); - - return asBytes; - } - - bool endsWith( const std::wstring &string, const std::wstring &suffix ) - { - if ( suffix.length() > string.length() ) - return false; - - return string.compare( string.length() - suffix.length(), suffix.length(), suffix ) == 0; - } - - TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) - { - if ( separator.empty() ) - { - if ( !skipEmptyParts || !string.empty() ) - return { string }; - return {}; - } - - TStringVector strings; - auto prevPos = 0ULL; - auto startPos = 
string.find_first_of( separator ); - while ( startPos != std::string::npos ) - { - auto start = prevPos ? prevPos + 1 : prevPos; - auto len = prevPos ? ( startPos - prevPos - 1 ) : startPos; - auto curr = string.substr( start, len ); - prevPos = startPos; - if ( !skipEmptyParts || !curr.empty() ) - strings.emplace_back( curr ); - startPos = string.find_first_of( separator, prevPos + 1 ); - } - auto remainder = string.substr( prevPos ? prevPos + 1 : prevPos ); - if ( !skipEmptyParts || !remainder.empty() ) - strings.emplace_back( remainder ); - - return strings; - } - - int64_t toInt( const std::wstring &string ) - { - int64_t retVal = 0; - try - { - std::size_t lastPos{}; - retVal = std::stoul( string, &lastPos ); - if ( lastPos != string.length() ) - return 0; - } - catch ( ... ) - { - } - return retVal; - } - -} \ No newline at end of file + } + std::wstring_convert > utf8_conv; + auto asBytes = utf8_conv.from_bytes(retVal); + + return asBytes; +} + +bool endsWith(const std::wstring &string, const std::wstring &suffix) { + if (suffix.length() > string.length()) return false; + + return string.compare(string.length() - suffix.length(), suffix.length(), + suffix) == 0; +} + +TStringVector splitString(const std::wstring &string, + const std::wstring &separator, bool skipEmptyParts) { + if (separator.empty()) { + if (!skipEmptyParts || !string.empty()) return {string}; + return {}; + } + + TStringVector strings; + auto prevPos = 0ULL; + auto startPos = string.find_first_of(separator); + while (startPos != std::string::npos) { + auto start = prevPos ? prevPos + 1 : prevPos; + auto len = prevPos ? (startPos - prevPos - 1) : startPos; + auto curr = string.substr(start, len); + prevPos = startPos; + if (!skipEmptyParts || !curr.empty()) strings.emplace_back(curr); + startPos = string.find_first_of(separator, prevPos + 1); + } + auto remainder = string.substr(prevPos ? 
prevPos + 1 : prevPos); + if (!skipEmptyParts || !remainder.empty()) strings.emplace_back(remainder); + + return strings; +} + +int64_t toInt(const std::wstring &string) { + int64_t retVal = 0; + try { + std::size_t lastPos{}; + retVal = std::stoul(string, &lastPos); + if (lastPos != string.length()) return 0; + } catch (...) { + } + return retVal; +} + +} // namespace NUtils \ No newline at end of file diff --git a/cpp17/diff_match_patch_utils.h b/cpp17/diff_match_patch_utils.h index 30c51c06..7362a32f 100644 --- a/cpp17/diff_match_patch_utils.h +++ b/cpp17/diff_match_patch_utils.h @@ -20,269 +20,267 @@ #ifndef DIFF_MATCH_PATCH_UTILS_H #define DIFF_MATCH_PATCH_UTILS_H // +#include #include #include -#include -namespace NUtils -{ - using TStringVector = std::vector< std::wstring >; - - /* - * Utility functions to replace Qt built in methods - */ - - /** - * A safer version of std::wstring.mid(pos). This one returns "" instead of - * null when the postion equals the string length. - * @param str String to take a substring from. - * @param pos Position to start the substring from. - * @return Substring. - */ - std::wstring safeMid( const std::wstring &str, std::size_t pos ); - - /** - * A safer version of std::wstring.mid(pos, len). This one returns "" instead of - * null when the postion equals the string length. - * @param str String to take a substring from. - * @param pos Position to start the substring from. - * @param len Length of substring. - * @return Substring. - */ - std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); - - /** - * replaces QString::replace - * @param haystack String to replace all needles with to - * @param needle Substring to search for in the haystack - * @param to replacement string - * @return void. 
- */ - void replace( std::wstring &haystack, const std::wstring &needle, const std::wstring &to ); - - /** - * replaces returns the html percent encoded character equivalent - * @param c the input Character to return the encoded string of - * @param exclude The list of chars that are NOT to be encoded - * @param include The list of chars that are to be encoded - * @return the encoded string - */ - std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); - - /** - * return the html percent encoded string equivalent - * @param input the input String to return the encoded string of - * @param exclude The list of chars that are NOT to be encoded - * @param include The list of chars that are to be encoded - * @return the encoded string - */ - std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); - - /** - * returns the string equivalent removing any percent encoding and replacing it with the correct character - * @param input the input String to return the encoded string of - * @return the decoded string - */ - std::wstring fromPercentEncoding( const std::wstring &input ); - - /** - * replaces returns integer value of the character, '0'-'9' = 0-9, 'A'-'F' = 10-15, 'a'-'f' = 10-15 - * @param input the value to return the integer value of - * @return the integer value of the character - */ - wchar_t getIntValue( wchar_t ch ); - - /** - * return the integer value of the string - * @param string the String to be converted to an integer - * @return the integer version, on an invalid input returns 0 - */ - int64_t toInt( const std::wstring &string ); - - /** - * return true if the string has the suffix - * @param string the String to check to see if it ends with suffix - * @param suffix the String to see if the input string ends with - * @return True if the string ends with suffix - */ - bool 
endsWith( const std::wstring &string, const std::wstring &suffix ); - - /** - * return a TStringVector of the string split by separator - * @param string the String to be split - * @param separator the String to search in the input string to split on - * @param if true, empty values will be removed - * @return the split string - */ - TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ); - - /** - * splices the objects vector into the input vector - * @param input The input vector to splice out from - * @param start The position of the first item to remove from the input vector - * @param count How many values to remove from the input vector - * @param objects optional objects to insert where the previous objects were removed - * @return the character as a single character string - */ - template< typename T > - static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const std::vector< T > &objects = {} ) - { - auto deletedRange = std::vector< T >( { input.begin() + start, input.begin() + start + count } ); - input.erase( input.begin() + start, input.begin() + start + count ); - input.insert( input.begin() + start, objects.begin(), objects.end() ); - - return deletedRange; - } - - /** - * splices the objects vector into the input vector - * @param input The input vector to splice out from - * @param start The position of the first item to remove from the input vector - * @param count How many values to remove from the input vector - * @param object individual object to insert where the previous objects were removed - * @return the character as a single character string - */ - template< typename T > - static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const T &object ) - { - return Splice( input, start, count, std::vector< T >( { object } ) ); - } - - template< typename T > - std::wstring to_wstring( const T & /*value*/, bool 
/*doubleQuoteEmpty*/ ) - { - assert( false ); - return {}; - } - - /** - * return the single character wide string for the given character - * @param value the char to be converted to an wstring - * @param doubleQuoteEmpty, if the return value would be empty, return "" - * @return the character as a single character string - */ - inline std::wstring to_wstring( const char &value, bool doubleQuoteEmpty ) - { - if ( doubleQuoteEmpty && ( value == 0 ) ) - return LR"("")"; - - return std::wstring( 1, static_cast< wchar_t >( value ) ); - } - - template<> - inline std::wstring to_wstring( const bool &value, bool /*doubleQuoteOnEmpty*/ ) - { - std::wstring retVal = std::wstring( value ? L"true" : L"false" ); - return retVal; - } - - template<> - inline std::wstring to_wstring( const std::vector< bool >::reference &value, bool /*doubleQuoteOnEmpty*/ ) - { - std::wstring retVal = std::wstring( value ? L"true" : L"false" ); - return retVal; - } - - template<> - inline std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) - { - if ( doubleQuoteEmpty && string.empty() ) - return LR"("")"; - - std::wstring wstring( string.size(), L' ' ); // Overestimate number of code points. - wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), string.size() ) ); // Shrink to fit. 
- return wstring; - } - - template<> - inline std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) - { - if ( doubleQuoteEmpty && ( value == 0 ) ) - return LR"("")"; - - return std::wstring( 1, value ); - } - - template<> - inline std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) - { - return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); - } +namespace NUtils { +using TStringVector = std::vector; - template<> - inline std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) - { - if ( doubleQuoteEmpty && value.empty() ) - return LR"("")"; - - return value; - } +/* + * Utility functions to replace Qt built in methods + */ - template< typename T > - inline std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty ) - { - std::wstring retVal = L"("; - bool first = true; - for ( auto &&curr : values ) - { - if ( !first ) - { - retVal += L", "; - } - retVal += to_wstring( curr, doubleQuoteEmpty ); - first = false; - } - retVal += L")"; - return retVal; - } +/** + * A safer version of std::wstring.mid(pos). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ +std::wstring safeMid(const std::wstring &str, std::size_t pos); + +/** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead + * of null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring. + */ +std::wstring safeMid(const std::wstring &str, std::size_t pos, std::size_t len); + +/** + * replaces QString::replace + * @param haystack String to replace all needles with to + * @param needle Substring to search for in the haystack + * @param to replacement string + * @return void. 
+ */ +void replace(std::wstring &haystack, const std::wstring &needle, + const std::wstring &to); + +/** + * replaces returns the html percent encoded character equivalent + * @param c the input Character to return the encoded string of + * @param exclude The list of chars that are NOT to be encoded + * @param include The list of chars that are to be encoded + * @return the encoded string + */ +std::wstring toPercentEncoding(wchar_t c, + const std::wstring &exclude = std::wstring(), + const std::wstring &include = std::wstring()); + +/** + * return the html percent encoded string equivalent + * @param input the input String to return the encoded string of + * @param exclude The list of chars that are NOT to be encoded + * @param include The list of chars that are to be encoded + * @return the encoded string + */ +std::wstring toPercentEncoding(const std::wstring &input, + const std::wstring &exclude = std::wstring(), + const std::wstring &include = std::wstring()); + +/** + * returns the string equivalent removing any percent encoding and replacing it + * with the correct character + * @param input the input String to return the encoded string of + * @return the decoded string + */ +std::wstring fromPercentEncoding(const std::wstring &input); - template<> - inline std::wstring to_wstring( const std::vector< bool > &boolArray, bool doubleQuoteOnEmpty ) - { - std::wstring retVal; - for ( auto &&curr : boolArray ) - { - retVal += L"\t" + to_wstring( curr, doubleQuoteOnEmpty ); - } - return retVal; - } +/** + * replaces returns integer value of the character, '0'-'9' = 0-9, 'A'-'F' = + * 10-15, 'a'-'f' = 10-15 + * @param input the value to return the integer value of + * @return the integer value of the character + */ +wchar_t getIntValue(wchar_t ch); - template< typename T > - inline typename std::enable_if_t< std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) - { - if ( doubleQuoteEmpty && ( 
values.size() == 0 ) ) - return LR"(\"\")"; - - std::wstring retVal; - for ( auto &&curr : values ) - { - retVal += to_wstring( curr, false ); - } - return retVal; - } +/** + * return the integer value of the string + * @param string the String to be converted to an integer + * @return the integer version, on an invalid input returns 0 + */ +int64_t toInt(const std::wstring &string); - template< typename T > - inline typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty = false ) - { - std::wstring retVal = L"("; - bool first = true; - for ( auto &&curr : values ) - { - if ( !first ) - { - retVal += L", "; - } - retVal += to_wstring( curr, doubleQuoteEmpty ); - first = false; - } - retVal += L")"; - return retVal; +/** + * return true if the string has the suffix + * @param string the String to check to see if it ends with suffix + * @param suffix the String to see if the input string ends with + * @return True if the string ends with suffix + */ +bool endsWith(const std::wstring &string, const std::wstring &suffix); + +/** + * return a TStringVector of the string split by separator + * @param string the String to be split + * @param separator the String to search in the input string to split on + * @param if true, empty values will be removed + * @return the split string + */ +TStringVector splitString(const std::wstring &string, + const std::wstring &separator, bool skipEmptyParts); + +/** + * splices the objects vector into the input vector + * @param input The input vector to splice out from + * @param start The position of the first item to remove from the input vector + * @param count How many values to remove from the input vector + * @param objects optional objects to insert where the previous objects were + * removed + * @return the character as a single character string + */ +template +static std::vector Splice(std::vector &input, std::size_t start, + std::size_t 
count, + const std::vector &objects = {}) { + auto deletedRange = + std::vector({input.begin() + start, input.begin() + start + count}); + input.erase(input.begin() + start, input.begin() + start + count); + input.insert(input.begin() + start, objects.begin(), objects.end()); + + return deletedRange; +} + +/** + * splices the objects vector into the input vector + * @param input The input vector to splice out from + * @param start The position of the first item to remove from the input vector + * @param count How many values to remove from the input vector + * @param object individual object to insert where the previous objects were + * removed + * @return the character as a single character string + */ +template +static std::vector Splice(std::vector &input, std::size_t start, + std::size_t count, const T &object) { + return Splice(input, start, count, std::vector({object})); +} + +template +std::wstring to_wstring(const T & /*value*/, bool /*doubleQuoteEmpty*/) { + assert(false); + return {}; +} + +/** + * return the single character wide string for the given character + * @param value the char to be converted to an wstring + * @param doubleQuoteEmpty, if the return value would be empty, return "" + * @return the character as a single character string + */ +inline std::wstring to_wstring(const char &value, bool doubleQuoteEmpty) { + if (doubleQuoteEmpty && (value == 0)) return LR"("")"; + + return std::wstring(1, static_cast(value)); +} + +template <> +inline std::wstring to_wstring(const bool &value, bool /*doubleQuoteOnEmpty*/) { + std::wstring retVal = std::wstring(value ? L"true" : L"false"); + return retVal; +} + +template <> +inline std::wstring to_wstring(const std::vector::reference &value, + bool /*doubleQuoteOnEmpty*/) { + std::wstring retVal = std::wstring(value ? 
L"true" : L"false"); + return retVal; +} + +template <> +inline std::wstring to_wstring(const std::string &string, + bool doubleQuoteEmpty) { + if (doubleQuoteEmpty && string.empty()) return LR"("")"; + + std::wstring wstring(string.size(), + L' '); // Overestimate number of code points. + wstring.resize(std::mbstowcs(&wstring[0], string.c_str(), + string.size())); // Shrink to fit. + return wstring; +} + +template <> +inline std::wstring to_wstring(const wchar_t &value, bool doubleQuoteEmpty) { + if (doubleQuoteEmpty && (value == 0)) return LR"("")"; + + return std::wstring(1, value); +} + +template <> +inline std::wstring to_wstring(const int &value, bool doubleQuoteEmpty) { + return to_wstring(static_cast(value), doubleQuoteEmpty); +} + +template <> +inline std::wstring to_wstring(const std::wstring &value, + bool doubleQuoteEmpty) { + if (doubleQuoteEmpty && value.empty()) return LR"("")"; + + return value; +} + +template +inline std::wstring to_wstring(const std::vector &values, + bool doubleQuoteEmpty) { + std::wstring retVal = L"("; + bool first = true; + for (auto &&curr : values) { + if (!first) { + retVal += L", "; } - - template< typename T > - std::wstring to_wstring( const T &value ) - { - return to_wstring( value, false ); + retVal += to_wstring(curr, doubleQuoteEmpty); + first = false; + } + retVal += L")"; + return retVal; +} + +template <> +inline std::wstring to_wstring(const std::vector &boolArray, + bool doubleQuoteOnEmpty) { + std::wstring retVal; + for (auto &&curr : boolArray) { + retVal += L"\t" + to_wstring(curr, doubleQuoteOnEmpty); + } + return retVal; +} + +template +inline typename std::enable_if_t, std::wstring> +to_wstring(const std::initializer_list &values, + bool doubleQuoteEmpty = false) { + if (doubleQuoteEmpty && (values.size() == 0)) return LR"(\"\")"; + + std::wstring retVal; + for (auto &&curr : values) { + retVal += to_wstring(curr, false); + } + return retVal; +} + +template +inline typename std::enable_if_t, std::wstring> 
+to_wstring(const std::initializer_list &values, + bool doubleQuoteEmpty = false) { + std::wstring retVal = L"("; + bool first = true; + for (auto &&curr : values) { + if (!first) { + retVal += L", "; } -}; + retVal += to_wstring(curr, doubleQuoteEmpty); + first = false; + } + retVal += L")"; + return retVal; +} + +template +std::wstring to_wstring(const T &value) { + return to_wstring(value, false); +} +}; // namespace NUtils #endif From c021ea7890c6fdd789535197b5c7095ec58a5cdf Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 15:49:15 -0800 Subject: [PATCH 08/15] Fix for linux build --- cpp17/CMakeLists.txt | 4 ++-- cpp17/diff_match_patch.cpp | 24 ++++++++++++------------ cpp17/diff_match_patch_test.cpp | 3 ++- cpp17/diff_match_patch_test.h | 3 +++ cpp17/diff_match_patch_utils.cpp | 10 ++-------- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/cpp17/CMakeLists.txt b/cpp17/CMakeLists.txt index dbc68325..27439fe8 100644 --- a/cpp17/CMakeLists.txt +++ b/cpp17/CMakeLists.txt @@ -1,4 +1,6 @@ cmake_minimum_required(VERSION 3.22) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED YES) SET( _PROJECT_NAME diff_match_patch_cpp17 ) @@ -15,8 +17,6 @@ target_include_directories( ${_PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR} ) target_link_libraries( ${_PROJECT_NAME} ) SET( TEST_NAME "${_PROJECT_NAME}_test" ) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED YES) project( ${TEST_NAME} ) add_executable( ${TEST_NAME} diff_match_patch_test.cpp diff_match_patch_test.h diff_match_patch_test_assertEquals.cpp) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index 71a55027..28ee3167 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -1439,7 +1439,7 @@ std::size_t diff_match_patch::match_main(const std::wstring &text, // Check for null inputs not needed since null can't be passed via // std::wstring - loc = std::max(0ULL, std::min(loc, text.length())); + loc = std::max(0UL, 
std::min(loc, text.length())); if (text == pattern) { // Shortcut (potentially not guaranteed by the algorithm) return 0; @@ -1512,7 +1512,7 @@ std::size_t diff_match_patch::match_bitap(const std::wstring &text, } // Use the result from this iteration as the maximum for the next. bin_max = bin_mid; - auto start = std::max(1ULL, (loc > bin_mid) ? (loc - bin_mid + 1) : 0); + auto start = std::max(1UL, (loc > bin_mid) ? (loc - bin_mid + 1) : 0UL); auto finish = std::min(loc + bin_mid, text.length()) + pattern.length(); rd = std::vector(finish + 2, 0); @@ -1548,7 +1548,7 @@ std::size_t diff_match_patch::match_bitap(const std::wstring &text, if (best_loc > loc) { // When passing loc, don't exceed our current distance from loc. start = - std::max(1ULL, (2 * loc > best_loc) ? 2 * loc - best_loc : 1); + std::max(1UL, (2 * loc > best_loc) ? 2 * loc - best_loc : 1); } else { // Already passed loc, downhill from here on in. break; @@ -1623,10 +1623,10 @@ void diff_match_patch::patch_addContext(Patch &patch, padding += Patch_Margin; pattern = safeMid( text, - std::max(0ULL, - ((patch.start2 > padding) ? patch.start2 - padding : 0ULL)), + std::max(0UL, + ((patch.start2 > padding) ? patch.start2 - padding : 0UL)), std::min(text.length(), patch.start2 + patch.length1 + padding) - - std::max(0ULL, + std::max(0UL, (patch.start2 > padding) ? patch.start2 - padding : 0)); } // Add one chunk for good luck. @@ -1635,11 +1635,11 @@ void diff_match_patch::patch_addContext(Patch &patch, // Add the prefix. std::wstring prefix = safeMid( text, - std::max(0ULL, - ((patch.start2 > padding) ? patch.start2 - padding : 0ULL)), + std::max(0UL, + ((patch.start2 > padding) ? patch.start2 - padding : 0UL)), patch.start2 - - std::max(0ULL, - ((patch.start2 > padding) ? patch.start2 - padding : 0ULL))); + std::max(0UL, + ((patch.start2 > padding) ? 
patch.start2 - padding : 0UL))); if (!prefix.empty()) { patch.diffs.emplace(patch.diffs.begin(), EQUAL, prefix); } @@ -2021,7 +2021,7 @@ void diff_match_patch::patch_splitMax(TPatchVector &patches) { 0, std::min(diff_text.length(), (patch_size > (patch.length1 + Patch_Margin)) ? (patch_size - patch.length1 - Patch_Margin) - : (-1 * 1ULL))); + : (-1 * 1UL))); patch.length1 += diff_text.length(); start1 += diff_text.length(); if (diff_type == EQUAL) { @@ -2042,7 +2042,7 @@ void diff_match_patch::patch_splitMax(TPatchVector &patches) { // Compute the head context for the next patch. precontext = diff_text2(patch.diffs); precontext = precontext.substr( - std::max(0ULL, (precontext.length() > Patch_Margin) + std::max(0UL, (precontext.length() > Patch_Margin) ? (precontext.length() - Patch_Margin) : 0)); diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index 2533c160..74ec07cf 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -698,7 +698,8 @@ TEST_F(diff_match_patch_test, testDiffBisect) { // Timeout. 
diffs = {Diff(DELETE, "cat"), Diff(INSERT, "map")}; - assertEquals("diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); + results = dmp.diff_bisect(a, b, 0); + assertEquals("diff_bisect: Timeout.", diffs, results); } TEST_F(diff_match_patch_test, testDiffMain) { diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index 0ac1848b..c7403995 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -34,6 +34,8 @@ #define TEST_F(className, funcName) void diff_match_patch_test::funcName() #endif +#include "diff_match_patch_utils.h" + class diff_match_patch_test PUBLIC_TESTING { public: using TStringVector = diff_match_patch::TStringVector; @@ -81,6 +83,7 @@ class diff_match_patch_test PUBLIC_TESTING { void testPatchAddPadding(); void testPatchApply(); + void fromGitHubExamples(); private: bool runTest(std::function test); std::size_t numPassedTests{0}; diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp index 967fd1eb..ffff836b 100644 --- a/cpp17/diff_match_patch_utils.cpp +++ b/cpp17/diff_match_patch_utils.cpp @@ -18,14 +18,8 @@ #include "diff_match_patch_utils.h" -// #include -// #include -// #include -// #include -// #include -// #include -// #include #include +#include namespace NUtils { std::wstring safeMid(const std::wstring &str, std::size_t pos) { @@ -171,4 +165,4 @@ int64_t toInt(const std::wstring &string) { return retVal; } -} // namespace NUtils \ No newline at end of file +} // namespace NUtils From 173d72e0c244510b4bde871ef2b9898a3ddfafae Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 15:57:07 -0800 Subject: [PATCH 09/15] Fix for windows, I hate that size_t is different.... 
--- cpp17/diff_match_patch.cpp | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index 28ee3167..282a46a0 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -29,6 +29,14 @@ #include "diff_match_patch_utils.h" +#ifdef WIN32 +std::size_t kZERO{0ULL}; +std::size_t kONE{1ULL}; +#else +unsigned long kZERO{0UL}; +unsigned long kONE{1UL}; +#endif + ////////////////////////// // // Diff Class @@ -1439,7 +1447,7 @@ std::size_t diff_match_patch::match_main(const std::wstring &text, // Check for null inputs not needed since null can't be passed via // std::wstring - loc = std::max(0UL, std::min(loc, text.length())); + loc = std::max(kZERO, std::min(loc, text.length())); if (text == pattern) { // Shortcut (potentially not guaranteed by the algorithm) return 0; @@ -1512,7 +1520,7 @@ std::size_t diff_match_patch::match_bitap(const std::wstring &text, } // Use the result from this iteration as the maximum for the next. bin_max = bin_mid; - auto start = std::max(1UL, (loc > bin_mid) ? (loc - bin_mid + 1) : 0UL); + auto start = std::max(kONE, (loc > bin_mid) ? (loc - bin_mid + 1) : kZERO); auto finish = std::min(loc + bin_mid, text.length()) + pattern.length(); rd = std::vector(finish + 2, 0); @@ -1548,7 +1556,7 @@ std::size_t diff_match_patch::match_bitap(const std::wstring &text, if (best_loc > loc) { // When passing loc, don't exceed our current distance from loc. start = - std::max(1UL, (2 * loc > best_loc) ? 2 * loc - best_loc : 1); + std::max(kONE, (2 * loc > best_loc) ? 2 * loc - best_loc : 1); } else { // Already passed loc, downhill from here on in. break; @@ -1623,10 +1631,10 @@ void diff_match_patch::patch_addContext(Patch &patch, padding += Patch_Margin; pattern = safeMid( text, - std::max(0UL, + std::max(kZERO, ((patch.start2 > padding) ? 
patch.start2 - padding : 0UL)), std::min(text.length(), patch.start2 + patch.length1 + padding) - - std::max(0UL, + std::max(kZERO, (patch.start2 > padding) ? patch.start2 - padding : 0)); } // Add one chunk for good luck. @@ -1635,10 +1643,10 @@ void diff_match_patch::patch_addContext(Patch &patch, // Add the prefix. std::wstring prefix = safeMid( text, - std::max(0UL, + std::max(kZERO, ((patch.start2 > padding) ? patch.start2 - padding : 0UL)), patch.start2 - - std::max(0UL, + std::max(kZERO, ((patch.start2 > padding) ? patch.start2 - padding : 0UL))); if (!prefix.empty()) { patch.diffs.emplace(patch.diffs.begin(), EQUAL, prefix); @@ -2042,9 +2050,9 @@ void diff_match_patch::patch_splitMax(TPatchVector &patches) { // Compute the head context for the next patch. precontext = diff_text2(patch.diffs); precontext = precontext.substr( - std::max(0UL, (precontext.length() > Patch_Margin) - ? (precontext.length() - Patch_Margin) - : 0)); + std::max(kZERO, (precontext.length() > Patch_Margin) + ? (precontext.length() - Patch_Margin) + : 0)); std::wstring postcontext; // Append the end context for this patch. 
From ef1f82363819b710c2d5cbe1ac959dcefd4ab183 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 16:09:51 -0800 Subject: [PATCH 10/15] update comment listing confirmed build platforms --- cpp17/diff_match_patch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h index 47361095..821376a3 100644 --- a/cpp17/diff_match_patch.h +++ b/cpp17/diff_match_patch.h @@ -43,7 +43,7 @@ Bloom) * C++17 was intentionally chosen for variant support * - * Code known to compile with C++17 + * Code known to compile with C++17 VS2022 and g++ 9.5.0 * * Here is a trivial sample program which works properly when linked with this * library: From 73ed7c7822d0e89fa4661b38aa1def759c6807b5 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 16:58:41 -0800 Subject: [PATCH 11/15] Fix for replace function when needle is greater than 1 --- cpp17/diff_match_patch_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp index ffff836b..dfd89519 100644 --- a/cpp17/diff_match_patch_utils.cpp +++ b/cpp17/diff_match_patch_utils.cpp @@ -36,7 +36,7 @@ void replace(std::wstring &inString, const std::wstring &from, std::size_t pos = inString.find(from); while (pos != std::wstring::npos) { inString.replace(pos, from.length(), to); - pos = inString.find(from, pos + 1); + pos = inString.find(from, pos + to.length()); } } From d4dbdeab9d7839ea803fd8c3e4bd20a8449df541 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 30 Jan 2024 17:00:03 -0800 Subject: [PATCH 12/15] Added support for pretty print to a console --- cpp17/diff_match_patch.cpp | 27 +++++++++++++++++++++++++++ cpp17/diff_match_patch.h | 8 ++++++++ cpp17/diff_match_patch_test.cpp | 31 +++++++++++++++++++++++++++++++ cpp17/diff_match_patch_test.h | 2 ++ 4 files changed, 68 insertions(+) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp 
index 282a46a0..3cbfcc3a 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -1311,6 +1311,33 @@ std::wstring diff_match_patch::diff_prettyHtml(const TDiffVector &diffs) { return html; } +std::wstring diff_match_patch::diff_prettyConsole(const TDiffVector &diffs) { + static std::wstring kRed{L"\033[0;31m"}; + static std::wstring kGreen{L"\033[0;32m"}; + static std::wstring kYellow{L"\033[0;33m"}; + static std::wstring kReset{L"\033[m"}; + static std::wstring kEOL{NUtils::fromPercentEncoding(L"%C2%B6") + L"\n"}; + + std::wstring retVal; + std::wstring text; + for (auto &&aDiff : diffs) { + text = aDiff.text; + NUtils::replace(text, L"\n", kEOL); + switch (aDiff.operation) { + case INSERT: + retVal += kGreen + text + kReset; + break; + case DELETE: + retVal += kRed + text + kReset; + break; + case EQUAL: + retVal += text; + break; + } + } + return retVal; +} + std::wstring diff_match_patch::diff_text1(const TDiffVector &diffs) { std::wstring text; for (auto &&aDiff : diffs) { diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h index 821376a3..b0e37bd2 100644 --- a/cpp17/diff_match_patch.h +++ b/cpp17/diff_match_patch.h @@ -463,6 +463,14 @@ class diff_match_patch { public: std::wstring diff_prettyHtml(const TDiffVector &diffs); + /** + * Convert a Diff list into a pretty Console report. Red for delete, and green for insert + * @param diffs LinkedList of Diff objects. + * @return Console representation. + */ + public: + std::wstring diff_prettyConsole(const TDiffVector &diffs); + /** * Compute and return the source text (all equalities and deletions). * @param diffs LinkedList of Diff objects. 
diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index 74ec07cf..5dc5abd6 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -77,6 +77,7 @@ int diff_match_patch_test::run_all_tests() { runTest(std::bind(&diff_match_patch_test::testDiffCleanupSemantic, this)); runTest(std::bind(&diff_match_patch_test::testDiffCleanupEfficiency, this)); runTest(std::bind(&diff_match_patch_test::testDiffPrettyHtml, this)); + runTest(std::bind(&diff_match_patch_test::testDiffPrettyConsole, this)); runTest(std::bind(&diff_match_patch_test::testDiffText, this)); runTest(std::bind(&diff_match_patch_test::testDiffDelta, this)); runTest(std::bind(&diff_match_patch_test::testDiffXIndex, this)); @@ -583,6 +584,23 @@ TEST_F(diff_match_patch_test, testDiffPrettyHtml) { dmp.diff_prettyHtml(diffs)); } +TEST_F(diff_match_patch_test, testDiffPrettyConsole) { + // Pretty print. + static std::wstring kRed{L"\033[0;31m"}; + static std::wstring kGreen{L"\033[0;32m"}; + static std::wstring kYellow{L"\033[0;33m"}; + static std::wstring kReset{L"\033[m"}; + static std::wstring kEOL{NUtils::fromPercentEncoding(L"%C2%B6") + L"\n"}; + + auto diffs = TDiffVector( + {Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")}); + auto results = dmp.diff_prettyConsole(diffs); + assertEquals( + "diff_prettyConsole:", + L"a" + kEOL + kRed + L"b" + kReset + kGreen + L"c&d" + kReset, + results); +} + TEST_F(diff_match_patch_test, testDiffText) { // Compute the source and destination texts. 
auto diffs = {Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), @@ -1308,11 +1326,24 @@ TEST_F(diff_match_patch_test, fromGitHubExamples) { L"your head."; auto diffs = dmp.diff_main(lhs, rhs); dmp.diff_cleanupSemantic(diffs); + auto console = dmp.diff_prettyConsole(diffs); auto html = dmp.diff_prettyHtml(diffs); auto delta = dmp.diff_toDelta(diffs); + + auto consoleGolden = + L"I am the very model of a \x1B[0;31mmodern Major-General, I've " + L"information vegetable, animal, and mineral, I know the kings of " + L"England, and I quote the fights historical, From Marathon to Waterloo, " + L"in order categorical\x1B[m\x1B[0;32mcartoon individual, My animation's " + L"comical, unusual, and whimsical, I'm quite adept at funny gags, " + L"comedic theory I have read, From wicked puns and stupid jokes to " + L"anvils that drop on your head\x1B[m."; + assertEquals("gitHubDemos", consoleGolden, console); + auto htmlGolden = LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categoricalcartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head.)"; assertEquals("gitHubDemos", htmlGolden, html); + auto deltaGolden = L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and " L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index c7403995..07678d2a 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -61,6 +61,7 @@ class diff_match_patch_test PUBLIC_TESTING { void testDiffCleanupSemantic(); void testDiffCleanupEfficiency(); void testDiffPrettyHtml(); + void testDiffPrettyConsole(); void testDiffText(); void testDiffDelta(); void testDiffXIndex(); @@ 
-84,6 +85,7 @@ class diff_match_patch_test PUBLIC_TESTING { void testPatchApply(); void fromGitHubExamples(); + private: bool runTest(std::function test); std::size_t numPassedTests{0}; From 8e9d024737327babde50e4b491e8ca266a81484f Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 14 Jan 2025 08:25:19 -0800 Subject: [PATCH 13/15] Support for C++17 --- cpp17/CMakeLists.txt | 2 +- cpp17/diff_match_patch.cpp | 4451 +++++++++--------- cpp17/diff_match_patch.h | 615 ++- cpp17/diff_match_patch_test.cpp | 2391 +++++----- cpp17/diff_match_patch_test.h | 279 +- cpp17/diff_match_patch_test_assertEquals.cpp | 252 +- cpp17/diff_match_patch_test_utils.cpp | 157 + cpp17/diff_match_patch_utils.cpp | 323 +- cpp17/diff_match_patch_utils.h | 385 +- 9 files changed, 4530 insertions(+), 4325 deletions(-) create mode 100644 cpp17/diff_match_patch_test_utils.cpp diff --git a/cpp17/CMakeLists.txt b/cpp17/CMakeLists.txt index 27439fe8..c972d757 100644 --- a/cpp17/CMakeLists.txt +++ b/cpp17/CMakeLists.txt @@ -18,7 +18,7 @@ target_link_libraries( ${_PROJECT_NAME} ) SET( TEST_NAME "${_PROJECT_NAME}_test" ) project( ${TEST_NAME} ) -add_executable( ${TEST_NAME} diff_match_patch_test.cpp diff_match_patch_test.h diff_match_patch_test_assertEquals.cpp) +add_executable( ${TEST_NAME} diff_match_patch_test.cpp diff_match_patch_test.h diff_match_patch_test_utils.cpp) target_include_directories( ${TEST_NAME} PUBLIC ${CMAKE_SOURCE_DIR}) if( USE_GTEST ) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index 3cbfcc3a..eaeb8cdf 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -29,1070 +29,1213 @@ #include "diff_match_patch_utils.h" +namespace NDiffMatchPatch +{ #ifdef WIN32 -std::size_t kZERO{0ULL}; -std::size_t kONE{1ULL}; + static std::size_t kZERO{ 0ULL }; + static std::size_t kONE{ 1ULL }; #else -unsigned long kZERO{0UL}; -unsigned long kONE{1UL}; + static unsigned long kZERO{ 0UL }; + static unsigned long kONE{ 1UL }; #endif 
-////////////////////////// -// -// Diff Class -// -////////////////////////// + std::wstring toString( EOperation op ) + { + switch ( op ) + { + case EOperation::eINSERT: + return L"INSERT"; + case EOperation::eDELETE: + return L"DELETE"; + case EOperation::eEQUAL: + return L"EQUAL"; + } + throw "Invalid operation."; + } -/** + ////////////////////////// + // + // Diff Class + // + ////////////////////////// + + /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL * @param text The text being applied */ -Diff::Diff(Operation _operation, const std::wstring &_text) - : operation(_operation), text(_text) { - // Construct a diff with the specified operation and text. -} - -Diff::Diff() {} - -Diff::Diff(Operation _operation, const wchar_t *_text) - : Diff(_operation, (_text ? std::wstring(_text) : std::wstring(L""))) {} - -Diff::Diff(Operation _operation, const std::string &_text) - : Diff(_operation, NUtils::to_wstring(_text)) {} - -Diff::Diff(Operation _operation, const char *_text) - : Diff(_operation, std::string(_text)) {} - -std::wstring Diff::strOperation(Operation op) { - switch (op) { - case INSERT: - return L"INSERT"; - case DELETE: - return L"DELETE"; - case EQUAL: - return L"EQUAL"; - } - throw "Invalid operation."; -} - -/** + + /** * Display a human-readable version of this Diff. * @return text version */ -std::wstring Diff::toString() const { - std::wstring prettyText = text; - // Replace linebreaks with Pilcrow signs. 
- std::replace(prettyText.begin(), prettyText.end(), L'\n', L'\u00b6'); - return std::wstring(L"Diff(") + strOperation(operation) + - std::wstring(L",\"") + prettyText + std::wstring(L"\")"); -} - -/** + std::wstring Diff::toString( EStringType stringType ) const + { + std::wstring retVal; + + if ( stringType == EStringType::ePatch ) + { + switch ( fOperation ) + { + case EOperation::eINSERT: + retVal += L"+"; + break; + case EOperation::eDELETE: + retVal += L"-"; + break; + case EOperation::eEQUAL: + retVal += L" "; + break; + } + retVal += NUtils::toPercentEncoding( fText, L" !~*'();/?:@&=+$,#" ) + std::wstring( L"\n" ); + } + else if ( stringType == EStringType::eUnitTest ) + { + retVal = L"(" + NDiffMatchPatch::toString( fOperation ) + L" " + text() + L")"; + } + else if ( stringType == EStringType::eDefault ) + { + retVal = fText; + // Replace linebreaks with Pilcrow signs. + std::replace( retVal.begin(), retVal.end(), L'\n', L'\u00b6' ); + retVal = std::wstring( L"Diff(" ) + NDiffMatchPatch::toString( fOperation ) + std::wstring( L",\"" ) + retVal + std::wstring( L"\")" ); + } + return retVal; + } + + std::wstring Diff::toHtml() const + { + auto text = this->text(); + NUtils::replace( text, L"&", L"&" ); + NUtils::replace( text, L"<", L"<" ); + NUtils::replace( text, L">", L">" ); + NUtils::replace( text, L"\n", L"¶
" ); + if ( isInsert() ) + return std::wstring( L"" ) + text + std::wstring( L"" ); + else if ( isDelete() ) + return std::wstring( L"" ) + text + std::wstring( L"" ); + else if ( isEqual() ) + return std::wstring( L"" ) + text + std::wstring( L"" ); + return {}; + } + + std::wstring Diff::toConsole() const + { + static std::wstring kRed{ L"\033[0;31m" }; + static std::wstring kGreen{ L"\033[0;32m" }; + static std::wstring kYellow{ L"\033[0;33m" }; + static std::wstring kReset{ L"\033[m" }; + static std::wstring kEOL{ NUtils::fromPercentEncoding( L"%C2%B6" ) + L"\n" }; + + auto retVal = text(); + NUtils::replace( retVal, L"\n", kEOL ); + if ( isInsert() ) + return kGreen + retVal + kReset; + else if ( isDelete() ) + return kRed + retVal + kReset; + else if ( isEqual() ) + return retVal; + return {}; + } + + std::wstring Diff::toDelta() const + { + if ( isInsert() ) + return L"+" + NUtils::toPercentEncoding( text(), L" !~*'();/?:@&=+$,#" ) + L"\t"; + else if ( isDelete() ) + return L"-" + std::to_wstring( text().length() ) + L"\t"; + else if ( isEqual() ) + return L"=" + std::to_wstring( text().length() ) + L"\t"; + return {}; + } + + /** * Is this Diff equivalent to another Diff? * @param d Another Diff to compare against * @return true or false */ -bool Diff::operator==(const Diff &d) const { - return (d.operation == this->operation) && (d.text == this->text); -} + bool Diff::operator==( const Diff &d ) const + { + return ( d.fOperation == fOperation ) && ( d.fText == fText ); + } -bool Diff::operator!=(const Diff &d) const { return !(operator==(d)); } + bool Diff::operator!=( const Diff &d ) const + { + return !( operator==( d ) ); + } -///////////////////////////////////////////// -// -// Patch Class -// -///////////////////////////////////////////// + ///////////////////////////////////////////// + // + // Patch Class + // + ///////////////////////////////////////////// -/** + /** * Constructor. Initializes with an empty list of diffs. 
*/ -Patch::Patch() {} - -Patch::Patch(std::wstring &text) { - std::wsmatch matches; - auto patchHeader = std::wregex(LR"(^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$)"); - if (!std::regex_match(text, matches, patchHeader) || (matches.size() != 5)) { - throw std::wstring(L"Invalid patch string: " + text); - } - start1 = NUtils::toInt(matches[1].str()); - if (!matches[2].length()) { - start1--; - length1 = 1; - } else if (matches[2].str() == L"0") { - length1 = 0; - } else { - start1--; - length1 = NUtils::toInt(matches[2].str()); - } - - start2 = NUtils::toInt(matches[3].str()); - if (!matches[4].length()) { - start2--; - length2 = 1; - } else if (matches[4].str() == L"0") { - length2 = 0; - } else { - start2--; - length2 = NUtils::toInt(matches[4].str()); - } - text.erase(text.begin()); -} - -bool Patch::isNull() const { - if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && - diffs.empty()) { - return true; - } - return false; -} - -/** + Patch::Patch() + { + } + + Patch::Patch( std::wstring &text ) + { + std::wsmatch matches; + auto patchHeader = std::wregex( LR"(^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$)" ); + if ( !std::regex_match( text, matches, patchHeader ) || ( matches.size() != 5 ) ) + { + throw std::wstring( L"Invalid patch string: " + text ); + } + start1 = NUtils::toInt( matches[ 1 ].str() ); + if ( !matches[ 2 ].length() ) + { + start1--; + length1 = 1; + } + else if ( matches[ 2 ].str() == L"0" ) + { + length1 = 0; + } + else + { + start1--; + length1 = NUtils::toInt( matches[ 2 ].str() ); + } + + start2 = NUtils::toInt( matches[ 3 ].str() ); + if ( !matches[ 4 ].length() ) + { + start2--; + length2 = 1; + } + else if ( matches[ 4 ].str() == L"0" ) + { + length2 = 0; + } + else + { + start2--; + length2 = NUtils::toInt( matches[ 4 ].str() ); + } + text.erase( text.begin() ); + } + + bool Patch::isNull() const + { + if ( start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && diffs.empty() ) + { + return true; + } + return false; + } + + /** * 
Emulate GNU diff's format. * Header: @@ -382,8 +481,9 @@ * Indices are printed as 1-based, not 0-based. * @return The GNU diff string */ -std::wstring Patch::toString() const { - auto text = getPatchHeader(); - // Escape the body of the patch with %xx notation. - for (auto &&aDiff : diffs) { - switch (aDiff.operation) { - case INSERT: - text += L"+"; - break; - case DELETE: - text += L"-"; - break; - case EQUAL: - text += L" "; - break; - } - text += NUtils::toPercentEncoding(aDiff.text, L" !~*'();/?:@&=+$,#") + - std::wstring(L"\n"); - } - - return text; -} - -std::wstring Patch::getPatchHeader() const { - auto coords1 = getCoordinateString(start1, length1); - auto coords2 = getCoordinateString(start2, length2); - auto text = std::wstring(L"@@ -") + coords1 + std::wstring(L" +") + coords2 + - std::wstring(L" @@\n"); - return text; -} - -std::wstring Patch::getCoordinateString(std::size_t start, - std::size_t length) const { - std::wstring retVal; - if (length == 0) { - retVal = std::to_wstring(start) + std::wstring(L",0"); - } else if (length == 1) { - retVal = std::to_wstring(start + 1); - } else { - retVal = std::to_wstring(start + 1) + std::wstring(L",") + - std::to_wstring(length); - } - return retVal; -} - -///////////////////////////////////////////// -// -// diff_match_patch Class -// -///////////////////////////////////////////// - -// all class members initialized in the class -diff_match_patch::diff_match_patch() {} - -TDiffVector diff_match_patch::diff_main(const std::wstring &text1, - const std::wstring &text2) { - return diff_main(text1, text2, true); -} - -TDiffVector diff_match_patch::diff_main(const std::wstring &text1, - const std::wstring &text2, - bool checklines) { - // Set a deadline by which time the diff must be complete. 
- clock_t deadline; - if (Diff_Timeout <= 0) { - deadline = std::numeric_limits::max(); - } else { - deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); - } - return diff_main(text1, text2, checklines, deadline); -} - -TDiffVector diff_match_patch::diff_main(const std::wstring &text1, - const std::wstring &text2, - bool checklines, clock_t deadline) { - // Check for equality (speedup). - TDiffVector diffs; - if (text1 == text2) { - if (!text1.empty()) { - diffs.emplace_back(EQUAL, text1); - } - return diffs; - } - - if (!text1.empty() && text2.empty()) { - diffs.emplace_back(DELETE, text1); - return diffs; - } - - if (text1.empty() && !text2.empty()) { - diffs.emplace_back(INSERT, text2); - return diffs; - } - - // Trim off common prefix (speedup). - auto commonlength = diff_commonPrefix(text1, text2); - auto commonprefix = text1.substr(0, commonlength); - auto textChopped1 = text1.substr(commonlength); - auto textChopped2 = text2.substr(commonlength); - - // Trim off common suffix (speedup). - commonlength = diff_commonSuffix(textChopped1, textChopped2); - auto commonsuffix = textChopped1.substr(textChopped1.length() - commonlength); - textChopped1 = textChopped1.substr(0, textChopped1.length() - commonlength); - textChopped2 = textChopped2.substr(0, textChopped2.length() - commonlength); - - // Compute the diff on the middle block. - diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); - - // Restore the prefix and suffix. 
- if (!commonprefix.empty()) { - diffs.emplace(diffs.begin(), EQUAL, commonprefix); - } - if (!commonsuffix.empty()) { - diffs.emplace_back(EQUAL, commonsuffix); - } - - diff_cleanupMerge(diffs); - - return diffs; -} - -TDiffVector diff_match_patch::diff_main(const std::string &text1, - const std::string &text2) { - return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); -} - -TDiffVector diff_match_patch::diff_main(const std::string &text1, - const std::string &text2, - bool checklines) { - return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2), - checklines); -} - -TDiffVector diff_match_patch::diff_main(const std::string &text1, - const std::string &text2, - bool checklines, clock_t deadline) { - return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2), - checklines, deadline); -} - -TDiffVector diff_match_patch::diff_compute(const std::wstring &text1, - const std::wstring &text2, - bool checklines, clock_t deadline) { - TDiffVector diffs; - - if (text1.empty()) { - // Just add some text (speedup). - diffs.emplace_back(INSERT, text2); - return diffs; - } - - if (text2.empty()) { - // Just delete some text (speedup). - diffs.emplace_back(DELETE, text1); - return diffs; - } - - { - auto [longtext, shorttext] = (text1.length() > text2.length()) - ? std::make_pair(text1, text2) - : std::make_pair(text2, text1); - auto i = longtext.find(shorttext); - if (i != std::string::npos) { - // Shorter text is inside the longer text (speedup). - const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; - diffs.emplace_back(op, longtext.substr(0, i)); - diffs.emplace_back(EQUAL, shorttext); - diffs.emplace_back(op, safeMid(longtext, i + shorttext.length())); - return diffs; - } - - if (shorttext.length() == 1) { - // Single character string. - // After the previous speedup, the character can't be an equality. 
- diffs.emplace_back(DELETE, text1); - diffs.emplace_back(INSERT, text2); - return diffs; - } - // Garbage collect longtext and shorttext by scoping out. - } - - // Check to see if the problem can be split in two. - const TStringVector hm = diff_halfMatch(text1, text2); - if (!hm.empty()) { - // A half-match was found, sort out the return data. - auto &&text1_a = hm[0]; - auto &&text1_b = hm[1]; - auto &&text2_a = hm[2]; - auto &&text2_b = hm[3]; - auto &&mid_common = hm[4]; - // Send both pairs off for separate processing. - diffs = diff_main(text1_a, text2_a, checklines, deadline); - const TDiffVector diffs_b = - diff_main(text1_b, text2_b, checklines, deadline); - // Merge the results. - diffs.emplace_back(EQUAL, mid_common); - diffs.insert(diffs.end(), diffs_b.begin(), diffs_b.end()); - return diffs; - } - - // Perform a real diff. - if (checklines && (text1.length() > 100) && (text2.length() > 100)) { - return diff_lineMode(text1, text2, deadline); - } - - return diff_bisect(text1, text2, deadline); -} - -TDiffVector diff_match_patch::diff_compute(const std::string &text1, - const std::string &text2, - bool checklines, clock_t deadline) { - return diff_compute(NUtils::to_wstring(text1), NUtils::to_wstring(text2), - checklines, deadline); -} - -TDiffVector diff_match_patch::diff_lineMode(std::wstring text1, - std::wstring text2, - clock_t deadline) { - // Scan the text on a line-by-line basis first. - auto a = diff_linesToChars(text1, text2); - text1 = std::get(a[0]); - text2 = std::get(a[1]); - auto linearray = std::get(a[2]); - - auto diffs = diff_main(text1, text2, false, deadline); - - // Convert the diff back to original text. - diff_charsToLines(diffs, linearray); - // Eliminate freak matches (e.g. blank lines) - diff_cleanupSemantic(diffs); - - // Rediff any replacement blocks, this time character-by-character. - // Add a dummy entry at the end. 
- diffs.emplace_back(EQUAL, L""); - std::size_t pointer = 0; - int count_delete = 0; - int count_insert = 0; - std::wstring text_delete; - std::wstring text_insert; - while (pointer < diffs.size()) { - switch (diffs[pointer].operation) { - case INSERT: - count_insert++; - text_insert += diffs[pointer].text; - break; - case DELETE: - count_delete++; - text_delete += diffs[pointer].text; - break; - case EQUAL: - // Upon reaching an equality, check for prior redundancies. - if (count_delete >= 1 && count_insert >= 1) { - // Delete the offending records and add the merged ones. - auto numElements = count_delete + count_insert; - auto start = diffs.begin() + pointer - numElements; - auto end = start + numElements; - diffs.erase(start, end); - pointer = pointer - count_delete - count_insert; - auto subDiff = diff_main(text_delete, text_insert, false, deadline); - diffs.insert(diffs.begin() + pointer, subDiff.begin(), subDiff.end()); - pointer = pointer + subDiff.size(); - } - count_insert = 0; - count_delete = 0; - text_delete.clear(); - text_insert.clear(); - break; - } - pointer++; - } - diffs.pop_back(); // Remove the dummy entry at the end. - - return diffs; -} - -TDiffVector diff_match_patch::diff_lineMode(std::string text1, - std::string text2, - clock_t deadline) { - return diff_lineMode(NUtils::to_wstring(text1), NUtils::to_wstring(text2), - deadline); -} - -// using int64_t rather thant size_t due to the backward walking nature of the -// algorithm -TDiffVector diff_match_patch::diff_bisect(const std::wstring &text1, - const std::wstring &text2, - clock_t deadline) { - // Cache the text lengths to prevent multiple calls. 
- auto text1_length = static_cast(text1.length()); - auto text2_length = static_cast(text2.length()); - auto max_d = (text1_length + text2_length + 1) / 2; - auto v_offset = max_d; - auto v_length = 2 * max_d; - auto v1 = std::vector(v_length, -1); - auto v2 = std::vector(v_length, -1); - v1[v_offset + 1] = 0; - v2[v_offset + 1] = 0; - auto delta = text1_length - text2_length; - // If the total number of characters is odd, then the front path will - // collide with the reverse path. - bool front = (delta % 2 != 0); - // Offsets for start and end of k loop. - // Prevents mapping of space beyond the grid. - int64_t k1start = 0; - int64_t k1end = 0; - int64_t k2start = 0; - int64_t k2end = 0; - for (int64_t d = 0; d < max_d; d++) { - // Bail out if deadline is reached. - if (clock() > deadline) { - break; - } - - // Walk the front path one step. - for (auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { - auto k1_offset = v_offset + k1; - int64_t x1; - if ((k1 == -d) || (k1 != d) && (v1[k1_offset - 1] < v1[k1_offset + 1])) { - x1 = v1[k1_offset + 1]; - } else { - x1 = v1[k1_offset - 1] + 1; - } - int64_t y1 = x1 - k1; - while ((x1 < text1_length) && (y1 < text2_length) && - (text1[x1] == text2[y1])) { - x1++; - y1++; - } - v1[k1_offset] = x1; - if (x1 > text1_length) { - // Ran off the right of the graph. - k1end += 2; - } else if (y1 > text2_length) { - // Ran off the bottom of the graph. - k1start += 2; - } else if (front) { - auto k2_offset = v_offset + delta - k1; - if ((k2_offset >= 0) && (k2_offset < v_length) && - (v2[k2_offset] != -1)) { - // Mirror x2 onto top-left coordinate system. - auto x2 = text1_length - v2[k2_offset]; - if (x1 >= x2) { - // Overlap detected. - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } - } - } - } - - // Walk the reverse path one step. 
- for (auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { - auto k2_offset = v_offset + k2; - int64_t x2; - if ((k2 == -d) || (k2 != d) && (v2[k2_offset - 1] < v2[k2_offset + 1])) { - x2 = v2[k2_offset + 1]; - } else { - x2 = v2[k2_offset - 1] + 1; - } - auto y2 = x2 - k2; - while ((x2 < text1_length) && (y2 < text2_length) && - (text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1])) { - x2++; - y2++; - } - v2[k2_offset] = x2; - if (x2 > text1_length) { - // Ran off the left of the graph. - k2end += 2; - } else if (y2 > text2_length) { - // Ran off the top of the graph. - k2start += 2; - } else if (!front) { - auto k1_offset = v_offset + delta - k2; - if ((k1_offset >= 0) && (k1_offset < v_length) && - (v1[k1_offset] != -1)) { - auto x1 = v1[k1_offset]; - auto y1 = v_offset + x1 - k1_offset; - // Mirror x2 onto top-left coordinate system. - x2 = text1_length - v2[k2_offset]; - if (x1 >= x2) { - // Overlap detected. - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } - } - } - } - } - // Diff took too long and hit the deadline or - // number of diffs equals number of characters, no commonality at all. - auto diffs = TDiffVector({Diff(DELETE, text1), Diff(INSERT, text2)}); - return diffs; -} - -TDiffVector diff_match_patch::diff_bisect(const std::string &text1, - const std::string &text2, - clock_t deadline) { - return diff_bisect(NUtils::to_wstring(text1), NUtils::to_wstring(text2), - deadline); -} - -TDiffVector diff_match_patch::diff_bisectSplit(const std::wstring &text1, - const std::wstring &text2, - std::size_t x, std::size_t y, - clock_t deadline) { - auto text1a = text1.substr(0, x); - auto text2a = text2.substr(0, y); - auto text1b = safeMid(text1, x); - auto text2b = safeMid(text2, y); - - // Compute both diffs serially. 
- TDiffVector diffs = diff_main(text1a, text2a, false, deadline); - TDiffVector diffsb = diff_main(text1b, text2b, false, deadline); - - diffs.insert(diffs.end(), diffsb.begin(), diffsb.end()); - return diffs; -} - -TDiffVector diff_match_patch::diff_bisectSplit(const std::string &text1, - const std::string &text2, - std::size_t x, std::size_t y, - clock_t deadline) { - return diff_bisectSplit(NUtils::to_wstring(text1), NUtils::to_wstring(text2), - x, y, deadline); -} - -diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( - const std::wstring &text1, const std::wstring &text2) { - TStringVector lineArray; - std::unordered_map lineHash; - // e.g. linearray[4] == "Hello\n" - // e.g. linehash.get("Hello\n") == 4 - - // "\x00" is a valid character, but various debuggers don't like it. - // So we'll insert a junk entry to avoid generating a nullptr character. - lineArray.emplace_back(L""); - - const std::wstring chars1 = - diff_linesToCharsMunge(text1, lineArray, lineHash); - const std::wstring chars2 = - diff_linesToCharsMunge(text2, lineArray, lineHash); - - TVariantVector listRet; - listRet.emplace_back(chars1); - listRet.emplace_back(chars2); - listRet.emplace_back(lineArray); - return listRet; -} - -std::vector -diff_match_patch::diff_linesToChars(const std::string &text1, - const std::string &text2) { - return diff_linesToChars(NUtils::to_wstring(text1), - NUtils::to_wstring(text2)); -} - -std::wstring diff_match_patch::diff_linesToCharsMunge( - const std::wstring &text, TStringVector &lineArray, - std::unordered_map &lineHash) { - std::size_t lineStart = 0; - std::size_t lineEnd = std::string::npos; - std::wstring line; - std::wstring chars; - // Walk the text, pulling out a substring for each line. - // text.split('\n') would would temporarily double our memory footprint. - // Modifying text would create many large strings to garbage collect. 
- bool firstTime = true; - while ((firstTime && (lineEnd == -1) && !text.empty()) || - lineEnd < (text.length() - 1)) { - firstTime = false; - lineEnd = text.find('\n', lineStart); - if (lineEnd == -1) { - lineEnd = text.length() - 1; - } - line = safeMid(text, lineStart, lineEnd + 1 - lineStart); - - auto pos = lineHash.find(line); - if (pos != lineHash.end()) { - chars += static_cast((*pos).second); - } else { - lineArray.emplace_back(line); - lineHash[line] = lineArray.size() - 1; - chars += static_cast(lineArray.size() - 1); - } - - lineStart = lineEnd + 1; - } - return chars; -} - -void diff_match_patch::diff_charsToLines(TDiffVector &diffs, - const TStringVector &lineArray) { - // Qt has no mutable Qforeach construct. - for (auto &&diff : diffs) { - std::wstring text; - for (auto &&y : diff.text) { - text += lineArray[y]; - } - diff.text = text; - } -} - -std::size_t diff_match_patch::diff_commonPrefix(const std::wstring &text1, - const std::wstring &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const auto n = std::min(text1.length(), text2.length()); - for (std::size_t i = 0; i < n; i++) { - if (text1[i] != text2[i]) { - return i; - } - } - return n; -} - -std::size_t diff_match_patch::diff_commonPrefix(const std::string &text1, - const std::string &text2) { - return diff_commonPrefix(NUtils::to_wstring(text1), - NUtils::to_wstring(text2)); -} - -std::size_t diff_match_patch::diff_commonSuffix(const std::wstring &text1, - const std::wstring &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const auto text1_length = text1.length(); - const auto text2_length = text2.length(); - const auto n = std::min(text1_length, text2_length); - for (std::size_t i = 1; i <= n; i++) { - if (text1[text1_length - i] != text2[text2_length - i]) { - return i - 1; - } - } - return n; -} - -std::size_t diff_match_patch::diff_commonSuffix(const std::string &text1, - const std::string &text2) { - return 
diff_commonSuffix(NUtils::to_wstring(text1), - NUtils::to_wstring(text2)); -} - -std::size_t diff_match_patch::diff_commonOverlap(const std::wstring &text1, - const std::wstring &text2) { - // Cache the text lengths to prevent multiple calls. - const auto text1_length = text1.length(); - const auto text2_length = text2.length(); - // Eliminate the nullptr case. - if (text1_length == 0 || text2_length == 0) { - return 0; - } - // Truncate the longer string. - std::wstring text1_trunc = text1; - std::wstring text2_trunc = text2; - if (text1_length > text2_length) { - text1_trunc = text1.substr(text1_length - text2_length); - } else if (text1_length < text2_length) { - text2_trunc = text2.substr(0, text1_length); - } - const auto text_length = std::min(text1_length, text2_length); - // Quick check for the worst case. - if (text1_trunc == text2_trunc) { - return text_length; - } - - // Start by looking for a single character match - // and increase length until no match is found. - // Performance analysis: http://neil.fraser.name/news/2010/11/04/ - std::size_t best = 0; - std::size_t length = 1; - while (true) { - std::wstring pattern = (length < text1_trunc.length()) - ? text1_trunc.substr(text_length - length) - : std::wstring(); - if (pattern.empty()) return best; - - auto found = text2_trunc.find(pattern); - if (found == std::string::npos) { - return best; - } - length += found; - if (found == 0 || text1_trunc.substr(text_length - length) == - text2_trunc.substr(0, length)) { - best = length; - length++; - } - } -} - -std::size_t diff_match_patch::diff_commonOverlap(const std::string &text1, - const std::string &text2) { - return diff_commonOverlap(NUtils::to_wstring(text1), - NUtils::to_wstring(text2)); -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( - const std::wstring &text1, const std::wstring &text2) { - if (Diff_Timeout <= 0) { - // Don't risk returning a non-optimal diff if we have unlimited time. 
- return {}; - } - const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; - const std::wstring shorttext = - text1.length() > text2.length() ? text2 : text1; - if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { - return {}; // Pointless. - } - - // First check if the second quarter is the seed for a half-match. - const TStringVector hm1 = - diff_halfMatchI(longtext, shorttext, (longtext.length() + 3) / 4); - // Check again based on the third quarter. - const TStringVector hm2 = - diff_halfMatchI(longtext, shorttext, (longtext.length() + 1) / 2); - TStringVector hm; - if (hm1.empty() && hm2.empty()) { - return {}; - } else if (hm2.empty()) { - hm = hm1; - } else if (hm1.empty()) { - hm = hm2; - } else { - // Both matched. Select the longest. - hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; - } - - // A half-match was found, sort out the return data. - if (text1.length() > text2.length()) { - return hm; - } else { - TStringVector listRet({hm[2], hm[3], hm[0], hm[1], hm[4]}); - return listRet; - } -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( - const std::string &text1, const std::string &text2) { - return diff_halfMatch(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( - const std::wstring &longtext, const std::wstring &shorttext, - std::size_t i) { - // Start with a 1/4 length substring at position i as a seed. 
- const std::wstring seed = safeMid(longtext, i, longtext.length() / 4); - std::size_t j = std::string::npos; - std::wstring best_common; - std::wstring best_longtext_a, best_longtext_b; - std::wstring best_shorttext_a, best_shorttext_b; - while ((j = shorttext.find(seed, j + 1)) != std::string::npos) { - const auto prefixLength = - diff_commonPrefix(safeMid(longtext, i), safeMid(shorttext, j)); - const auto suffixLength = - diff_commonSuffix(longtext.substr(0, i), shorttext.substr(0, j)); - if (best_common.length() < suffixLength + prefixLength) { - best_common = safeMid(shorttext, j - suffixLength, suffixLength) + - safeMid(shorttext, j, prefixLength); - best_longtext_a = longtext.substr(0, i - suffixLength); - best_longtext_b = safeMid(longtext, i + prefixLength); - best_shorttext_a = shorttext.substr(0, j - suffixLength); - best_shorttext_b = safeMid(shorttext, j + prefixLength); - } - } - if (best_common.length() * 2 >= longtext.length()) { - TStringVector listRet({best_longtext_a, best_longtext_b, best_shorttext_a, - best_shorttext_b, best_common}); - return listRet; - } else { - return {}; - } -} - -diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( - const std::string &longtext, const std::string &shorttext, std::size_t i) { - return diff_halfMatchI(NUtils::to_wstring(longtext), - NUtils::to_wstring(shorttext), i); -} - -void diff_match_patch::diff_cleanupSemantic(TDiffVector &diffs) { - if (diffs.empty()) return; - - bool changes = false; - // Stack of indices where equalities are found. - std::stack equalities; // stack of equalities - // Always equal to equalities[equalitiesLength-1][1] - std::wstring lastEquality; - std::size_t pointer = 0; // Index of current position. - // Number of characters that changed prior to the equality. - std::size_t length_insertions1 = 0; - std::size_t length_deletions1 = 0; - // Number of characters that changed after the equality. 
- std::size_t length_insertions2 = 0; - std::size_t length_deletions2 = 0; - while (pointer < diffs.size()) { - if (diffs[pointer].operation == EQUAL) { // Equality found. - equalities.push(pointer); - length_insertions1 = length_insertions2; - length_deletions1 = length_deletions2; - length_insertions2 = 0; - length_deletions2 = 0; - lastEquality = diffs[pointer].text; - } else { // an insertion or deletion - if (diffs[pointer].operation == INSERT) { - length_insertions2 += diffs[pointer].text.length(); - } else { - length_deletions2 += diffs[pointer].text.length(); - } - // Eliminate an equality that is smaller or equal to the edits on both - // sides of it. - if (!lastEquality.empty() && - (lastEquality.length() <= - std::max(length_insertions1, length_deletions1)) && - (lastEquality.length() <= - std::max(length_insertions2, length_deletions2))) { - // Duplicate record. - diffs.insert(diffs.begin() + equalities.top(), - Diff(DELETE, lastEquality)); - // Change second copy to insert. - diffs[equalities.top() + 1].operation = INSERT; - // Throw away the equality we just deleted. - equalities.pop(); - if (!equalities.empty()) { - equalities.pop(); - } - pointer = !equalities.empty() ? equalities.top() : -1; - length_insertions1 = 0; // Reset the counters. - length_deletions1 = 0; - length_insertions2 = 0; - length_deletions2 = 0; - lastEquality.clear(); - changes = true; - } - } - pointer++; - } - - // Normalize the diff. - if (changes) { - diff_cleanupMerge(diffs); - } - diff_cleanupSemanticLossless(diffs); - - // Find any overlaps between deletions and insertions. - // e.g: abcxxxxxxdef - // -> abcxxxdef - // e.g: xxxabcdefxxx - // -> defxxxabc - // Only extract an overlap if it is as big as the edit ahead or behind it. 
- pointer = 1; - while (pointer < diffs.size()) { - if (diffs[pointer - 1].operation == DELETE && - diffs[pointer].operation == INSERT) { - auto deletion = diffs[pointer - 1].text; - auto insertion = diffs[pointer].text; - std::size_t overlap_length1 = diff_commonOverlap(deletion, insertion); - std::size_t overlap_length2 = diff_commonOverlap(insertion, deletion); - if (overlap_length1 >= overlap_length2) { - if (overlap_length1 >= deletion.length() / 2.0 || - overlap_length1 >= insertion.length() / 2.0) { - // Overlap found. - // Insert an equality and trim the surrounding edits. - diffs.emplace(diffs.begin() + pointer, EQUAL, - insertion.substr(0, overlap_length1)); - diffs[pointer - 1].text = - deletion.substr(0, deletion.length() - overlap_length1); - diffs[pointer + 1].text = insertion.substr(overlap_length1); - pointer++; - } - } else { - if (overlap_length2 >= deletion.length() / 2.0 || - overlap_length2 >= insertion.length() / 2.0) { - // Reverse overlap found. - // Insert an equality and swap and trim the surrounding edits. - diffs.emplace(diffs.begin() + pointer, EQUAL, - deletion.substr(0, overlap_length2)); - diffs[pointer - 1].operation = INSERT; - diffs[pointer - 1].text = - insertion.substr(0, insertion.length() - overlap_length2); - diffs[pointer + 1].operation = DELETE; - diffs[pointer + 1].text = deletion.substr(overlap_length2); - pointer++; - } - } - pointer++; - } - pointer++; - } -} - -void diff_match_patch::diff_cleanupSemanticLossless(TDiffVector &diffs) { - int pointer = 1; - // Intentionally ignore the first and last element (don't need checking). - while ((pointer != -1) && !diffs.empty() && (pointer < (diffs.size() - 1))) { - if (diffs[pointer - 1].operation == EQUAL && - diffs[pointer + 1].operation == EQUAL) { - // This is a single edit surrounded by equalities. - auto equality1 = diffs[pointer - 1].text; - auto edit = diffs[pointer].text; - auto equality2 = diffs[pointer + 1].text; - - // First, shift the edit as far left as possible. 
- auto commonOffset = diff_commonSuffix(equality1, edit); - if (commonOffset > 0) { - auto commonString = safeMid(edit, edit.length() - commonOffset); - equality1 = equality1.substr(0, equality1.length() - commonOffset); - edit = commonString + edit.substr(0, edit.length() - commonOffset); - equality2 = commonString + equality2; - } - - // Second, step character by character right, - // looking for the best fit. - auto bestEquality1 = equality1; - auto bestEdit = edit; - auto bestEquality2 = equality2; - auto bestScore = diff_cleanupSemanticScore(equality1, edit) + - diff_cleanupSemanticScore(edit, equality2); - while (!edit.empty() && !equality2.empty() && edit[0] == equality2[0]) { - equality1 += edit[0]; - edit = edit.substr(1) + equality2[0]; - equality2 = equality2.substr(1); - auto score = diff_cleanupSemanticScore(equality1, edit) + - diff_cleanupSemanticScore(edit, equality2); - // The >= encourages trailing rather than leading whitespace on - // edits. - if (score >= bestScore) { - bestScore = score; - bestEquality1 = equality1; - bestEdit = edit; - bestEquality2 = equality2; - } - } - - if (diffs[pointer - 1].text != bestEquality1) { - // We have an improvement, save it back to the diff. - if (!bestEquality1.empty()) { - diffs[pointer - 1].text = bestEquality1; - } else { - diffs.erase(diffs.begin() + pointer - 1); - pointer--; - } - diffs[pointer].text = bestEdit; - if (!bestEquality2.empty()) { - diffs[pointer + 1].text = bestEquality2; - } else { - diffs.erase(diffs.begin() + pointer + 1); - pointer--; - } - } - } - pointer++; - } -} - -int64_t diff_match_patch::diff_cleanupSemanticScore(const std::wstring &one, - const std::wstring &two) { - if (one.empty() || two.empty()) { - // Edges are the best. - return 6; - } - - // Each port of this function behaves slightly differently due to - // subtle differences in each language's definition of things like - // 'whitespace'. 
Since this function's purpose is largely cosmetic, - // the choice has been made to use each language's native features - // rather than force total conformity. - auto char1 = one[one.length() - 1]; - auto char2 = two[0]; - bool nonAlphaNumeric1 = !std::iswalnum(char1); - bool nonAlphaNumeric2 = !std::iswalnum(char2); - bool whitespace1 = nonAlphaNumeric1 && std::iswspace(char1); - bool whitespace2 = nonAlphaNumeric2 && std::iswspace(char2); - bool lineBreak1 = whitespace1 && std::iswcntrl(char1); - bool lineBreak2 = whitespace2 && std::iswcntrl(char2); - bool blankLine1 = lineBreak1 && std::regex_search(one, BLANKLINEEND); - bool blankLine2 = lineBreak2 && std::regex_search(two, BLANKLINESTART); - - if (blankLine1 || blankLine2) { - // Five points for blank lines. - return 5; - } else if (lineBreak1 || lineBreak2) { - // Four points for line breaks. - return 4; - } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { - // Three points for end of sentences. - return 3; - } else if (whitespace1 || whitespace2) { - // Two points for whitespace. - return 2; - } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { - // One point for non-alphanumeric. - return 1; - } - return 0; -} - -int64_t diff_match_patch::diff_cleanupSemanticScore(const std::string &one, - const std::string &two) { - return diff_cleanupSemanticScore(NUtils::to_wstring(one), - NUtils::to_wstring(two)); -} - -// Define some regex patterns for matching boundaries. -std::wregex diff_match_patch::BLANKLINEEND = std::wregex(LR"(\n\r?\n$)"); -std::wregex diff_match_patch::BLANKLINESTART = std::wregex(LR"(^\r?\n\r?\n)"); - -void diff_match_patch::diff_cleanupEfficiency(TDiffVector &diffs) { - bool changes = false; - // Stack of indices where equalities are found. - std::stack equalities; - // Always equal to equalities[equalitiesLength-1][1] - std::wstring lastEquality; - std::size_t pointer = 0; // Index of current position. - // Is there an insertion operation before the last equality. 
- bool pre_ins = false; - // Is there a deletion operation before the last equality. - bool pre_del = false; - // Is there an insertion operation after the last equality. - bool post_ins = false; - // Is there a deletion operation after the last equality. - bool post_del = false; - while (pointer < diffs.size()) { - if (diffs[pointer].operation == EQUAL) { // Equality found. - if (diffs[pointer].text.length() < Diff_EditCost && - (post_ins || post_del)) { - // Candidate found. - equalities.push(pointer); - pre_ins = post_ins; - pre_del = post_del; - lastEquality = diffs[pointer].text; - } else { - // Not a candidate, and can never become one. - equalities = {}; - lastEquality.clear(); - } - post_ins = post_del = false; - } else { // An insertion or deletion. - if (diffs[pointer].operation == DELETE) { - post_del = true; - } else { - post_ins = true; - } - /* + std::wstring Patch::toString() const + { + auto text = getPatchHeader(); + // Escape the body of the patch with %xx notation. + for ( auto &&aDiff : diffs ) + { + text += aDiff.toString( EStringType::ePatch ); + } + + return text; + } + + std::wstring Patch::getPatchHeader() const + { + auto coords1 = getCoordinateString( start1, length1 ); + auto coords2 = getCoordinateString( start2, length2 ); + auto text = std::wstring( L"@@ -" ) + coords1 + std::wstring( L" +" ) + coords2 + std::wstring( L" @@\n" ); + return text; + } + + std::wstring Patch::getCoordinateString( std::size_t start, std::size_t length ) const + { + std::wstring retVal; + if ( length == 0 ) + { + retVal = std::to_wstring( start ) + std::wstring( L",0" ); + } + else if ( length == 1 ) + { + retVal = std::to_wstring( start + 1 ); + } + else + { + retVal = std::to_wstring( start + 1 ) + std::wstring( L"," ) + std::to_wstring( length ); + } + return retVal; + } + + ///////////////////////////////////////////// + // + // diff_match_patch Class + // + ///////////////////////////////////////////// + + // all class members initialized in the class 
+ diff_match_patch::diff_match_patch() + { + } + + TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2 ) + { + return diff_main( text1, text2, true ); + } + + TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ) + { + // Set a deadline by which time the diff must be complete. + clock_t deadline; + if ( Diff_Timeout <= 0 ) + { + deadline = std::numeric_limits< clock_t >::max(); + } + else + { + deadline = clock() + (clock_t)( Diff_Timeout * CLOCKS_PER_SEC ); + } + return diff_main( text1, text2, checklines, deadline ); + } + + TDiffVector diff_match_patch::diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) + { + // Check for equality (speedup). + TDiffVector diffs; + if ( text1 == text2 ) + { + if ( !text1.empty() ) + { + diffs.emplace_back( EOperation::eEQUAL, text1 ); + } + return diffs; + } + + if ( !text1.empty() && text2.empty() ) + { + diffs.emplace_back( EOperation::eDELETE, text1 ); + return diffs; + } + + if ( text1.empty() && !text2.empty() ) + { + diffs.emplace_back( EOperation::eINSERT, text2 ); + return diffs; + } + + // Trim off common prefix (speedup). + auto commonlength = diff_commonPrefix( text1, text2 ); + auto commonprefix = text1.substr( 0, commonlength ); + auto textChopped1 = text1.substr( commonlength ); + auto textChopped2 = text2.substr( commonlength ); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix( textChopped1, textChopped2 ); + auto commonsuffix = textChopped1.substr( textChopped1.length() - commonlength ); + textChopped1 = textChopped1.substr( 0, textChopped1.length() - commonlength ); + textChopped2 = textChopped2.substr( 0, textChopped2.length() - commonlength ); + + // Compute the diff on the middle block. + diffs = diff_compute( textChopped1, textChopped2, checklines, deadline ); + + // Restore the prefix and suffix. 
+ if ( !commonprefix.empty() ) + { + diffs.emplace( diffs.begin(), EOperation::eEQUAL, commonprefix ); + } + if ( !commonsuffix.empty() ) + { + diffs.emplace_back( EOperation::eEQUAL, commonsuffix ); + } + + diff_cleanupMerge( diffs ); + + return diffs; + } + + TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2 ) + { + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines ) + { + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines ); + } + + TDiffVector diff_match_patch::diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) + { + return diff_main( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); + } + + TDiffVector diff_match_patch::diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ) + { + TDiffVector diffs; + + if ( text1.empty() ) + { + // Just add some text (speedup). + diffs.emplace_back( EOperation::eINSERT, text2 ); + return diffs; + } + + if ( text2.empty() ) + { + // Just delete some text (speedup). + diffs.emplace_back( EOperation::eDELETE, text1 ); + return diffs; + } + + { + auto [ longtext, shorttext ] = ( text1.length() > text2.length() ) ? std::make_pair( text1, text2 ) : std::make_pair( text2, text1 ); + auto i = longtext.find( shorttext ); + if ( i != std::string::npos ) + { + // Shorter text is inside the longer text (speedup). + const auto op = ( text1.length() > text2.length() ) ? EOperation::eDELETE : EOperation::eINSERT; + diffs.emplace_back( op, longtext.substr( 0, i ) ); + diffs.emplace_back( EOperation::eEQUAL, shorttext ); + diffs.emplace_back( op, safeMid( longtext, i + shorttext.length() ) ); + return diffs; + } + + if ( shorttext.length() == 1 ) + { + // Single character string. 
+ // After the previous speedup, the character can't be an equality. + diffs.emplace_back( EOperation::eDELETE, text1 ); + diffs.emplace_back( EOperation::eINSERT, text2 ); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const TStringVector hm = diff_halfMatch( text1, text2 ); + if ( !hm.empty() ) + { + // A half-match was found, sort out the return data. + auto &&text1_a = hm[ 0 ]; + auto &&text1_b = hm[ 1 ]; + auto &&text2_a = hm[ 2 ]; + auto &&text2_b = hm[ 3 ]; + auto &&mid_common = hm[ 4 ]; + // Send both pairs off for separate processing. + diffs = diff_main( text1_a, text2_a, checklines, deadline ); + const auto &&diffs_b = diff_main( text1_b, text2_b, checklines, deadline ); + // Merge the results. + diffs.emplace_back( EOperation::eEQUAL, mid_common ); + diffs.insert( diffs.end(), diffs_b.begin(), diffs_b.end() ); + return diffs; + } + + // Perform a real diff. + if ( checklines && ( text1.length() > 100 ) && ( text2.length() > 100 ) ) + { + return diff_lineMode( text1, text2, deadline ); + } + + return diff_bisect( text1, text2, deadline ); + } + + TDiffVector diff_match_patch::diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ) + { + return diff_compute( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), checklines, deadline ); + } + + TDiffVector diff_match_patch::diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ) + { + // Scan the text on a line-by-line basis first. + auto a = diff_linesToChars( text1, text2 ); + text1 = std::get< std::wstring >( a[ 0 ] ); + text2 = std::get< std::wstring >( a[ 1 ] ); + auto linearray = std::get< TStringVector >( a[ 2 ] ); + + auto diffs = diff_main( text1, text2, false, deadline ); + + // Convert the diff back to original text. + diff_charsToLines( diffs, linearray ); + // Eliminate freak matches (e.g. 
blank lines) + diff_cleanupSemantic( diffs ); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.emplace_back( EOperation::eEQUAL, L"" ); + std::size_t pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isInsert() ) + { + count_insert++; + text_insert += diffs[ pointer ].text(); + } + else if ( diffs[ pointer ].isDelete() ) + { + count_delete++; + text_delete += diffs[ pointer ].text(); + } + else if ( diffs[ pointer ].isEqual() ) + { // Upon reaching an equality, check for prior redundancies. + if ( count_delete >= 1 && count_insert >= 1 ) + { + // Delete the offending records and add the merged ones. + auto numElements = count_delete + count_insert; + auto start = diffs.begin() + pointer - numElements; + auto end = start + numElements; + diffs.erase( start, end ); + pointer = pointer - count_delete - count_insert; + auto subDiff = diff_main( text_delete, text_insert, false, deadline ); + diffs.insert( diffs.begin() + pointer, subDiff.begin(), subDiff.end() ); + pointer = pointer + subDiff.size(); + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + } + pointer++; + } + diffs.pop_back(); // Remove the dummy entry at the end. + + return diffs; + } + + TDiffVector diff_match_patch::diff_lineMode( std::string text1, std::string text2, clock_t deadline ) + { + return diff_lineMode( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); + } + + // using int64_t rather thant size_t due to the backward walking nature of the + // algorithm + TDiffVector diff_match_patch::diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ) + { + // Cache the text lengths to prevent multiple calls. 
+ auto text1_length = static_cast< int64_t >( text1.length() ); + auto text2_length = static_cast< int64_t >( text2.length() ); + auto max_d = ( text1_length + text2_length + 1 ) / 2; + auto v_offset = max_d; + auto v_length = 2 * max_d; + auto v1 = std::vector< int64_t >( v_length, -1 ); + auto v2 = std::vector< int64_t >( v_length, -1 ); + v1[ v_offset + 1 ] = 0; + v2[ v_offset + 1 ] = 0; + auto delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + bool front = ( delta % 2 != 0 ); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int64_t k1start = 0; + int64_t k1end = 0; + int64_t k2start = 0; + int64_t k2end = 0; + for ( int64_t d = 0; d < max_d; d++ ) + { + // Bail out if deadline is reached. + if ( clock() > deadline ) + { + break; + } + + // Walk the front path one step. + for ( auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ) + { + auto k1_offset = v_offset + k1; + int64_t x1; + if ( ( k1 == -d ) || ( k1 != d ) && ( v1[ k1_offset - 1 ] < v1[ k1_offset + 1 ] ) ) + { + x1 = v1[ k1_offset + 1 ]; + } + else + { + x1 = v1[ k1_offset - 1 ] + 1; + } + int64_t y1 = x1 - k1; + while ( ( x1 < text1_length ) && ( y1 < text2_length ) && ( text1[ x1 ] == text2[ y1 ] ) ) + { + x1++; + y1++; + } + v1[ k1_offset ] = x1; + if ( x1 > text1_length ) + { + // Ran off the right of the graph. + k1end += 2; + } + else if ( y1 > text2_length ) + { + // Ran off the bottom of the graph. + k1start += 2; + } + else if ( front ) + { + auto k2_offset = v_offset + delta - k1; + if ( ( k2_offset >= 0 ) && ( k2_offset < v_length ) && ( v2[ k2_offset ] != -1 ) ) + { + // Mirror x2 onto top-left coordinate system. + auto x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } + } + + // Walk the reverse path one step. 
+ for ( auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2 ) + { + auto k2_offset = v_offset + k2; + int64_t x2; + if ( ( k2 == -d ) || ( k2 != d ) && ( v2[ k2_offset - 1 ] < v2[ k2_offset + 1 ] ) ) + { + x2 = v2[ k2_offset + 1 ]; + } + else + { + x2 = v2[ k2_offset - 1 ] + 1; + } + auto y2 = x2 - k2; + while ( ( x2 < text1_length ) && ( y2 < text2_length ) && ( text1[ text1_length - x2 - 1 ] == text2[ text2_length - y2 - 1 ] ) ) + { + x2++; + y2++; + } + v2[ k2_offset ] = x2; + if ( x2 > text1_length ) + { + // Ran off the left of the graph. + k2end += 2; + } + else if ( y2 > text2_length ) + { + // Ran off the top of the graph. + k2start += 2; + } + else if ( !front ) + { + auto k1_offset = v_offset + delta - k2; + if ( ( k1_offset >= 0 ) && ( k1_offset < v_length ) && ( v1[ k1_offset ] != -1 ) ) + { + auto x1 = v1[ k1_offset ]; + auto y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + auto diffs = TDiffVector( { Diff( EOperation::eDELETE, text1 ), Diff( EOperation::eINSERT, text2 ) } ); + return diffs; + } + + TDiffVector diff_match_patch::diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ) + { + return diff_bisect( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), deadline ); + } + + TDiffVector diff_match_patch::diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ) + { + auto text1a = text1.substr( 0, x ); + auto text2a = text2.substr( 0, y ); + auto text1b = safeMid( text1, x ); + auto text2b = safeMid( text2, y ); + + // Compute both diffs serially. 
+ TDiffVector diffs = diff_main( text1a, text2a, false, deadline ); + TDiffVector diffsb = diff_main( text1b, text2b, false, deadline ); + + diffs.insert( diffs.end(), diffsb.begin(), diffsb.end() ); + return diffs; + } + + TDiffVector diff_match_patch::diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ) + { + return diff_bisectSplit( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), x, y, deadline ); + } + + diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( const std::wstring &text1, const std::wstring &text2 ) + { + TStringVector lineArray; + std::unordered_map< std::wstring, std::size_t > lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a nullptr character. + lineArray.emplace_back( L"" ); + + const std::wstring chars1 = diff_linesToCharsMunge( text1, lineArray, lineHash ); + const std::wstring chars2 = diff_linesToCharsMunge( text2, lineArray, lineHash ); + + TVariantVector listRet; + listRet.emplace_back( chars1 ); + listRet.emplace_back( chars2 ); + listRet.emplace_back( lineArray ); + return listRet; + } + + std::vector< diff_match_patch::diff_match_patch::TVariant > diff_match_patch::diff_linesToChars( const std::string &text1, const std::string &text2 ) + { + return diff_linesToChars( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + std::wstring diff_match_patch::diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ) + { + std::size_t lineStart = 0; + std::size_t lineEnd = std::string::npos; + std::wstring line; + std::wstring chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. 
+ // Modifying text would create many large strings to garbage collect. + bool firstTime = true; + while ( ( firstTime && ( lineEnd == -1 ) && !text.empty() ) || lineEnd < ( text.length() - 1 ) ) + { + firstTime = false; + lineEnd = text.find( '\n', lineStart ); + if ( lineEnd == -1 ) + { + lineEnd = text.length() - 1; + } + line = safeMid( text, lineStart, lineEnd + 1 - lineStart ); + + auto pos = lineHash.find( line ); + if ( pos != lineHash.end() ) + { + chars += static_cast< wchar_t >( ( *pos ).second ); + } + else + { + lineArray.emplace_back( line ); + lineHash[ line ] = lineArray.size() - 1; + chars += static_cast< wchar_t >( lineArray.size() - 1 ); + } + + lineStart = lineEnd + 1; + } + return chars; + } + + void diff_match_patch::diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray ) + { + for ( auto &&diff : diffs ) + { + std::wstring text; + for ( auto &&y : diff.text() ) + { + text += lineArray[ y ]; + } + diff.fText = text; + } + } + + std::size_t diff_match_patch::diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ) + { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto n = std::min( text1.length(), text2.length() ); + for ( std::size_t i = 0; i < n; i++ ) + { + if ( text1[ i ] != text2[ i ] ) + { + return i; + } + } + return n; + } + + std::size_t diff_match_patch::diff_commonPrefix( const std::string &text1, const std::string &text2 ) + { + return diff_commonPrefix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + std::size_t diff_match_patch::diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ) + { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + const auto n = std::min( text1_length, text2_length ); + for ( std::size_t i = 1; i <= n; i++ ) + { + if ( text1[ text1_length - i ] != text2[ text2_length - i ] ) + { + return i - 1; + } + } 
+ return n; + } + + std::size_t diff_match_patch::diff_commonSuffix( const std::string &text1, const std::string &text2 ) + { + return diff_commonSuffix( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + std::size_t diff_match_patch::diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ) + { + // Cache the text lengths to prevent multiple calls. + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + // Eliminate the nullptr case. + if ( text1_length == 0 || text2_length == 0 ) + { + return 0; + } + // Truncate the longer string. + std::wstring text1_trunc = text1; + std::wstring text2_trunc = text2; + if ( text1_length > text2_length ) + { + text1_trunc = text1.substr( text1_length - text2_length ); + } + else if ( text1_length < text2_length ) + { + text2_trunc = text2.substr( 0, text1_length ); + } + const auto text_length = std::min( text1_length, text2_length ); + // Quick check for the worst case. + if ( text1_trunc == text2_trunc ) + { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + std::size_t best = 0; + std::size_t length = 1; + while ( true ) + { + std::wstring pattern = ( length < text1_trunc.length() ) ? 
text1_trunc.substr( text_length - length ) : std::wstring(); + if ( pattern.empty() ) + return best; + + auto found = text2_trunc.find( pattern ); + if ( found == std::string::npos ) + { + return best; + } + length += found; + if ( found == 0 || text1_trunc.substr( text_length - length ) == text2_trunc.substr( 0, length ) ) + { + best = length; + length++; + } + } + } + + std::size_t diff_match_patch::diff_commonOverlap( const std::string &text1, const std::string &text2 ) + { + return diff_commonOverlap( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ) + { + if ( Diff_Timeout <= 0 ) + { + // Don't risk returning a non-optimal diff if we have unlimited time. + return {}; + } + const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; + const std::wstring shorttext = text1.length() > text2.length() ? text2 : text1; + if ( ( longtext.length() < 4 ) || ( ( shorttext.length() * 2 ) < longtext.length() ) ) + { + return {}; // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + const TStringVector hm1 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 3 ) / 4 ); + // Check again based on the third quarter. + const TStringVector hm2 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 1 ) / 2 ); + TStringVector hm; + if ( hm1.empty() && hm2.empty() ) + { + return {}; + } + else if ( hm2.empty() ) + { + hm = hm1; + } + else if ( hm1.empty() ) + { + hm = hm2; + } + else + { + // Both matched. Select the longest. + hm = hm1[ 4 ].length() > hm2[ 4 ].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if ( text1.length() > text2.length() ) + { + return hm; + } + else + { + TStringVector listRet( { hm[ 2 ], hm[ 3 ], hm[ 0 ], hm[ 1 ], hm[ 4 ] } ); + return listRet; + } + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( const std::string &text1, const std::string &text2 ) + { + return diff_halfMatch( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ) + { + // Start with a 1/4 length substring at position i as a seed. + const std::wstring seed = safeMid( longtext, i, longtext.length() / 4 ); + std::size_t j = std::string::npos; + std::wstring best_common; + std::wstring best_longtext_a, best_longtext_b; + std::wstring best_shorttext_a, best_shorttext_b; + while ( ( j = shorttext.find( seed, j + 1 ) ) != std::string::npos ) + { + const auto prefixLength = diff_commonPrefix( safeMid( longtext, i ), safeMid( shorttext, j ) ); + const auto suffixLength = diff_commonSuffix( longtext.substr( 0, i ), shorttext.substr( 0, j ) ); + if ( best_common.length() < suffixLength + prefixLength ) + { + best_common = safeMid( shorttext, j - suffixLength, suffixLength ) + safeMid( shorttext, j, prefixLength ); + best_longtext_a = longtext.substr( 0, i - suffixLength ); + best_longtext_b = safeMid( longtext, i + prefixLength ); + best_shorttext_a = shorttext.substr( 0, j - suffixLength ); + best_shorttext_b = safeMid( shorttext, j + prefixLength ); + } + } + if ( best_common.length() * 2 >= longtext.length() ) + { + TStringVector listRet( { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common } ); + return listRet; + } + else + { + return {}; + } + } + + diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ) + { + return diff_halfMatchI( NUtils::to_wstring( longtext ), NUtils::to_wstring( 
shorttext ), i ); + } + + void diff_match_patch::diff_cleanupSemantic( TDiffVector &diffs ) + { + if ( diffs.empty() ) + return; + + bool changes = false; + // Stack of indices where equalities are found. + std::stack< std::size_t > equalities; // stack of equalities + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Number of characters that changed prior to the equality. + std::size_t length_insertions1 = 0; + std::size_t length_deletions1 = 0; + // Number of characters that changed after the equality. + std::size_t length_insertions2 = 0; + std::size_t length_deletions2 = 0; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isEqual() ) + { // Equality found. + equalities.push( pointer ); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality = diffs[ pointer ].text(); + } + else + { // an insertion or deletion + if ( diffs[ pointer ].isInsert() ) + { + length_insertions2 += diffs[ pointer ].text().length(); + } + else + { + length_deletions2 += diffs[ pointer ].text().length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if ( !lastEquality.empty() && ( lastEquality.length() <= std::max( length_insertions1, length_deletions1 ) ) && ( lastEquality.length() <= std::max( length_insertions2, length_deletions2 ) ) ) + { + // Duplicate record. + diffs.insert( diffs.begin() + equalities.top(), Diff( EOperation::eDELETE, lastEquality ) ); + // Change second copy to insert. + diffs[ equalities.top() + 1 ].fOperation = EOperation::eINSERT; + // Throw away the equality we just deleted. + equalities.pop(); + if ( !equalities.empty() ) + { + equalities.pop(); + } + pointer = !equalities.empty() ? equalities.top() : -1; + length_insertions1 = 0; // Reset the counters. 
+ length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality.clear(); + changes = true; + } + } + pointer++; + } + + // Normalize the diff. + if ( changes ) + { + diff_cleanupMerge( diffs ); + } + diff_cleanupSemanticLossless( diffs ); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 1; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer - 1 ].isDelete() && diffs[ pointer ].isInsert() ) + { + auto deletion = diffs[ pointer - 1 ].text(); + auto insertion = diffs[ pointer ].text(); + std::size_t overlap_length1 = diff_commonOverlap( deletion, insertion ); + std::size_t overlap_length2 = diff_commonOverlap( insertion, deletion ); + if ( overlap_length1 >= overlap_length2 ) + { + if ( overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0 ) + { + // Overlap found. + // Insert an equality and trim the surrounding edits. + diffs.emplace( diffs.begin() + pointer, EOperation::eEQUAL, insertion.substr( 0, overlap_length1 ) ); + diffs[ pointer - 1 ].fText = deletion.substr( 0, deletion.length() - overlap_length1 ); + diffs[ pointer + 1 ].fText = insertion.substr( overlap_length1 ); + pointer++; + } + } + else + { + if ( overlap_length2 >= deletion.length() / 2.0 || overlap_length2 >= insertion.length() / 2.0 ) + { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. 
+ diffs.emplace( diffs.begin() + pointer, EOperation::eEQUAL, deletion.substr( 0, overlap_length2 ) ); + diffs[ pointer - 1 ].fOperation = EOperation::eINSERT; + diffs[ pointer - 1 ].fText = insertion.substr( 0, insertion.length() - overlap_length2 ); + diffs[ pointer + 1 ].fOperation = EOperation::eDELETE; + diffs[ pointer + 1 ].fText = deletion.substr( overlap_length2 ); + pointer++; + } + } + pointer++; + } + pointer++; + } + } + + void diff_match_patch::diff_cleanupSemanticLossless( TDiffVector &diffs ) + { + int pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while ( ( pointer != -1 ) && !diffs.empty() && ( pointer < ( diffs.size() - 1 ) ) ) + { + if ( diffs[ pointer - 1 ].isEqual() && diffs[ pointer + 1 ].isEqual() ) + { + // This is a single edit surrounded by equalities. + auto equality1 = diffs[ pointer - 1 ].text(); + auto edit = diffs[ pointer ].text(); + auto equality2 = diffs[ pointer + 1 ].text(); + + // First, shift the edit as far left as possible. + auto commonOffset = diff_commonSuffix( equality1, edit ); + if ( commonOffset > 0 ) + { + auto commonString = safeMid( edit, edit.length() - commonOffset ); + equality1 = equality1.substr( 0, equality1.length() - commonOffset ); + edit = commonString + edit.substr( 0, edit.length() - commonOffset ); + equality2 = commonString + equality2; + } + + // Second, step character by character right, + // looking for the best fit. 
+ auto bestEquality1 = equality1; + auto bestEdit = edit; + auto bestEquality2 = equality2; + auto bestScore = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + while ( !edit.empty() && !equality2.empty() && edit[ 0 ] == equality2[ 0 ] ) + { + equality1 += edit[ 0 ]; + edit = edit.substr( 1 ) + equality2[ 0 ]; + equality2 = equality2.substr( 1 ); + auto score = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + // The >= encourages trailing rather than leading whitespace on + // edits. + if ( score >= bestScore ) + { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if ( diffs[ pointer - 1 ].text() != bestEquality1 ) + { + // We have an improvement, save it back to the diff. + if ( !bestEquality1.empty() ) + { + diffs[ pointer - 1 ].fText = bestEquality1; + } + else + { + diffs.erase( diffs.begin() + pointer - 1 ); + pointer--; + } + diffs[ pointer ].fText = bestEdit; + if ( !bestEquality2.empty() ) + { + diffs[ pointer + 1 ].fText = bestEquality2; + } + else + { + diffs.erase( diffs.begin() + pointer + 1 ); + pointer--; + } + } + } + pointer++; + } + } + + int64_t diff_match_patch::diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ) + { + if ( one.empty() || two.empty() ) + { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. 
+ auto char1 = one[ one.length() - 1 ]; + auto char2 = two[ 0 ]; + bool nonAlphaNumeric1 = !std::iswalnum( char1 ); + bool nonAlphaNumeric2 = !std::iswalnum( char2 ); + bool whitespace1 = nonAlphaNumeric1 && std::iswspace( char1 ); + bool whitespace2 = nonAlphaNumeric2 && std::iswspace( char2 ); + bool lineBreak1 = whitespace1 && std::iswcntrl( char1 ); + bool lineBreak2 = whitespace2 && std::iswcntrl( char2 ); + bool blankLine1 = lineBreak1 && std::regex_search( one, BLANKLINEEND ); + bool blankLine2 = lineBreak2 && std::regex_search( two, BLANKLINESTART ); + + if ( blankLine1 || blankLine2 ) + { + // Five points for blank lines. + return 5; + } + else if ( lineBreak1 || lineBreak2 ) + { + // Four points for line breaks. + return 4; + } + else if ( nonAlphaNumeric1 && !whitespace1 && whitespace2 ) + { + // Three points for end of sentences. + return 3; + } + else if ( whitespace1 || whitespace2 ) + { + // Two points for whitespace. + return 2; + } + else if ( nonAlphaNumeric1 || nonAlphaNumeric2 ) + { + // One point for non-alphanumeric. + return 1; + } + return 0; + } + + int64_t diff_match_patch::diff_cleanupSemanticScore( const std::string &one, const std::string &two ) + { + return diff_cleanupSemanticScore( NUtils::to_wstring( one ), NUtils::to_wstring( two ) ); + } + + // Define some regex patterns for matching boundaries. + std::wregex diff_match_patch::BLANKLINEEND = std::wregex( LR"(\n\r?\n$)" ); + std::wregex diff_match_patch::BLANKLINESTART = std::wregex( LR"(^\r?\n\r?\n)" ); + + void diff_match_patch::diff_cleanupEfficiency( TDiffVector &diffs ) + { + bool changes = false; + // Stack of indices where equalities are found. + std::stack< std::size_t > equalities; + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. 
+ bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isEqual() ) + { // Equality found. + if ( diffs[ pointer ].text().length() < Diff_EditCost && ( post_ins || post_del ) ) + { + // Candidate found. + equalities.push( pointer ); + pre_ins = post_ins; + pre_del = post_del; + lastEquality = diffs[ pointer ].text(); + } + else + { + // Not a candidate, and can never become one. + equalities = {}; + lastEquality.clear(); + } + post_ins = post_del = false; + } + else + { // An insertion or deletion. + if ( diffs[ pointer ].isDelete() ) + { + post_del = true; + } + else + { + post_ins = true; + } + /* * Five types to be split: * ABXYCD * AXCD @@ -1100,1101 +1243,1155 @@ void diff_match_patch::diff_cleanupEfficiency(TDiffVector &diffs) { * AXCD * ABXC */ - if ((lastEquality.length() != 0) && - ((pre_ins && pre_del && post_ins && post_del) || - ((lastEquality.length() < Diff_EditCost / 2) && - ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0) + - (post_del ? 1 : 0)) == 3))) { - // Duplicate record. - diffs.emplace(diffs.begin() + equalities.top(), DELETE, lastEquality); - // Change second copy to insert. - diffs[equalities.top() + 1].operation = INSERT; - equalities.pop(); // Throw away the equality we just deleted. - lastEquality.clear(); - if (pre_ins && pre_del) { - // No changes made which could affect previous entry, keep going. - post_ins = post_del = true; - equalities = {}; - } else { - if (!equalities.empty()) { - equalities.pop(); - } - - pointer = !equalities.empty() ? 
equalities.top() : -1; - post_ins = post_del = false; - } - changes = true; - } - } - pointer++; - } - - if (changes) { - diff_cleanupMerge(diffs); - } -} - -void diff_match_patch::diff_cleanupMerge(TDiffVector &diffs) { - diffs.emplace_back(EQUAL, L""); - int pointer = 0; - int count_delete = 0; - int count_insert = 0; - std::wstring text_delete; - std::wstring text_insert; - - while (pointer < diffs.size()) { - switch (diffs[pointer].operation) { - case INSERT: - count_insert++; - text_insert += diffs[pointer].text; - pointer++; - break; - case DELETE: - count_delete++; - text_delete += diffs[pointer].text; - pointer++; - break; - case EQUAL: - // Upon reaching an equality, check for prior redundancies. - if (count_delete + count_insert > 1) { - if (count_delete != 0 && count_insert != 0) { - // Factor out any common prefixies. - auto commonlength = diff_commonPrefix(text_insert, text_delete); - if (commonlength != 0) { - if ((pointer > (count_delete + count_insert)) && - diffs[pointer - (count_delete + count_insert) - 1] - .operation == EQUAL) { - diffs[pointer - count_delete - count_insert - 1].text += - text_insert.substr(0, commonlength); - } else { - diffs.emplace(diffs.begin(), EQUAL, - text_insert.substr(0, commonlength)); + if ( ( lastEquality.length() != 0 ) && ( ( pre_ins && pre_del && post_ins && post_del ) || ( ( lastEquality.length() < Diff_EditCost / 2 ) && ( ( pre_ins ? 1 : 0 ) + ( pre_del ? 1 : 0 ) + ( post_ins ? 1 : 0 ) + ( post_del ? 1 : 0 ) ) == 3 ) ) ) + { + // Duplicate record. + diffs.emplace( diffs.begin() + equalities.top(), EOperation::eDELETE, lastEquality ); + // Change second copy to insert. + diffs[ equalities.top() + 1 ].fOperation = EOperation::eINSERT; + equalities.pop(); // Throw away the equality we just deleted. + lastEquality.clear(); + if ( pre_ins && pre_del ) + { + // No changes made which could affect previous entry, keep going. 
+ post_ins = post_del = true; + equalities = {}; + } + else + { + if ( !equalities.empty() ) + { + equalities.pop(); + } + + pointer = !equalities.empty() ? equalities.top() : -1; + post_ins = post_del = false; + } + changes = true; + } + } + pointer++; + } + + if ( changes ) + { + diff_cleanupMerge( diffs ); + } + } + + void diff_match_patch::diff_cleanupMerge( TDiffVector &diffs ) + { + diffs.emplace_back( EOperation::eEQUAL, L"" ); + int pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + + while ( pointer < diffs.size() ) + { + if ( diffs[ pointer ].isInsert() ) + { + count_insert++; + text_insert += diffs[ pointer ].text(); + pointer++; + } + else if ( diffs[ pointer ].isDelete() ) + { + count_delete++; + text_delete += diffs[ pointer ].text(); pointer++; - } - text_insert = text_insert.substr(commonlength); - text_delete = text_delete.substr(commonlength); } - // Factor out any common suffixies. - commonlength = diff_commonSuffix(text_insert, text_delete); - if (commonlength != 0) { - diffs[pointer].text = - safeMid(text_insert, text_insert.length() - commonlength) + - diffs[pointer].text; - text_insert = - text_insert.substr(0, text_insert.length() - commonlength); - text_delete = - text_delete.substr(0, text_delete.length() - commonlength); + else if ( diffs[ pointer ].isEqual() ) + { + // Upon reaching an equality, check for prior redundancies. + if ( count_delete + count_insert > 1 ) + { + if ( count_delete != 0 && count_insert != 0 ) + { + // Factor out any common prefixies. 
+ auto commonlength = diff_commonPrefix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + if ( ( pointer > ( count_delete + count_insert ) ) && diffs[ pointer - ( count_delete + count_insert ) - 1 ].isEqual() ) + { + diffs[ pointer - count_delete - count_insert - 1 ].fText += text_insert.substr( 0, commonlength ); + } + else + { + diffs.emplace( diffs.begin(), EOperation::eEQUAL, text_insert.substr( 0, commonlength ) ); + pointer++; + } + text_insert = text_insert.substr( commonlength ); + text_delete = text_delete.substr( commonlength ); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + diffs[ pointer ].fText = safeMid( text_insert, text_insert.length() - commonlength ) + diffs[ pointer ].text(); + text_insert = text_insert.substr( 0, text_insert.length() - commonlength ); + text_delete = text_delete.substr( 0, text_delete.length() - commonlength ); + } + } + // Delete the offending records and add the merged ones. + pointer -= count_delete + count_insert; + NUtils::Splice( diffs, pointer, count_delete + count_insert ); + if ( !text_delete.empty() ) + { + NUtils::Splice( diffs, pointer, 0, { Diff( EOperation::eDELETE, text_delete ) } ); + pointer++; + } + if ( !text_insert.empty() ) + { + NUtils::Splice( diffs, pointer, 0, { Diff( EOperation::eINSERT, text_insert ) } ); + pointer++; + } + pointer++; + } + else if ( pointer != 0 && diffs[ pointer - 1 ].isEqual() ) + { + // Merge this equality with the previous one. + diffs[ pointer - 1 ].fText += diffs[ pointer ].text(); + diffs.erase( diffs.begin() + pointer ); + } + else + { + pointer++; + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + } + } + if ( diffs.back().text().empty() ) + { + diffs.pop_back(); // Remove the dummy entry at the end. 
+ } + + // Second pass: look for single edits surrounded on both sides by + // equalities which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + bool changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while ( !diffs.empty() && pointer < ( diffs.size() - 1 ) ) + { + if ( diffs[ pointer - 1 ].isEqual() && diffs[ pointer + 1 ].isEqual() ) + { + // This is a single edit surrounded by equalities. + if ( NUtils::endsWith( diffs[ pointer ].text(), diffs[ pointer - 1 ].text() ) ) + { + // Shift the edit over the previous equality. + diffs[ pointer ].fText = diffs[ pointer - 1 ].text() + diffs[ pointer ].text().substr( 0, diffs[ pointer ].text().length() - diffs[ pointer - 1 ].text().length() ); + diffs[ pointer + 1 ].fText = diffs[ pointer - 1 ].text() + diffs[ pointer + 1 ].text(); + NUtils::Splice( diffs, pointer - 1, 1 ); + changes = true; + } + else if ( diffs[ pointer ].text().find( diffs[ pointer + 1 ].text() ) == 0 ) + { + // Shift the edit over the next equality. + diffs[ pointer - 1 ].fText += diffs[ pointer + 1 ].text(); + diffs[ pointer ].fText = diffs[ pointer ].text().substr( diffs[ pointer + 1 ].text().length() ) + diffs[ pointer + 1 ].text(); + NUtils::Splice( diffs, pointer + 1, 1 ); + changes = true; + } } - } - // Delete the offending records and add the merged ones. - pointer -= count_delete + count_insert; - NUtils::Splice(diffs, pointer, count_delete + count_insert); - if (!text_delete.empty()) { - NUtils::Splice(diffs, pointer, 0, {Diff(DELETE, text_delete)}); - pointer++; - } - if (!text_insert.empty()) { - NUtils::Splice(diffs, pointer, 0, {Diff(INSERT, text_insert)}); pointer++; - } - pointer++; - } else if (pointer != 0 && diffs[pointer - 1].operation == EQUAL) { - // Merge this equality with the previous one. 
- diffs[pointer - 1].text += diffs[pointer].text; - diffs.erase(diffs.begin() + pointer); - } else { - pointer++; - } - count_insert = 0; - count_delete = 0; - text_delete.clear(); - text_insert.clear(); - break; - } - } - if (diffs.back().text.empty()) { - diffs.pop_back(); // Remove the dummy entry at the end. - } - - // Second pass: look for single edits surrounded on both sides by - // equalities which can be shifted sideways to eliminate an equality. - // e.g: ABAC -> ABAC - bool changes = false; - pointer = 1; - // Intentionally ignore the first and last element (don't need checking). - while (!diffs.empty() && pointer < (diffs.size() - 1)) { - if (diffs[pointer - 1].operation == EQUAL && - diffs[pointer + 1].operation == EQUAL) { - // This is a single edit surrounded by equalities. - if (NUtils::endsWith(diffs[pointer].text, diffs[pointer - 1].text)) { - // Shift the edit over the previous equality. - diffs[pointer].text = - diffs[pointer - 1].text + - diffs[pointer].text.substr(0, diffs[pointer].text.length() - - diffs[pointer - 1].text.length()); - diffs[pointer + 1].text = - diffs[pointer - 1].text + diffs[pointer + 1].text; - NUtils::Splice(diffs, pointer - 1, 1); - changes = true; - } else if (diffs[pointer].text.find(diffs[pointer + 1].text) == 0) { - // Shift the edit over the next equality. - diffs[pointer - 1].text += diffs[pointer + 1].text; - diffs[pointer].text = - diffs[pointer].text.substr(diffs[pointer + 1].text.length()) + - diffs[pointer + 1].text; - NUtils::Splice(diffs, pointer + 1, 1); - changes = true; - } - } - pointer++; - } - // If shifts were made, the diff needs reordering and another shift sweep. 
- if (changes) { - diff_cleanupMerge(diffs); - } -} -std::size_t diff_match_patch::diff_xIndex(const TDiffVector &diffs, - std::size_t loc) { - std::size_t chars1 = 0; - std::size_t chars2 = 0; - std::size_t last_chars1 = 0; - std::size_t last_chars2 = 0; - Diff lastDiff; - for (auto &&aDiff : diffs) { - if (aDiff.operation != INSERT) { - // Equality or deletion. - chars1 += aDiff.text.length(); - } - if (aDiff.operation != DELETE) { - // Equality or insertion. - chars2 += aDiff.text.length(); - } - if (chars1 > loc) { - // Overshot the location. - lastDiff = aDiff; - break; - } - last_chars1 = chars1; - last_chars2 = chars2; - } - if (lastDiff.operation == DELETE) { - // The location was deleted. - return last_chars2; - } - // Add the remaining character length. - return last_chars2 + (loc - last_chars1); -} - -std::wstring diff_match_patch::diff_prettyHtml(const TDiffVector &diffs) { - std::wstring html; - std::wstring text; - for (auto &&aDiff : diffs) { - text = aDiff.text; - NUtils::replace(text, L"&", L"&"); - NUtils::replace(text, L"<", L"<"); - NUtils::replace(text, L">", L">"); - NUtils::replace(text, L"\n", L"¶
"); - switch (aDiff.operation) { - case INSERT: - html += std::wstring(L"") + text + - std::wstring(L""); - break; - case DELETE: - html += std::wstring(L"") + text + - std::wstring(L""); - break; - case EQUAL: - html += std::wstring(L"") + text + std::wstring(L""); - break; - } - } - return html; -} - -std::wstring diff_match_patch::diff_prettyConsole(const TDiffVector &diffs) { - static std::wstring kRed{L"\033[0;31m"}; - static std::wstring kGreen{L"\033[0;32m"}; - static std::wstring kYellow{L"\033[0;33m"}; - static std::wstring kReset{L"\033[m"}; - static std::wstring kEOL{NUtils::fromPercentEncoding(L"%C2%B6") + L"\n"}; - - std::wstring retVal; - std::wstring text; - for (auto &&aDiff : diffs) { - text = aDiff.text; - NUtils::replace(text, L"\n", kEOL); - switch (aDiff.operation) { - case INSERT: - retVal += kGreen + text + kReset; - break; - case DELETE: - retVal += kRed + text + kReset; - break; - case EQUAL: - retVal += text; - break; - } - } - return retVal; -} - -std::wstring diff_match_patch::diff_text1(const TDiffVector &diffs) { - std::wstring text; - for (auto &&aDiff : diffs) { - if (aDiff.operation != INSERT) { - text += aDiff.text; - } - } - return text; -} - -std::wstring diff_match_patch::diff_text2(const TDiffVector &diffs) { - std::wstring text; - for (auto &&aDiff : diffs) { - if (aDiff.operation != DELETE) { - text += aDiff.text; - } - } - return text; -} - -std::size_t diff_match_patch::diff_levenshtein(const TDiffVector &diffs) { - std::size_t levenshtein = 0; - std::size_t insertions = 0; - std::size_t deletions = 0; - for (auto &&aDiff : diffs) { - switch (aDiff.operation) { - case INSERT: - insertions += aDiff.text.length(); - break; - case DELETE: - deletions += aDiff.text.length(); - break; - case EQUAL: - // A deletion and an insertion is one substitution. 
- levenshtein += std::max(insertions, deletions); - insertions = 0; - deletions = 0; - break; - } - } - levenshtein += std::max(insertions, deletions); - return levenshtein; -} - -std::wstring diff_match_patch::diff_toDelta(const TDiffVector &diffs) { - std::wstring text; - for (auto &&aDiff : diffs) { - switch (aDiff.operation) { - case INSERT: - text += L"+" + - NUtils::toPercentEncoding(aDiff.text, L" !~*'();/?:@&=+$,#") + - L"\t"; - break; - case DELETE: - text += L"-" + std::to_wstring(aDiff.text.length()) + L"\t"; - break; - case EQUAL: - text += L"=" + std::to_wstring(aDiff.text.length()) + L"\t"; - break; - } - } - if (!text.empty()) { - // Strip off trailing tab character. - text = text.substr(0, text.length() - 1); - } - return text; -} - -TDiffVector diff_match_patch::diff_fromDelta(const std::wstring &text1, - const std::wstring &delta) { - TDiffVector diffs; - std::size_t pointer = 0; // Cursor in text1 - auto tokens = NUtils::splitString(delta, L"\t", false); - for (auto &&token : tokens) { - if (token.empty()) { - // Blank tokens are ok (from a trailing \t). - continue; - } - // Each token begins with a one character parameter which specifies the - // operation of this token (delete, insert, equality). - std::wstring param = safeMid(token, 1); - switch (token[0]) { - case '+': - NUtils::replace(param, L"+", L"%2b"); - param = NUtils::fromPercentEncoding(param); - diffs.emplace_back(INSERT, param); - break; - case '-': - // Fall through. - case '=': { - auto n = NUtils::toInt(param); - if (n < 0) { - throw std::wstring(L"Negative number in diff_fromDelta: " + param); } + // If shifts were made, the diff needs reordering and another shift sweep. 
+ if ( changes ) + { + diff_cleanupMerge( diffs ); + } + } + std::size_t diff_match_patch::diff_xIndex( const TDiffVector &diffs, std::size_t loc ) + { + std::size_t chars1 = 0; + std::size_t chars2 = 0; + std::size_t last_chars1 = 0; + std::size_t last_chars2 = 0; + Diff lastDiff; + for ( auto &&aDiff : diffs ) + { + if ( !aDiff.isInsert() ) + { + // Equality or deletion. + chars1 += aDiff.text().length(); + } + if ( !aDiff.isDelete() ) + { + // Equality or insertion. + chars2 += aDiff.text().length(); + } + if ( chars1 > loc ) + { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if ( lastDiff.isDelete() ) + { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + ( loc - last_chars1 ); + } + + std::wstring diff_match_patch::diff_prettyHtml( const TDiffVector &diffs ) + { + std::wstring html; + for ( auto &&aDiff : diffs ) + { + html += aDiff.toHtml(); + } + return html; + } + + std::wstring diff_match_patch::diff_prettyConsole( const TDiffVector &diffs ) + { + std::wstring retVal; + for ( auto &&aDiff : diffs ) + { + retVal += aDiff.toConsole(); + } + return retVal; + } + + std::wstring diff_match_patch::diff_text1( const TDiffVector &diffs ) + { std::wstring text; - if ((pointer + n) > text1.length()) { - throw std::wstring(L"Delta length (" + std::to_wstring(pointer + n) + - L") larger than source text length (" + - std::to_wstring(text1.length()) + L")."); - } - - text = safeMid(text1, pointer, n); - pointer += n; - if (token[0] == L'=') { - diffs.emplace_back(EQUAL, text); - } else { - diffs.emplace_back(DELETE, text); - } - break; - } - default: - throw std::wstring(L"Invalid diff operation in diff_fromDelta: " + - token[0]); - } - } - if (pointer != text1.length()) { - throw std::wstring(L"Delta length (") + std::to_wstring(pointer) + - L") smaller than source text length (" + - std::to_wstring(text1.length()) + L")"; - } - 
return diffs; -} - -TDiffVector diff_match_patch::diff_fromDelta(const std::string &text1, - const std::string &delta) { - return diff_fromDelta(NUtils::to_wstring(text1), NUtils::to_wstring(delta)); -} - -// MATCH FUNCTIONS - -std::size_t diff_match_patch::match_main(const std::wstring &text, - const std::wstring &pattern, - std::size_t loc) { - // Check for null inputs not needed since null can't be passed via - // std::wstring - - loc = std::max(kZERO, std::min(loc, text.length())); - if (text == pattern) { - // Shortcut (potentially not guaranteed by the algorithm) - return 0; - } else if (text.empty()) { - // Nothing to match. - return -1; - } else if (loc + pattern.length() <= text.length() && - safeMid(text, loc, pattern.length()) == pattern) { - // Perfect match at the perfect spot! (Includes case of nullptr pattern) - return loc; - } else { - // Do a fuzzy compare. - return match_bitap(text, pattern, loc); - } -} - -std::size_t diff_match_patch::match_main(const std::string &text, - const std::string &pattern, - std::size_t loc) { - return match_main(NUtils::to_wstring(text), NUtils::to_wstring(pattern), loc); -} - -std::size_t diff_match_patch::match_bitap(const std::wstring &text, - const std::wstring &pattern, - std::size_t loc) { - if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { - throw "Pattern too long for this application."; - } - - // Initialise the alphabet. - auto &&s = match_alphabet(pattern); - - // Highest score beyond which we give up. - double score_threshold = Match_Threshold; - // Is there a nearby exact match? (speedup) - auto best_loc = text.find(pattern, loc); - if (best_loc != std::string::npos) { - score_threshold = - std::min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); - // What about in the other direction? 
(speedup) - auto start = std::min(loc + pattern.length(), text.length()); - best_loc = text.rfind(pattern, start); - if (best_loc != std::string::npos) { - score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), - score_threshold); - } - } - - // Initialise the bit arrays. - auto matchmask = 1 << (pattern.length() - 1); - best_loc = std::string::npos; - - std::size_t bin_min, bin_mid; - auto bin_max = pattern.length() + text.length(); - std::vector rd; - std::vector last_rd; - for (int d = 0; d < pattern.length(); d++) { - // Scan for the best match; each iteration allows for one more error. - // Run a binary search to determine how far from 'loc' we can stray at - // this error level. - bin_min = 0; - bin_mid = bin_max; - while (bin_min < bin_mid) { - if (match_bitapScore(d, loc + bin_mid, loc, pattern) <= score_threshold) { - bin_min = bin_mid; - } else { - bin_max = bin_mid; - } - bin_mid = (bin_max - bin_min) / 2 + bin_min; - } - // Use the result from this iteration as the maximum for the next. - bin_max = bin_mid; - auto start = std::max(kONE, (loc > bin_mid) ? (loc - bin_mid + 1) : kZERO); - auto finish = std::min(loc + bin_mid, text.length()) + pattern.length(); - - rd = std::vector(finish + 2, 0); - rd[finish + 1] = (1 << d) - 1; - for (auto j = finish; (j != -1) && (j >= start); j--) { - int64_t charMatch; - if (text.length() <= j - 1) { - // Out of range. 
- charMatch = 0; - } else { - auto pos = s.find(text[j - 1]); - if (pos == s.end()) - charMatch = 0; + for ( auto &&aDiff : diffs ) + { + if ( !aDiff.isInsert() ) + { + text += aDiff.text(); + } + } + return text; + } + + std::wstring diff_match_patch::diff_text2( const TDiffVector &diffs ) + { + std::wstring text; + for ( auto &&aDiff : diffs ) + { + if ( !aDiff.isDelete() ) + { + text += aDiff.text(); + } + } + return text; + } + + std::size_t diff_match_patch::diff_levenshtein( const TDiffVector &diffs ) + { + std::size_t levenshtein = 0; + std::size_t insertions = 0; + std::size_t deletions = 0; + for ( auto &&aDiff : diffs ) + { + if ( aDiff.isInsert() ) + insertions += aDiff.text().length(); + else if ( aDiff.isDelete() ) + deletions += aDiff.text().length(); + else if ( aDiff.isEqual() ) + { + // A deletion and an insertion is one substitution. + levenshtein += std::max( insertions, deletions ); + insertions = 0; + deletions = 0; + } + } + levenshtein += std::max( insertions, deletions ); + return levenshtein; + } + + std::wstring diff_match_patch::diff_toDelta( const TDiffVector &diffs ) + { + std::wstring text; + for ( auto &&aDiff : diffs ) + { + text += aDiff.toDelta(); + } + if ( !text.empty() ) + { + // Strip off trailing tab character. + text = text.substr( 0, text.length() - 1 ); + } + return text; + } + + TDiffVector diff_match_patch::diff_fromDelta( const std::wstring &text1, const std::wstring &delta ) + { + TDiffVector diffs; + std::size_t pointer = 0; // Cursor in text1 + auto tokens = NUtils::splitString( delta, L"\t", false ); + for ( auto &&token : tokens ) + { + if ( token.empty() ) + { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). 
+ std::wstring param = safeMid( token, 1 ); + switch ( token[ 0 ] ) + { + case '+': + NUtils::replace( param, L"+", L"%2b" ); + param = NUtils::fromPercentEncoding( param ); + diffs.emplace_back( EOperation::eINSERT, param ); + break; + case '-': + // Fall through. + case '=': + { + auto n = NUtils::toInt( param ); + if ( n < 0 ) + { + throw std::wstring( L"Negative number in diff_fromDelta: " + param ); + } + std::wstring text; + if ( ( pointer + n ) > text1.length() ) + { + throw std::wstring( L"Delta length (" + std::to_wstring( pointer + n ) + L") larger than source text length (" + std::to_wstring( text1.length() ) + L")." ); + } + + text = safeMid( text1, pointer, n ); + pointer += n; + if ( token[ 0 ] == L'=' ) + { + diffs.emplace_back( EOperation::eEQUAL, text ); + } + else + { + diffs.emplace_back( EOperation::eDELETE, text ); + } + break; + } + default: + throw std::wstring( L"Invalid diff operation in diff_fromDelta: " + token[ 0 ] ); + } + } + if ( pointer != text1.length() ) + { + throw std::wstring( L"Delta length (" ) + std::to_wstring( pointer ) + L") smaller than source text length (" + std::to_wstring( text1.length() ) + L")"; + } + return diffs; + } + + TDiffVector diff_match_patch::diff_fromDelta( const std::string &text1, const std::string &delta ) + { + return diff_fromDelta( NUtils::to_wstring( text1 ), NUtils::to_wstring( delta ) ); + } + + // MATCH FUNCTIONS + + std::size_t diff_match_patch::match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) + { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + loc = std::max( kZERO, std::min( loc, text.length() ) ); + if ( text == pattern ) + { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } + else if ( text.empty() ) + { + // Nothing to match. 
+ return -1; + } + else if ( loc + pattern.length() <= text.length() && safeMid( text, loc, pattern.length() ) == pattern ) + { + // Perfect match at the perfect spot! (Includes case of nullptr pattern) + return loc; + } else - charMatch = (*pos).second; - } - if (d == 0) { - // First pass: exact match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; - } else { - // Subsequent passes: fuzzy match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | - (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; - } - if ((rd[j] & matchmask) != 0) { - double score = match_bitapScore(d, j - 1, loc, pattern); - // This match will almost certainly be better than any existing - // match. But check anyway. - if (score <= score_threshold) { - // Told you so. - score_threshold = score; - best_loc = j - 1; - if (best_loc > loc) { - // When passing loc, don't exceed our current distance from loc. - start = - std::max(kONE, (2 * loc > best_loc) ? 2 * loc - best_loc : 1); - } else { - // Already passed loc, downhill from here on in. - break; - } - } - } - } - if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { - // No hope for a (better) match at greater error levels. - break; - } - last_rd = std::move(rd); - } - return best_loc; -} - -std::size_t diff_match_patch::match_bitap(const std::string &text, - const std::string &pattern, - std::size_t loc) { - return match_bitap(NUtils::to_wstring(text), NUtils::to_wstring(pattern), - loc); -} - -double diff_match_patch::match_bitapScore(int64_t e, int64_t x, int64_t loc, - const std::wstring &pattern) { - const float accuracy = static_cast(e) / pattern.length(); - const auto proximity = std::abs(loc - x); - if (Match_Distance == 0) { - // Dodge divide by zero error. - return proximity == 0 ? 
accuracy : 1.0; - } - return accuracy + (proximity / static_cast(Match_Distance)); -} - -diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( - const std::wstring &pattern) { - TCharPosMap s; - std::size_t i; - for (i = 0; i < pattern.length(); i++) { - auto c = pattern[i]; - s[c] = 0; - } - for (i = 0; i < pattern.length(); i++) { - auto c = pattern[i]; - auto pos = s.find(c); - std::size_t prev = 0; - if (pos != s.end()) prev = (*pos).second; - s[c] = prev | (1ULL << (pattern.length() - i - 1)); - } - return s; -} - -diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( - const std::string &pattern) { - return match_alphabet(NUtils::to_wstring(pattern)); -} - -// PATCH FUNCTIONS - -void diff_match_patch::patch_addContext(Patch &patch, - const std::wstring &text) { - if (text.empty()) { - return; - } - std::wstring pattern = safeMid(text, patch.start2, patch.length1); - std::size_t padding = 0; - - // Look for the first and last matches of pattern in text. If two different - // matches are found, increase the pattern length. - while ((text.find(pattern) != text.rfind(pattern)) && - (pattern.length() < (Match_MaxBits - Patch_Margin - Patch_Margin))) { - padding += Patch_Margin; - pattern = safeMid( - text, - std::max(kZERO, - ((patch.start2 > padding) ? patch.start2 - padding : 0UL)), - std::min(text.length(), patch.start2 + patch.length1 + padding) - - std::max(kZERO, - (patch.start2 > padding) ? patch.start2 - padding : 0)); - } - // Add one chunk for good luck. - padding += Patch_Margin; - - // Add the prefix. - std::wstring prefix = safeMid( - text, - std::max(kZERO, - ((patch.start2 > padding) ? patch.start2 - padding : 0UL)), - patch.start2 - - std::max(kZERO, - ((patch.start2 > padding) ? patch.start2 - padding : 0UL))); - if (!prefix.empty()) { - patch.diffs.emplace(patch.diffs.begin(), EQUAL, prefix); - } - // Add the suffix. 
- std::wstring suffix = - safeMid(text, patch.start2 + patch.length1, - std::min(text.length(), patch.start2 + patch.length1 + padding) - - (patch.start2 + patch.length1)); - if (!suffix.empty()) { - patch.diffs.emplace_back(EQUAL, suffix); - } - - // Roll back the start points. - patch.start1 -= prefix.length(); - patch.start2 -= prefix.length(); - // Extend the lengths. - patch.length1 += prefix.length() + suffix.length(); - patch.length2 += prefix.length() + suffix.length(); -} - -void diff_match_patch::patch_addContext(Patch &patch, const std::string &text) { - return patch_addContext(patch, NUtils::to_wstring(text)); -} - -TPatchVector diff_match_patch::patch_make(const std::wstring &text1, - const std::wstring &text2) { - // Check for null inputs not needed since null can't be passed via - // std::wstring - - // No diffs provided, compute our own. - TDiffVector diffs = diff_main(text1, text2, true); - if (diffs.size() > 2) { - diff_cleanupSemantic(diffs); - diff_cleanupEfficiency(diffs); - } - - return patch_make(text1, diffs); -} - -TPatchVector diff_match_patch::patch_make(const TDiffVector &diffs) { - // No origin string provided, compute our own. - const std::wstring text1 = diff_text1(diffs); - return patch_make(text1, diffs); -} - -TPatchVector diff_match_patch::patch_make(const std::wstring &text1, - const std::wstring & /*text2*/, - const TDiffVector &diffs) { - // text2 is entirely unused. - return patch_make(text1, diffs); -} - -TPatchVector diff_match_patch::patch_make(const std::wstring &text1, - const TDiffVector &diffs) { - // Check for null inputs not needed since null can't be passed via - // std::wstring - - TPatchVector patches; - if (diffs.empty()) { - return patches; // Get rid of the nullptr case. - } - Patch patch; - std::size_t char_count1 = 0; // Number of characters into the text1 string. - std::size_t char_count2 = 0; // Number of characters into the text2 string. 
- // Start with text1 (prepatch_text) and apply the diffs until we arrive at - // text2 (postpatch_text). We recreate the patches one by one to determine - // context info. - std::wstring prepatch_text = text1; - std::wstring postpatch_text = text1; - for (auto &&aDiff : diffs) { - if (patch.diffs.empty() && aDiff.operation != EQUAL) { - // A new patch starts here. - patch.start1 = char_count1; - patch.start2 = char_count2; - } - - switch (aDiff.operation) { - case INSERT: - patch.diffs.emplace_back(aDiff); - patch.length2 += aDiff.text.length(); - postpatch_text = postpatch_text.substr(0, char_count2) + aDiff.text + - safeMid(postpatch_text, char_count2); - break; - case DELETE: - patch.length1 += aDiff.text.length(); - patch.diffs.emplace_back(aDiff); - postpatch_text = - postpatch_text.substr(0, char_count2) + - safeMid(postpatch_text, char_count2 + aDiff.text.length()); - break; - case EQUAL: - if (aDiff.text.length() <= 2 * Patch_Margin && !patch.diffs.empty() && - !(aDiff == diffs.back())) { - // Small equality inside a patch. - patch.diffs.emplace_back(aDiff); - patch.length1 += aDiff.text.length(); - patch.length2 += aDiff.text.length(); - } - - if (aDiff.text.length() >= 2 * Patch_Margin) { - // Time for a new patch. - if (!patch.diffs.empty()) { - patch_addContext(patch, prepatch_text); - patches.emplace_back(patch); - patch = Patch(); - // Unlike Unidiff, our patch lists have a rolling context. - // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff - // Update prepatch text & pos to reflect the application of the - // just completed patch. - prepatch_text = postpatch_text; - char_count1 = char_count2; - } - } - break; - } - - // Update the current character count. - if (aDiff.operation != INSERT) { - char_count1 += aDiff.text.length(); - } - if (aDiff.operation != DELETE) { - char_count2 += aDiff.text.length(); - } - } - // Pick up the leftover patch if not empty. 
- if (!patch.diffs.empty()) { - patch_addContext(patch, prepatch_text); - patches.emplace_back(patch); - } - - return patches; -} - -TPatchVector diff_match_patch::patch_make(const std::string &text1, - const TDiffVector &diffs) { - return patch_make(NUtils::to_wstring(text1), diffs); -} - -TPatchVector diff_match_patch::patch_make(const std::string &text1, - const std::string &text2, - const TDiffVector &diffs) { - return patch_make(NUtils::to_wstring(text1), NUtils::to_wstring(text2), - diffs); -} - -TPatchVector diff_match_patch::patch_make(const std::string &text1, - const std::string &text2) { - return patch_make(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); -} - -TPatchVector diff_match_patch::patch_deepCopy(const TPatchVector &patches) { - TPatchVector patchesCopy; - for (auto &&aPatch : patches) { - Patch patchCopy = Patch(); - for (auto &&aDiff : aPatch.diffs) { - patchCopy.diffs.emplace_back(aDiff.operation, aDiff.text); - } - patchCopy.start1 = aPatch.start1; - patchCopy.start2 = aPatch.start2; - patchCopy.length1 = aPatch.length1; - patchCopy.length2 = aPatch.length2; - patchesCopy.emplace_back(patchCopy); - } - return patchesCopy; -} - -std::pair > diff_match_patch::patch_apply( - TPatchVector patches, std::wstring text) { - if (patches.empty()) { - return {text, std::vector(0)}; - } - - // Deep copy the patches so that no changes are made to originals. - patches = patch_deepCopy(patches); - - std::wstring nullPadding = patch_addPadding(patches); - text = nullPadding + text + nullPadding; - patch_splitMax(patches); - - std::size_t x = 0; - // delta keeps track of the offset between the expected and actual location - // of the previous patch. If there are patches expected at positions 10 and - // 20, but the first patch was found at 12, delta is 2 and the second patch - // has an effective expected position of 22. 
- uint64_t delta = 0; - std::vector results(patches.size()); - for (auto &&aPatch : patches) { - auto expected_loc = aPatch.start2 + delta; - std::wstring text1 = diff_text1(aPatch.diffs); - std::size_t start_loc; - std::size_t end_loc = std::string::npos; - if (text1.length() > Match_MaxBits) { - // patch_splitMax will only provide an oversized pattern in the case of - // a monster delete. - start_loc = - match_main(text, text1.substr(0, Match_MaxBits), expected_loc); - if (start_loc != -1) { - end_loc = match_main(text, text1.substr(text1.length() - Match_MaxBits), - expected_loc + text1.length() - Match_MaxBits); - if (end_loc == -1 || start_loc >= end_loc) { - // Can't find valid trailing context. Drop this patch. - start_loc = -1; - } - } - } else { - start_loc = match_main(text, text1, expected_loc); - } - if (start_loc == -1) { - // No match found. :( - results[x] = false; - // Subtract the delta for this failed patch from subsequent patches. - delta -= aPatch.length2 - aPatch.length1; - } else { - // Found a match. :) - results[x] = true; - delta = start_loc - expected_loc; - std::wstring text2; - if (end_loc == -1) { - text2 = safeMid(text, start_loc, text1.length()); - } else { - text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); - } - if (text1 == text2) { - // Perfect match, just shove the replacement text in. - text = text.substr(0, start_loc) + diff_text2(aPatch.diffs) + - safeMid(text, start_loc + text1.length()); - } else { - // Imperfect match. Run a diff to get a framework of equivalent - // indices. - TDiffVector diffs = diff_main(text1, text2, false); - if (text1.length() > Match_MaxBits && - diff_levenshtein(diffs) / static_cast(text1.length()) > - Patch_DeleteThreshold) { - // The end points match, but the content is unacceptably bad. 
- results[x] = false; - } else { - diff_cleanupSemanticLossless(diffs); - std::size_t index1 = 0; - for (auto &&aDiff : aPatch.diffs) { - if (aDiff.operation != EQUAL) { - auto index2 = diff_xIndex(diffs, index1); - if (aDiff.operation == INSERT) { - // Insertion - text = text.substr(0, start_loc + index2) + aDiff.text + - safeMid(text, start_loc + index2); - } else if (aDiff.operation == DELETE) { - // Deletion - text = - text.substr(0, start_loc + index2) + - safeMid(text, start_loc + - diff_xIndex( - diffs, index1 + aDiff.text.length())); - } + { + // Do a fuzzy compare. + return match_bitap( text, pattern, loc ); + } + } + + std::size_t diff_match_patch::match_main( const std::string &text, const std::string &pattern, std::size_t loc ) + { + return match_main( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); + } + + std::size_t diff_match_patch::match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ) + { + if ( !( Match_MaxBits == 0 || pattern.length() <= Match_MaxBits ) ) + { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + auto &&s = match_alphabet( pattern ); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + auto best_loc = text.find( pattern, loc ); + if ( best_loc != std::string::npos ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + // What about in the other direction? (speedup) + auto start = std::min( loc + pattern.length(), text.length() ); + best_loc = text.rfind( pattern, start ); + if ( best_loc != std::string::npos ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + } + } + + // Initialise the bit arrays. 
+ auto matchmask = 1 << ( pattern.length() - 1 ); + best_loc = std::string::npos; + + std::size_t bin_min, bin_mid; + auto bin_max = pattern.length() + text.length(); + std::vector< int64_t > rd; + std::vector< int64_t > last_rd; + for ( int d = 0; d < pattern.length(); d++ ) + { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while ( bin_min < bin_mid ) + { + if ( match_bitapScore( d, loc + bin_mid, loc, pattern ) <= score_threshold ) + { + bin_min = bin_mid; + } + else + { + bin_max = bin_mid; + } + bin_mid = ( bin_max - bin_min ) / 2 + bin_min; } - if (aDiff.operation != DELETE) { - index1 += aDiff.text.length(); + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + auto start = std::max( kONE, ( loc > bin_mid ) ? ( loc - bin_mid + 1 ) : kZERO ); + auto finish = std::min( loc + bin_mid, text.length() ) + pattern.length(); + + rd = std::vector< int64_t >( finish + 2, 0 ); + rd[ finish + 1 ] = ( 1 << d ) - 1; + for ( auto j = finish; ( j != -1 ) && ( j >= start ); j-- ) + { + int64_t charMatch; + if ( text.length() <= j - 1 ) + { + // Out of range. + charMatch = 0; + } + else + { + auto pos = s.find( text[ j - 1 ] ); + if ( pos == s.end() ) + charMatch = 0; + else + charMatch = ( *pos ).second; + } + if ( d == 0 ) + { + // First pass: exact match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch; + } + else + { + // Subsequent passes: fuzzy match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch | ( ( ( last_rd[ j + 1 ] | last_rd[ j ] ) << 1 ) | 1 ) | last_rd[ j + 1 ]; + } + if ( ( rd[ j ] & matchmask ) != 0 ) + { + double score = match_bitapScore( d, j - 1, loc, pattern ); + // This match will almost certainly be better than any existing + // match. But check anyway. + if ( score <= score_threshold ) + { + // Told you so. 
+ score_threshold = score; + best_loc = j - 1; + if ( best_loc > loc ) + { + // When passing loc, don't exceed our current distance from loc. + start = std::max( kONE, ( 2 * loc > best_loc ) ? 2 * loc - best_loc : 1 ); + } + else + { + // Already passed loc, downhill from here on in. + break; + } + } + } } - } - } - } - } - x++; - } - // Strip the padding off. - text = safeMid(text, nullPadding.length(), - text.length() - 2 * nullPadding.length()); - return {text, results}; -} - -std::pair > diff_match_patch::patch_apply( - TPatchVector patches, std::string text) { - return patch_apply(patches, NUtils::to_wstring(text)); -} - -std::wstring diff_match_patch::patch_addPadding(TPatchVector &patches) { - auto paddingLength = Patch_Margin; - std::wstring nullPadding; - for (char x = 1; x <= paddingLength; x++) { - nullPadding += NUtils::to_wstring(x); - } - - // Bump all the patches forward. - for (auto &&aPatch : patches) { - aPatch.start1 += paddingLength; - aPatch.start2 += paddingLength; - } - - // Add some padding on start of first diff. - // auto && patch = patches.front(); - // TDiffVector & diffs = patch.diffs; - if (patches.front().diffs.empty() || - patches.front().diffs.front().operation != EQUAL) { - // Add nullPadding equality. - patches.front().diffs.emplace(patches.front().diffs.begin(), EQUAL, - nullPadding); - patches.front().start1 -= paddingLength; // Should be 0. - patches.front().start2 -= paddingLength; // Should be 0. - patches.front().length1 += paddingLength; - patches.front().length2 += paddingLength; - } else if (paddingLength > patches.front().diffs.front().text.length()) { - // Grow first equality. 
- auto &&firstDiff = patches.front().diffs.front(); - auto extraLength = paddingLength - firstDiff.text.length(); - firstDiff.text = - nullPadding.substr(firstDiff.text.length()) + firstDiff.text; - patches.front().start1 -= extraLength; - patches.front().start2 -= extraLength; - patches.front().length1 += extraLength; - patches.front().length2 += extraLength; - } - - // Add some padding on end of last diff. - // patch = patches.back(); - // diffs = patch.diffs; - if ((patches.back().diffs.size() == 0) || - patches.back().diffs.back().operation != EQUAL) { - // Add nullPadding equality. - patches.back().diffs.emplace_back(EQUAL, nullPadding); - patches.back().length1 += paddingLength; - patches.back().length2 += paddingLength; - } else if (paddingLength > patches.back().diffs.back().text.length()) { - // Grow last equality. - // Diff &lastDiff = patches.back().diffs.back(); - auto extraLength = - paddingLength - patches.back().diffs.back().text.length(); - patches.back().diffs.back().text += nullPadding.substr(0, extraLength); - patches.back().length1 += extraLength; - patches.back().length2 += extraLength; - } - - return nullPadding; -} - -void diff_match_patch::patch_splitMax(TPatchVector &patches) { - auto patch_size = Match_MaxBits; - for (int x = 0; x < patches.size(); x++) { - if (patches[x].length1 <= patch_size) { - continue; - } - Patch bigpatch = patches[x]; - // Remove the big old patch. - NUtils::Splice(patches, x--, 1); - auto start1 = bigpatch.start1; - auto start2 = bigpatch.start2; - std::wstring precontext; - while (!bigpatch.diffs.empty()) { - // Create one of several smaller patches. 
- Patch patch; - bool empty = true; - patch.start1 = start1 - precontext.length(); - patch.start2 = start2 - precontext.length(); - if (precontext.length() != 0) { - patch.length1 = patch.length2 = precontext.length(); - patch.diffs.emplace_back(EQUAL, precontext); - } - while (!bigpatch.diffs.empty() && - (patch.length1 < (patch_size - Patch_Margin))) { - auto diff_type = bigpatch.diffs[0].operation; - auto diff_text = bigpatch.diffs[0].text; - if (diff_type == INSERT) { - // Insertions are harmless. - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - patch.diffs.push_back(bigpatch.diffs.front()); - bigpatch.diffs.erase(bigpatch.diffs.begin()); - empty = false; - } else if ((diff_type == DELETE) && (patch.diffs.size() == 1) && - (patch.diffs.front().operation == EQUAL) && - (diff_text.length() > 2 * patch_size)) { - // This is a large deletion. Let it pass in one chunk. - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - empty = false; - patch.diffs.emplace_back(diff_type, diff_text); - bigpatch.diffs.erase(bigpatch.diffs.begin()); - } else { - // Deletion or equality. Only take as much as we can stomach. - diff_text = diff_text.substr( - 0, std::min(diff_text.length(), - (patch_size > (patch.length1 + Patch_Margin)) - ? (patch_size - patch.length1 - Patch_Margin) - : (-1 * 1UL))); - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - if (diff_type == EQUAL) { - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - } else { - empty = false; - } - patch.diffs.emplace_back(diff_type, diff_text); - if (diff_text == bigpatch.diffs[0].text) { - bigpatch.diffs.erase(bigpatch.diffs.begin()); - } else { - bigpatch.diffs[0].text = - bigpatch.diffs[0].text.substr(diff_text.length()); - } - } - } - // Compute the head context for the next patch. - precontext = diff_text2(patch.diffs); - precontext = precontext.substr( - std::max(kZERO, (precontext.length() > Patch_Margin) - ? 
(precontext.length() - Patch_Margin) - : 0)); - - std::wstring postcontext; - // Append the end context for this patch. - if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { - postcontext = diff_text1(bigpatch.diffs).substr(0, Patch_Margin); - } else { - postcontext = diff_text1(bigpatch.diffs); - } - - if (postcontext.length() != 0) { - patch.length1 += postcontext.length(); - patch.length2 += postcontext.length(); - if ((patch.diffs.size() != 0) && - (patch.diffs[patch.diffs.size() - 1].operation == EQUAL)) { - patch.diffs[patch.diffs.size() - 1].text += postcontext; - } else { - patch.diffs.emplace_back(EQUAL, postcontext); - } - } - if (!empty) { - NUtils::Splice(patches, ++x, 0ULL, patch); - } - } - } -} - -std::wstring diff_match_patch::patch_toText(const TPatchVector &patches) { - std::wstring text; - for (auto &&aPatch : patches) { - text += aPatch.toString(); - } - return text; -} - -TPatchVector diff_match_patch::patch_fromText(const std::wstring &textline) { - TPatchVector patches; - if (textline.empty()) { - return patches; - } - auto text = NUtils::splitString(textline, L"\n", true); - int textPointer = 0; - std::wstring line; - while (textPointer < text.size()) { - patches.push_back(text[textPointer]); - auto &patch = patches.back(); - textPointer++; - - while (textPointer < text.size()) { - if (text[textPointer].empty()) { - ++textPointer; - continue; - } - - auto sign = text[textPointer][0]; - - line = text[textPointer].substr(1); - NUtils::replace(line, L"+", L"%2b"); - line = NUtils::fromPercentEncoding(line); - if (sign == '-') { - // Deletion. - patch.diffs.emplace_back(DELETE, line); - } else if (sign == '+') { - // Insertion. - patch.diffs.emplace_back(INSERT, line); - } else if (sign == ' ') { - // Minor equality. - patch.diffs.emplace_back(EQUAL, line); - } else if (sign == '@') { - // Start of next patch. - break; - } else { - // WTF? 
- throw std::wstring(std::wstring(L"Invalid patch mode '") + sign + - std::wstring(L" in: ") + line); - return {}; - } - textPointer++; - } - } - return patches; -} - -TPatchVector diff_match_patch::patch_fromText(const std::string &textline) { - return patch_fromText(NUtils::to_wstring(textline)); -} - -std::wstring diff_match_patch::safeMid(const std::wstring &str, - std::size_t pos) { - return safeMid(str, pos, std::string::npos); -} - -std::wstring diff_match_patch::safeMid(const std::wstring &str, std::size_t pos, - std::size_t len) { - return (pos == str.length()) ? std::wstring() : str.substr(pos, len); -} - -std::wstring NUtils::to_wstring(const diff_match_patch::TVariant &variant, - bool doubleQuoteEmpty) { - std::wstring retVal; - if (std::holds_alternative(variant)) - retVal = std::get(variant); - - if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; - - return retVal; -} - -std::wstring NUtils::to_wstring(const Patch &patch, bool doubleQuoteEmpty) { - auto retVal = patch.toString(); - if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; - return retVal; -} - -std::wstring NUtils::to_wstring(const Diff &diff, bool doubleQuoteEmpty) { - auto retVal = diff.toString(); - if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; - return retVal; -} + if ( match_bitapScore( d + 1, loc, loc, pattern ) > score_threshold ) + { + // No hope for a (better) match at greater error levels. 
+ break; + } + last_rd = std::move( rd ); + } + return best_loc; + } + + std::size_t diff_match_patch::match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ) + { + return match_bitap( NUtils::to_wstring( text ), NUtils::to_wstring( pattern ), loc ); + } + + double diff_match_patch::match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ) + { + const float accuracy = static_cast< float >( e ) / pattern.length(); + const auto proximity = std::abs( loc - x ); + if ( Match_Distance == 0 ) + { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + ( proximity / static_cast< float >( Match_Distance ) ); + } + + diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::wstring &pattern ) + { + TCharPosMap s; + std::size_t i; + for ( i = 0; i < pattern.length(); i++ ) + { + auto c = pattern[ i ]; + s[ c ] = 0; + } + for ( i = 0; i < pattern.length(); i++ ) + { + auto c = pattern[ i ]; + auto pos = s.find( c ); + std::size_t prev = 0; + if ( pos != s.end() ) + prev = ( *pos ).second; + s[ c ] = prev | ( 1ULL << ( pattern.length() - i - 1 ) ); + } + return s; + } + + diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( const std::string &pattern ) + { + return match_alphabet( NUtils::to_wstring( pattern ) ); + } + + // PATCH FUNCTIONS + + void diff_match_patch::patch_addContext( Patch &patch, const std::wstring &text ) + { + if ( text.empty() ) + { + return; + } + std::wstring pattern = safeMid( text, patch.start2, patch.length1 ); + std::size_t padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while ( ( text.find( pattern ) != text.rfind( pattern ) ) && ( pattern.length() < ( Match_MaxBits - Patch_Margin - Patch_Margin ) ) ) + { + padding += Patch_Margin; + pattern = safeMid( text, std::max( kZERO, ( ( patch.start2 > padding ) ? 
patch.start2 - padding : 0UL ) ), std::min( text.length(), patch.start2 + patch.length1 + padding ) - std::max( kZERO, ( patch.start2 > padding ) ? patch.start2 - padding : 0 ) ); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + std::wstring prefix = safeMid( text, std::max( kZERO, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0UL ) ), patch.start2 - std::max( kZERO, ( ( patch.start2 > padding ) ? patch.start2 - padding : 0UL ) ) ); + if ( !prefix.empty() ) + { + patch.diffs.emplace( patch.diffs.begin(), EOperation::eEQUAL, prefix ); + } + // Add the suffix. + std::wstring suffix = safeMid( text, patch.start2 + patch.length1, std::min( text.length(), patch.start2 + patch.length1 + padding ) - ( patch.start2 + patch.length1 ) ); + if ( !suffix.empty() ) + { + patch.diffs.emplace_back( EOperation::eEQUAL, suffix ); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); + } + + void diff_match_patch::patch_addContext( Patch &patch, const std::string &text ) + { + return patch_addContext( patch, NUtils::to_wstring( text ) ); + } + + TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring &text2 ) + { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + // No diffs provided, compute our own. + TDiffVector diffs = diff_main( text1, text2, true ); + if ( diffs.size() > 2 ) + { + diff_cleanupSemantic( diffs ); + diff_cleanupEfficiency( diffs ); + } + + return patch_make( text1, diffs ); + } + + TPatchVector diff_match_patch::patch_make( const TDiffVector &diffs ) + { + // No origin string provided, compute our own. 
+ const std::wstring text1 = diff_text1( diffs ); + return patch_make( text1, diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const std::wstring & /*text2*/, const TDiffVector &diffs ) + { + // text2 is entirely unused. + return patch_make( text1, diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::wstring &text1, const TDiffVector &diffs ) + { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + TPatchVector patches; + if ( diffs.empty() ) + { + return patches; // Get rid of the nullptr case. + } + Patch patch; + std::size_t char_count1 = 0; // Number of characters into the text1 string. + std::size_t char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + std::wstring prepatch_text = text1; + std::wstring postpatch_text = text1; + for ( auto &&aDiff : diffs ) + { + if ( patch.diffs.empty() && !aDiff.isEqual() ) + { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + if ( aDiff.isInsert() ) + { + patch.diffs.push_back( aDiff ); + patch.length2 += aDiff.text().length(); + postpatch_text = postpatch_text.substr( 0, char_count2 ) + aDiff.text() + safeMid( postpatch_text, char_count2 ); + } + else if ( aDiff.isDelete() ) + { + patch.length1 += aDiff.text().length(); + patch.diffs.push_back( aDiff ); + postpatch_text = postpatch_text.substr( 0, char_count2 ) + safeMid( postpatch_text, char_count2 + aDiff.text().length() ); + } + else if ( aDiff.isEqual() ) + { + if ( aDiff.text().length() <= 2 * Patch_Margin && !patch.diffs.empty() && !( aDiff == diffs.back() ) ) + { + // Small equality inside a patch. 
+ patch.diffs.push_back( aDiff ); + patch.length1 += aDiff.text().length(); + patch.length2 += aDiff.text().length(); + } + + if ( aDiff.text().length() >= 2 * Patch_Margin ) + { + // Time for a new patch. + if ( !patch.diffs.empty() ) + { + patch_addContext( patch, prepatch_text ); + patches.emplace_back( patch ); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + } + + // Update the current character count. + if ( !aDiff.isInsert() ) + { + char_count1 += aDiff.text().length(); + } + if ( !aDiff.isDelete() ) + { + char_count2 += aDiff.text().length(); + } + } + // Pick up the leftover patch if not empty. + if ( !patch.diffs.empty() ) + { + patch_addContext( patch, prepatch_text ); + patches.emplace_back( patch ); + } + + return patches; + } + + TPatchVector diff_match_patch::patch_make( const std::string &text1, const TDiffVector &diffs ) + { + return patch_make( NUtils::to_wstring( text1 ), diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ) + { + return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ), diffs ); + } + + TPatchVector diff_match_patch::patch_make( const std::string &text1, const std::string &text2 ) + { + return patch_make( NUtils::to_wstring( text1 ), NUtils::to_wstring( text2 ) ); + } + + TPatchVector diff_match_patch::patch_deepCopy( const TPatchVector &patches ) + { + TPatchVector patchesCopy; + for ( auto &&aPatch : patches ) + { + Patch patchCopy = Patch(); + for ( auto &&aDiff : aPatch.diffs ) + { + patchCopy.diffs.push_back( aDiff ); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = 
aPatch.length2; + patchesCopy.emplace_back( patchCopy ); + } + return patchesCopy; + } + /** Apply a list of patches to text. Returns the patched text plus one bool per patch, counted after patch_splitMax, that is true when the patch was applied. An empty patch list returns text unchanged with an empty result vector. The patches are deep-copied first, so the patches held by the caller are not modified. */ + std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::wstring text ) + { + if ( patches.empty() ) + { + return { text, std::vector< bool >( 0 ) }; + } + + // Deep copy the patches so that no changes are made to originals. + patches = patch_deepCopy( patches ); + + std::wstring nullPadding = patch_addPadding( patches ); + text = nullPadding + text + nullPadding; + patch_splitMax( patches ); + + std::size_t x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. NOTE(review): delta is unsigned, so a negative offset relies on modular wraparound when it is added to start2. + uint64_t delta = 0; + std::vector< bool > results( patches.size() ); + for ( auto &&aPatch : patches ) + { + auto expected_loc = aPatch.start2 + delta; + std::wstring text1 = diff_text1( aPatch.diffs ); + std::size_t start_loc; + std::size_t end_loc = std::string::npos; + if ( text1.length() > Match_MaxBits ) + { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. start_loc and end_loc are std::size_t, so the -1 tests below compare against -1 converted to that unsigned type. + start_loc = match_main( text, text1.substr( 0, Match_MaxBits ), expected_loc ); + if ( start_loc != -1 ) + { + end_loc = match_main( text, text1.substr( text1.length() - Match_MaxBits ), expected_loc + text1.length() - Match_MaxBits ); + if ( end_loc == -1 || start_loc >= end_loc ) + { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } + else + { + start_loc = match_main( text, text1, expected_loc ); + } + if ( start_loc == -1 ) + { + // No match found. :( + results[ x ] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } + else + { + // Found a match.
:) + results[ x ] = true; + delta = start_loc - expected_loc; + std::wstring text2; + if ( end_loc == -1 ) + { + text2 = safeMid( text, start_loc, text1.length() ); + } + else + { + text2 = safeMid( text, start_loc, end_loc + Match_MaxBits - start_loc ); + } + if ( text1 == text2 ) + { + // Perfect match, just shove the replacement text in. + text = text.substr( 0, start_loc ) + diff_text2( aPatch.diffs ) + safeMid( text, start_loc + text1.length() ); + } + else + { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. diff_xIndex then maps a position in text1 to the matching position in text2. + TDiffVector diffs = diff_main( text1, text2, false ); + if ( text1.length() > Match_MaxBits && diff_levenshtein( diffs ) / static_cast< float >( text1.length() ) > Patch_DeleteThreshold ) + { + // The end points match, but the content is unacceptably bad. + results[ x ] = false; + } + else + { + diff_cleanupSemanticLossless( diffs ); + std::size_t index1 = 0; + for ( auto &&aDiff : aPatch.diffs ) + { + if ( !aDiff.isEqual() ) + { + auto index2 = diff_xIndex( diffs, index1 ); + if ( aDiff.isInsert() ) + { + // Insertion: splice the new text in at the mapped position. + text = text.substr( 0, start_loc + index2 ) + aDiff.text() + safeMid( text, start_loc + index2 ); + } + else if ( aDiff.isDelete() ) + { + // Deletion: drop the text between the mapped start and the mapped end of the deleted span. + text = text.substr( 0, start_loc + index2 ) + safeMid( text, start_loc + diff_xIndex( diffs, index1 + aDiff.text().length() ) ); + } + } + if ( !aDiff.isDelete() ) + { + index1 += aDiff.text().length(); + } + } + } + } + } + x++; + } + // Strip the padding off.
+ text = safeMid( text, nullPadding.length(), text.length() - 2 * nullPadding.length() ); + return { text, results }; + } + /** Narrow-string overload: converts text with NUtils::to_wstring and forwards to the std::wstring overload. */ + std::pair< std::wstring, std::vector< bool > > diff_match_patch::patch_apply( TPatchVector patches, std::string text ) + { + return patch_apply( patches, NUtils::to_wstring( text ) ); + } + /** Build a padding string from the values 1 through Patch_Margin via NUtils::to_wstring, shift every patch forward by Patch_Margin, and make sure the first patch starts and the last patch ends with an equality that carries the padding. Modifies patches in place and returns the padding string. Reads patches.front() and patches.back(), so patches must not be empty. */ + std::wstring diff_match_patch::patch_addPadding( TPatchVector &patches ) + { + auto paddingLength = Patch_Margin; + std::wstring nullPadding; + for ( char x = 1; x <= paddingLength; x++ ) + { + nullPadding += NUtils::to_wstring( x ); + } + + // Bump all the patches forward. + for ( auto &&aPatch : patches ) + { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + // auto && patch = patches.front(); + // TDiffVector & diffs = patch.diffs; + if ( patches.front().diffs.empty() || !patches.front().diffs.front().isEqual() ) + { + // Add nullPadding equality. + patches.front().diffs.emplace( patches.front().diffs.begin(), EOperation::eEQUAL, nullPadding ); + patches.front().start1 -= paddingLength; // Should be 0. + patches.front().start2 -= paddingLength; // Should be 0. + patches.front().length1 += paddingLength; + patches.front().length2 += paddingLength; + } + else if ( paddingLength > patches.front().diffs.front().text().length() ) + { + // Grow first equality. + auto &&firstDiff = patches.front().diffs.front(); + auto extraLength = paddingLength - firstDiff.text().length(); + firstDiff.fText = nullPadding.substr( firstDiff.text().length() ) + firstDiff.text(); + patches.front().start1 -= extraLength; + patches.front().start2 -= extraLength; + patches.front().length1 += extraLength; + patches.front().length2 += extraLength; + } + + // Add some padding on end of last diff. + // patch = patches.back(); + // diffs = patch.diffs; + if ( ( patches.back().diffs.size() == 0 ) || !patches.back().diffs.back().isEqual() ) + { + // Add nullPadding equality.
+ patches.back().diffs.emplace_back( EOperation::eEQUAL, nullPadding ); + patches.back().length1 += paddingLength; + patches.back().length2 += paddingLength; + } + else if ( paddingLength > patches.back().diffs.back().text().length() ) + { + // Grow last equality. + // Diff &lastDiff = patches.back().diffs.back(); + auto extraLength = paddingLength - patches.back().diffs.back().text().length(); + patches.back().diffs.back().fText += nullPadding.substr( 0, extraLength ); + patches.back().length1 += extraLength; + patches.back().length2 += extraLength; + } + + return nullPadding; + } + /** Split every patch whose length1 exceeds Match_MaxBits into several smaller patches, in place. Each piece starts with up to Patch_Margin characters of context taken from the end of the previous piece and ends with up to Patch_Margin characters of the source text that follows it. Pieces that contain no insertion or deletion are dropped. */ + void diff_match_patch::patch_splitMax( TPatchVector &patches ) + { + auto patch_size = Match_MaxBits; + for ( int x = 0; x < patches.size(); x++ ) + { + if ( patches[ x ].length1 <= patch_size ) + { + continue; + } + Patch bigpatch = patches[ x ]; + // Remove the big old patch. x is stepped back here so that the ++x at the insertion below lands on the slot the big patch occupied (presumably Splice removes and inserts at the given index; confirm against NUtils). + NUtils::Splice( patches, x--, 1 ); + auto start1 = bigpatch.start1; + auto start2 = bigpatch.start2; + std::wstring precontext; + while ( !bigpatch.diffs.empty() ) + { + // Create one of several smaller patches. + Patch patch; + bool empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if ( precontext.length() != 0 ) + { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.emplace_back( EOperation::eEQUAL, precontext ); + } + while ( !bigpatch.diffs.empty() && ( patch.length1 < ( patch_size - Patch_Margin ) ) ) + { + auto diff_text = bigpatch.diffs[ 0 ].text(); + if ( bigpatch.diffs[ 0 ].isInsert() ) + { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.push_back( bigpatch.diffs.front() ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + empty = false; + } + else if ( ( bigpatch.diffs[ 0 ].isDelete() ) && ( patch.diffs.size() == 1 ) && ( patch.diffs.front().isEqual() ) && ( diff_text.length() > 2 * patch_size ) ) + { + // This is a large deletion.
Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.emplace_back( bigpatch.diffs[ 0 ] ); + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + // Deletion or equality. Only take as much as we can stomach. The fallback -1 * 1UL is the largest unsigned long, which makes std::min keep the whole diff text. + diff_text = diff_text.substr( 0, std::min( diff_text.length(), ( patch_size > ( patch.length1 + Patch_Margin ) ) ? ( patch_size - patch.length1 - Patch_Margin ) : ( -1 * 1UL ) ) ); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if ( bigpatch.diffs[ 0 ].isEqual() ) + { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } + else + { + empty = false; + } + patch.diffs.emplace_back( bigpatch.diffs[ 0 ], diff_text ); + if ( diff_text == bigpatch.diffs[ 0 ].text() ) + { + bigpatch.diffs.erase( bigpatch.diffs.begin() ); + } + else + { + bigpatch.diffs[ 0 ].fText = bigpatch.diffs[ 0 ].text().substr( diff_text.length() ); + } + } + } + // Compute the head context for the next patch: the last Patch_Margin characters of the text this piece produces. + precontext = diff_text2( patch.diffs ); + precontext = precontext.substr( std::max( kZERO, ( precontext.length() > Patch_Margin ) ? ( precontext.length() - Patch_Margin ) : 0 ) ); + + std::wstring postcontext; + // Append the end context for this patch.
+ if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) + { + postcontext = diff_text1( bigpatch.diffs ).substr( 0, Patch_Margin ); + } + else + { + postcontext = diff_text1( bigpatch.diffs ); + } + + if ( postcontext.length() != 0 ) + { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ( ( patch.diffs.size() != 0 ) && ( patch.diffs[ patch.diffs.size() - 1 ].isEqual() ) ) + { + patch.diffs[ patch.diffs.size() - 1 ].fText += postcontext; + } + else + { + patch.diffs.emplace_back( EOperation::eEQUAL, postcontext ); + } + } + if ( !empty ) + { + NUtils::Splice( patches, ++x, 0ULL, patch ); + } + } + } + } + /** Concatenate the toString() form of every patch, in order, and return the result. */ + std::wstring diff_match_patch::patch_toText( const TPatchVector &patches ) + { + std::wstring text; + for ( auto &&aPatch : patches ) + { + text += aPatch.toString(); + } + return text; + } + /** Parse a textual patch list into Patch objects. The input is split on newline; each header line constructs a Patch, and each following non-empty line is percent-decoded and added as a delete, insert or equal diff according to its leading minus, plus or space. A leading at-sign starts the next patch. Any other leading character throws a std::wstring. Empty input yields an empty vector. */ + TPatchVector diff_match_patch::patch_fromText( const std::wstring &textline ) + { + TPatchVector patches; + if ( textline.empty() ) + { + return patches; + } + auto text = NUtils::splitString( textline, L"\n", true ); + int textPointer = 0; + std::wstring line; + while ( textPointer < text.size() ) + { + patches.push_back( text[ textPointer ] ); + auto &patch = patches.back(); + textPointer++; + + while ( textPointer < text.size() ) + { + if ( text[ textPointer ].empty() ) + { + ++textPointer; + continue; + } + + auto sign = text[ textPointer ][ 0 ]; + + line = text[ textPointer ].substr( 1 ); + NUtils::replace( line, L"+", L"%2b" ); + line = NUtils::fromPercentEncoding( line ); + if ( sign == '-' ) + { + // Deletion. + patch.diffs.emplace_back( EOperation::eDELETE, line ); + } + else if ( sign == '+' ) + { + // Insertion. + patch.diffs.emplace_back( EOperation::eINSERT, line ); + } + else if ( sign == ' ' ) + { + // Minor equality. + patch.diffs.emplace_back( EOperation::eEQUAL, line ); + } + else if ( sign == '@' ) + { + // Start of next patch. + break; + } + else + { + // Unknown leading character: reject the patch text.
+ throw std::wstring( std::wstring( L"Invalid patch mode '" ) + sign + std::wstring( L"' in: " ) + line ); + return {}; + } + textPointer++; + } + } + return patches; + } + /** Narrow-string overload: converts textline with NUtils::to_wstring and forwards to the std::wstring overload. */ + TPatchVector diff_match_patch::patch_fromText( const std::string &textline ) + { + return patch_fromText( NUtils::to_wstring( textline ) ); + } + /** Return the part of str from pos to the end, or an empty string when pos is at or past the end of str. */ + std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos ) + { + return safeMid( str, pos, std::wstring::npos ); + } + /** Return up to len characters of str starting at pos. Unlike std::wstring::substr this does not throw std::out_of_range: a pos at or past the end of str yields an empty string. */ + std::wstring diff_match_patch::safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) + { + return ( pos >= str.length() ) ? std::wstring() : str.substr( pos, len ); + } + /** Return the std::wstring held by variant, or an empty string when variant holds its other alternative. When doubleQuoteEmpty is set, an empty result is replaced by two double-quote characters. */ + std::wstring NUtils::to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty ) + { + std::wstring retVal; + if ( std::holds_alternative< std::wstring >( variant ) ) + retVal = std::get< std::wstring >( variant ); + + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + + return retVal; + } + /** Return patch.toString(), or two double-quote characters when that text is empty and doubleQuoteEmpty is set. */ + std::wstring NUtils::to_wstring( const Patch &patch, bool doubleQuoteEmpty ) + { + auto retVal = patch.toString(); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; + } + /** Return diff.toString( EStringType::eDefault ), or two double-quote characters when that text is empty and doubleQuoteEmpty is set. */ + std::wstring NUtils::to_wstring( const Diff &diff, bool doubleQuoteEmpty ) + { + auto retVal = diff.toString( EStringType::eDefault ); + if ( doubleQuoteEmpty && retVal.empty() ) + return LR"("")"; + return retVal; + } +} \ No newline at end of file diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h index b0e37bd2..57c25cbb 100644 --- a/cpp17/diff_match_patch.h +++ b/cpp17/diff_match_patch.h @@ -20,6 +20,8 @@ #ifndef DIFF_MATCH_PATCH_H #define DIFF_MATCH_PATCH_H +#include "diff_match_patch_utils.h" + #include #include #include @@ -28,7 +30,7 @@ #include #include #ifdef USE_GTEST -#include "gtest/gtest.h" + #include "gtest/gtest.h" #endif /* @@ -65,125 +67,163 @@ */ -/**- +namespace NDiffMatchPatch +{ + class diff_match_patch_test; + + /**- * The data structure representing a diff is a Linked
list of Diff objects: * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), * Diff(Operation.EQUAL, " world.")} * which means: delete "Hello", add "Goodbye" and keep " world." */ -enum Operation { DELETE, INSERT, EQUAL }; - -/** + enum class EOperation + { + eDELETE, + eINSERT, + eEQUAL + }; + std::wstring toString( EOperation op ); + + enum class EStringType + { + eDefault, + eUnitTest, + ePatch + }; + /** * Class representing one diff operation. */ -class Diff { - public: - Operation operation{DELETE}; - // One of: INSERT, DELETE or EQUAL. - std::wstring text; - // The text associated with this diff operation. - - /** + class Diff + { + public: + // The text associated with this diff operation. + + /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL. * @param text The text being applied. */ - Diff(Operation _operation, const std::wstring &_text); - Diff(Operation _operation, const wchar_t *_text); - Diff(Operation _operation, const std::string &_text); - Diff(Operation _operation, const char *_text); - Diff(); - inline bool isNull() const; - std::wstring toString() const; - bool operator==(const Diff &d) const; - bool operator!=(const Diff &d) const; - - static std::wstring strOperation(Operation op); -}; -using TDiffVector = std::vector; - -/** + Diff() = default; + + template< typename STRING_TYPE > + Diff( EOperation _operation, const STRING_TYPE &_text ) : + fOperation( _operation ), + fText( NUtils::to_wstring( _text ) ) + { + } + + template< typename STRING_TYPE > + Diff( const Diff &rhs, const STRING_TYPE &_text ) : + fOperation( rhs.fOperation ), + fText( NUtils::to_wstring( _text ) ) + { + } + + inline bool isNull() const; + std::wstring toString( EStringType stringType ) const; + std::wstring toHtml() const; + std::wstring toConsole() const; + std::wstring toDelta() const; + + bool isDelete() const { return fOperation == EOperation::eDELETE; } + bool isInsert() const { 
return fOperation == EOperation::eINSERT; } + bool isEqual() const { return fOperation == EOperation::eEQUAL; } + + const std::wstring &text() const { return fText; } + + bool operator==( const Diff &d ) const; + bool operator!=( const Diff &d ) const; + + EOperation fOperation{ EOperation::eDELETE }; + std::wstring fText; + }; + using TDiffVector = std::vector< Diff >; + + /** * Class representing one patch operation. */ -class Patch { - public: - TDiffVector diffs; - std::size_t length1{0}; - std::size_t length2{0}; - std::size_t start1{0}; - std::size_t start2{0}; - - /** + class Patch + { + public: + TDiffVector diffs; + std::size_t length1{ 0 }; + std::size_t length2{ 0 }; + std::size_t start1{ 0 }; + std::size_t start2{ 0 }; + + /** * Constructor. Initializes with an empty list of diffs. */ - Patch(); - Patch(std::wstring &text); // modifies text, and removes the text used - bool isNull() const; - std::wstring toString() const; + Patch(); + Patch( std::wstring &text ); // modifies text, and removes the text used + bool isNull() const; + std::wstring toString() const; - private: - std::wstring getPatchHeader() const; - std::wstring getCoordinateString(std::size_t start, std::size_t length) const; -}; -using TPatchVector = std::vector; + private: + std::wstring getPatchHeader() const; + std::wstring getCoordinateString( std::size_t start, std::size_t length ) const; + }; + using TPatchVector = std::vector< Patch >; -/** + /** * Class containing the diff, match and patch methods. * Also contains the behaviour settings. 
*/ -class diff_match_patch { - friend class diff_match_patch_test; + class diff_match_patch + { + friend class NDiffMatchPatch::diff_match_patch_test; #ifdef USE_GTEST - FRIEND_TEST(diff_match_patch_test, testDiffCommonOverlap); - FRIEND_TEST(diff_match_patch_test, testDiffHalfmatch); - FRIEND_TEST(diff_match_patch_test, testDiffLinesToChars); - FRIEND_TEST(diff_match_patch_test, testDiffCharsToLines); - FRIEND_TEST(diff_match_patch_test, testDiffBisect); - FRIEND_TEST(diff_match_patch_test, testMatchAlphabet); - FRIEND_TEST(diff_match_patch_test, testMatchBitap); - FRIEND_TEST(diff_match_patch_test, testPatchAddContext); + FRIEND_TEST( diff_match_patch_test, testDiffCommonOverlap ); + FRIEND_TEST( diff_match_patch_test, testDiffHalfmatch ); + FRIEND_TEST( diff_match_patch_test, testDiffLinesToChars ); + FRIEND_TEST( diff_match_patch_test, testDiffCharsToLines ); + FRIEND_TEST( diff_match_patch_test, testDiffBisect ); + FRIEND_TEST( diff_match_patch_test, testMatchAlphabet ); + FRIEND_TEST( diff_match_patch_test, testMatchBitap ); + FRIEND_TEST( diff_match_patch_test, testPatchAddContext ); #endif - public: - // Defaults. - // Set these on your diff_match_patch instance to override the defaults. - - // Number of seconds to map a diff before giving up (0 for infinity). - float Diff_Timeout{1.0f}; - // Cost of an empty edit operation in terms of edit characters. - short Diff_EditCost{4}; - // At what point is no match declared (0.0 = perfection, 1.0 = very loose). - float Match_Threshold{0.5f}; - // How far to search for a match (0 = exact location, 1000+ = broad match). - // A match this many characters away from the expected location will add - // 1.0 to the score (0.0 is a perfect match). - int64_t Match_Distance{1000}; - // When deleting a large block of text (over ~64 characters), how close does - // the contents have to match the expected contents. (0.0 = perfection, - // 1.0 = very loose). 
Note that Match_Threshold controls how closely the - // end points of a delete need to match. - float Patch_DeleteThreshold{0.5f}; - // Chunk size for context length. - short Patch_Margin{4}; - - short Match_MaxBits{32}; // unit tests are based on 32 bits - - private: - // Define some regex patterns for matching boundaries. - static std::wregex BLANKLINEEND; - static std::wregex BLANKLINESTART; - - public: - using TStringVector = std::vector; - using TVariant = std::variant; - using TVariantVector = std::vector; - using TCharPosMap = std::map; - - diff_match_patch(); - - // DIFF FUNCTIONS - - /** + public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout{ 1.0f }; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost{ 4 }; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold{ 0.5f }; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + int64_t Match_Distance{ 1000 }; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold{ 0.5f }; + // Chunk size for context length. + short Patch_Margin{ 4 }; + + short Match_MaxBits{ 32 }; // unit tests are based on 32 bits + + private: + // Define some regex patterns for matching boundaries. 
+ static std::wregex BLANKLINEEND; + static std::wregex BLANKLINESTART; + + public: + using TStringVector = std::vector< std::wstring >; + using TVariant = std::variant< std::wstring, TStringVector >; + using TVariantVector = std::vector< TVariant >; + using TCharPosMap = std::map< wchar_t, std::size_t >; + + diff_match_patch(); + + // DIFF FUNCTIONS + + /** * Find the differences between two texts. * Run a faster slightly less optimal diff. * This method allows the 'checklines' of diff_main() to be optional. @@ -192,10 +232,10 @@ class diff_match_patch { * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ - TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2); - TDiffVector diff_main(const std::string &text1, const std::string &text2); + TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2 ); + TDiffVector diff_main( const std::string &text1, const std::string &text2 ); - /** + /** * Find the differences between two texts. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. @@ -204,12 +244,10 @@ class diff_match_patch { * If true, then run a faster slightly less optimal diff. * @return Linked List of Diff objects. */ - TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2, - bool checklines); - TDiffVector diff_main(const std::string &text1, const std::string &text2, - bool checklines); + TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines ); + TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines ); - /** + /** * Find the differences between two texts. Simplifies the problem by * stripping any common prefix or suffix off the texts before diffing. * @param text1 Old string to be diffed. @@ -221,13 +259,11 @@ class diff_match_patch { * internally for recursive calls. Users should set DiffTimeout instead. * @return Linked List of Diff objects. 
*/ - private: - TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2, - bool checklines, clock_t deadline); - TDiffVector diff_main(const std::string &text1, const std::string &text2, - bool checklines, clock_t deadline); + private: + TDiffVector diff_main( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); + TDiffVector diff_main( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ); - /** + /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param text1 Old string to be diffed. @@ -238,13 +274,11 @@ class diff_match_patch { * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. */ - private: - TDiffVector diff_compute(const std::wstring &text1, const std::wstring &text2, - bool checklines, clock_t deadline); - TDiffVector diff_compute(const std::string &text1, const std::string &text2, - bool checklines, clock_t deadline); + private: + TDiffVector diff_compute( const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline ); + TDiffVector diff_compute( const std::string &text1, const std::string &text2, bool checklines, clock_t deadline ); - /** + /** * Do a quick line-level diff on both strings, then rediff the parts for * greater accuracy. * This speedup can produce non-minimal diffs. @@ -253,13 +287,11 @@ class diff_match_patch { * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. 
*/ - private: - TDiffVector diff_lineMode(std::wstring text1, std::wstring text2, - clock_t deadline); - TDiffVector diff_lineMode(std::string text1, std::string text2, - clock_t deadline); + private: + TDiffVector diff_lineMode( std::wstring text1, std::wstring text2, clock_t deadline ); + TDiffVector diff_lineMode( std::string text1, std::string text2, clock_t deadline ); - /** + /** * Find the 'middle snake' of a diff, split the problem in two * and return the recursively constructed diff. * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. @@ -267,13 +299,11 @@ class diff_match_patch { * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ - protected: - TDiffVector diff_bisect(const std::wstring &text1, const std::wstring &text2, - clock_t deadline); - TDiffVector diff_bisect(const std::string &text1, const std::string &text2, - clock_t deadline); + protected: + TDiffVector diff_bisect( const std::wstring &text1, const std::wstring &text2, clock_t deadline ); + TDiffVector diff_bisect( const std::string &text1, const std::string &text2, clock_t deadline ); - /** + /** * Given the location of the 'middle snake', split the diff in two parts * and recurse. * @param text1 Old string to be diffed. @@ -283,15 +313,11 @@ class diff_match_patch { * @param deadline Time at which to bail if not yet complete. * @return LinkedList of Diff objects. 
*/ - private: - TDiffVector diff_bisectSplit(const std::wstring &text1, - const std::wstring &text2, std::size_t x, - std::size_t y, clock_t deadline); - TDiffVector diff_bisectSplit(const std::string &text1, - const std::string &text2, std::size_t x, - std::size_t y, clock_t deadline); + private: + TDiffVector diff_bisectSplit( const std::wstring &text1, const std::wstring &text2, std::size_t x, std::size_t y, clock_t deadline ); + TDiffVector diff_bisectSplit( const std::string &text1, const std::string &text2, std::size_t x, std::size_t y, clock_t deadline ); - /** + /** * Split two texts into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text1 First string. @@ -300,15 +326,13 @@ class diff_match_patch { * encoded text2 and the List of unique strings. The zeroth element * of the List of unique strings is intentionally blank. */ - protected: - std::vector diff_linesToChars( - const std::wstring &text1, - const std::wstring &text2); // return elems 0 and 1 are std::wstring, - // elem 2 is TStringVector - std::vector diff_linesToChars(const std::string &text1, - const std::string &text2); + protected: + std::vector< TVariant > diff_linesToChars( const std::wstring &text1, + const std::wstring &text2 ); // return elems 0 and 1 are std::wstring, + // elem 2 is TStringVector + std::vector< TVariant > diff_linesToChars( const std::string &text1, const std::string &text2 ); - /** + /** * Split a text into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text String to encode. @@ -316,58 +340,50 @@ class diff_match_patch { * @param lineHash Map of strings to indices. * @return Encoded string. 
*/ - private: - std::wstring diff_linesToCharsMunge( - const std::wstring &text, TStringVector &lineArray, - std::unordered_map &lineHash); + private: + std::wstring diff_linesToCharsMunge( const std::wstring &text, TStringVector &lineArray, std::unordered_map< std::wstring, std::size_t > &lineHash ); - /** + /** * Rehydrate the text in a diff from a string of line hashes to real lines of * text. * @param diffs LinkedList of Diff objects. * @param lineArray List of unique strings. */ - private: - void diff_charsToLines(TDiffVector &diffs, const TStringVector &lineArray); + private: + void diff_charsToLines( TDiffVector &diffs, const TStringVector &lineArray ); - /** + /** * Determine the common prefix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the start of each string. */ - public: - std::size_t diff_commonPrefix(const std::wstring &text1, - const std::wstring &text2); - std::size_t diff_commonPrefix(const std::string &text1, - const std::string &text2); + public: + std::size_t diff_commonPrefix( const std::wstring &text1, const std::wstring &text2 ); + std::size_t diff_commonPrefix( const std::string &text1, const std::string &text2 ); - /** + /** * Determine the common suffix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of each string. */ - public: - std::size_t diff_commonSuffix(const std::wstring &text1, - const std::wstring &text2); - std::size_t diff_commonSuffix(const std::string &text1, - const std::string &text2); + public: + std::size_t diff_commonSuffix( const std::wstring &text1, const std::wstring &text2 ); + std::size_t diff_commonSuffix( const std::string &text1, const std::string &text2 ); - /** + /** * Determine if the suffix of one string is the prefix of another. * @param text1 First string. * @param text2 Second string. 
* @return The number of characters common to the end of the first * string and the start of the second string. */ - protected: - std::size_t diff_commonOverlap(const std::wstring &text1, - const std::wstring &text2); - std::size_t diff_commonOverlap(const std::string &text1, - const std::string &text2); + protected: + std::size_t diff_commonOverlap( const std::wstring &text1, const std::wstring &text2 ); + std::size_t diff_commonOverlap( const std::string &text1, const std::string &text2 ); - /** + /** * Do the two texts share a substring which is at least half the length of * the longer text? * This speedup can produce non-minimal diffs. @@ -377,13 +393,11 @@ class diff_match_patch { * suffix of text1, the prefix of text2, the suffix of text2 and the * common middle. Or null if there was no match. */ - protected: - TStringVector diff_halfMatch(const std::wstring &text1, - const std::wstring &text2); - TStringVector diff_halfMatch(const std::string &text1, - const std::string &text2); + protected: + TStringVector diff_halfMatch( const std::wstring &text1, const std::wstring &text2 ); + TStringVector diff_halfMatch( const std::string &text1, const std::string &text2 ); - /** + /** * Does a substring of shorttext exist within longtext such that the * substring is at least half the length of longtext? * @param longtext Longer string. @@ -393,29 +407,27 @@ class diff_match_patch { * suffix of longtext, the prefix of shorttext, the suffix of shorttext * and the common middle. Or null if there was no match. 
*/ - private: - TStringVector diff_halfMatchI(const std::wstring &longtext, - const std::wstring &shorttext, std::size_t i); - TStringVector diff_halfMatchI(const std::string &longtext, - const std::string &shorttext, std::size_t i); + private: + TStringVector diff_halfMatchI( const std::wstring &longtext, const std::wstring &shorttext, std::size_t i ); + TStringVector diff_halfMatchI( const std::string &longtext, const std::string &shorttext, std::size_t i ); - /** + /** * Reduce the number of edits by eliminating semantically trivial equalities. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupSemantic(TDiffVector &diffs); + public: + void diff_cleanupSemantic( TDiffVector &diffs ); - /** + /** * Look for single edits surrounded on both sides by equalities * which can be shifted sideways to align the edit to a word boundary. * e.g: The cat came. -> The cat came. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupSemanticLossless(TDiffVector &diffs); + public: + void diff_cleanupSemanticLossless( TDiffVector &diffs ); - /** + /** * Given two strings, compute a score representing whether the internal * boundary falls on logical boundaries. * Scores range from 6 (best) to 0 (worst). @@ -423,28 +435,26 @@ class diff_match_patch { * @param two Second string. * @return The score. */ - private: - int64_t diff_cleanupSemanticScore(const std::wstring &one, - const std::wstring &two); - int64_t diff_cleanupSemanticScore(const std::string &one, - const std::string &two); + private: + int64_t diff_cleanupSemanticScore( const std::wstring &one, const std::wstring &two ); + int64_t diff_cleanupSemanticScore( const std::string &one, const std::string &two ); - /** + /** * Reduce the number of edits by eliminating operationally trivial equalities. * @param diffs LinkedList of Diff objects. 
*/ - public: - void diff_cleanupEfficiency(TDiffVector &diffs); + public: + void diff_cleanupEfficiency( TDiffVector &diffs ); - /** + /** * Reorder and merge like edit sections. Merge equalities. * Any edit section can move as long as it doesn't cross an equality. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupMerge(TDiffVector &diffs); + public: + void diff_cleanupMerge( TDiffVector &diffs ); - /** + /** * loc is a location in text1, compute and return the equivalent location in * text2. * e.g. "The cat" vs "The big cat", 1->1, 5->8 @@ -452,51 +462,51 @@ class diff_match_patch { * @param loc Location within text1. * @return Location within text2. */ - public: - std::size_t diff_xIndex(const TDiffVector &diffs, std::size_t loc); + public: + std::size_t diff_xIndex( const TDiffVector &diffs, std::size_t loc ); - /** + /** * Convert a Diff list into a pretty HTML report. * @param diffs LinkedList of Diff objects. * @return HTML representation. */ - public: - std::wstring diff_prettyHtml(const TDiffVector &diffs); + public: + std::wstring diff_prettyHtml( const TDiffVector &diffs ); - /** + /** * Convert a Diff list into a pretty Console report. Red for delete, and green for insert * @param diffs LinkedList of Diff objects. * @return Console representation. */ - public: - std::wstring diff_prettyConsole(const TDiffVector &diffs); + public: + std::wstring diff_prettyConsole( const TDiffVector &diffs ); - /** + /** * Compute and return the source text (all equalities and deletions). * @param diffs LinkedList of Diff objects. * @return Source text. */ - public: - std::wstring diff_text1(const TDiffVector &diffs); + public: + std::wstring diff_text1( const TDiffVector &diffs ); - /** + /** * Compute and return the destination text (all equalities and insertions). * @param diffs LinkedList of Diff objects. * @return Destination text. 
*/ - public: - std::wstring diff_text2(const TDiffVector &diffs); + public: + std::wstring diff_text2( const TDiffVector &diffs ); - /** + /** * Compute the Levenshtein distance; the number of inserted, deleted or * substituted characters. * @param diffs LinkedList of Diff objects. * @return Number of changes. */ - public: - std::size_t diff_levenshtein(const TDiffVector &diffs); + public: + std::size_t diff_levenshtein( const TDiffVector &diffs ); - /** + /** * Crush the diff into an encoded string which describes the operations * required to transform text1 into text2. * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. @@ -504,10 +514,10 @@ class diff_match_patch { * @param diffs Array of diff tuples. * @return Delta text. */ - public: - std::wstring diff_toDelta(const TDiffVector &diffs); + public: + std::wstring diff_toDelta( const TDiffVector &diffs ); - /** + /** * Given the original text1, and an encoded string which describes the * operations required to transform text1 into text2, compute the full diff. * @param text1 Source string for the diff. @@ -515,15 +525,13 @@ class diff_match_patch { * @return Array of diff tuples or null if invalid. * @throws std::wstring If invalid input. */ - public: - TDiffVector diff_fromDelta(const std::wstring &text1, - const std::wstring &delta); - TDiffVector diff_fromDelta(const std::string &text1, - const std::string &delta); + public: + TDiffVector diff_fromDelta( const std::wstring &text1, const std::wstring &delta ); + TDiffVector diff_fromDelta( const std::string &text1, const std::string &delta ); - // MATCH FUNCTIONS + // MATCH FUNCTIONS - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc'. * Returns -1 if no match found. * @param text The text to search. @@ -531,13 +539,11 @@ class diff_match_patch { * @param loc The location to search around. * @return Best match index or -1. 
*/ - public: - std::size_t match_main(const std::wstring &text, const std::wstring &pattern, - std::size_t loc); - std::size_t match_main(const std::string &text, const std::string &pattern, - std::size_t loc); + public: + std::size_t match_main( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); + std::size_t match_main( const std::string &text, const std::string &pattern, std::size_t loc ); - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc' using the * Bitap algorithm. Returns -1 if no match found. * @param text The text to search. @@ -545,13 +551,11 @@ class diff_match_patch { * @param loc The location to search around. * @return Best match index or -1. */ - protected: - std::size_t match_bitap(const std::wstring &text, const std::wstring &pattern, - std::size_t loc); - std::size_t match_bitap(const std::string &text, const std::string &pattern, - std::size_t loc); + protected: + std::size_t match_bitap( const std::wstring &text, const std::wstring &pattern, std::size_t loc ); + std::size_t match_bitap( const std::string &text, const std::string &pattern, std::size_t loc ); - /** + /** * Compute and return the score for a match with e errors and x location. * @param e Number of errors in match. * @param x Location of match. @@ -559,52 +563,51 @@ class diff_match_patch { * @param pattern Pattern being sought. * @return Overall score for match (0.0 = good, 1.0 = bad). */ - private: - double match_bitapScore(int64_t e, int64_t x, int64_t loc, - const std::wstring &pattern); + private: + double match_bitapScore( int64_t e, int64_t x, int64_t loc, const std::wstring &pattern ); - /** + /** * Initialise the alphabet for the Bitap algorithm. * @param pattern The text to encode. * @return Hash of character locations. 
*/ - protected: - TCharPosMap match_alphabet(const std::wstring &pattern); - TCharPosMap match_alphabet(const std::string &pattern); + protected: + TCharPosMap match_alphabet( const std::wstring &pattern ); + TCharPosMap match_alphabet( const std::string &pattern ); - // PATCH FUNCTIONS + // PATCH FUNCTIONS - /** + /** * Increase the context until it is unique, * but don't let the pattern expand beyond Match_MaxBits. * @param patch The patch to grow. * @param text Source text. */ - protected: - void patch_addContext(Patch &patch, const std::wstring &text); - void patch_addContext(Patch &patch, const std::string &text); + protected: + void patch_addContext( Patch &patch, const std::wstring &text ); + void patch_addContext( Patch &patch, const std::string &text ); - /** + /** * Compute a list of patches to turn text1 into text2. * A set of diffs will be computed. * @param text1 Old text. * @param text2 New text. * @return LinkedList of Patch objects. */ - public: - TPatchVector patch_make(const std::wstring &text1, const std::wstring &text2); - TPatchVector patch_make(const std::string &text1, const std::string &text2); + public: + TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2 ); + TPatchVector patch_make( const std::string &text1, const std::string &text2 ); - /** + /** * Compute a list of patches to turn text1 into text2. * text1 will be derived from the provided diffs. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. */ - public: - TPatchVector patch_make(const TDiffVector &diffs); + public: + TPatchVector patch_make( const TDiffVector &diffs ); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is ignored, diffs are the delta between text1 and text2. * @param text1 Old text. @@ -614,32 +617,30 @@ class diff_match_patch { * @deprecated Prefer patch_make(const std::wstring &text1, const std::list< * Diff > &diffs). 
*/ - public: - TPatchVector patch_make(const std::wstring &text1, const std::wstring &text2, - const TDiffVector &diffs); - TPatchVector patch_make(const std::string &text1, const std::string &text2, - const TDiffVector &diffs); + public: + TPatchVector patch_make( const std::wstring &text1, const std::wstring &text2, const TDiffVector &diffs ); + TPatchVector patch_make( const std::string &text1, const std::string &text2, const TDiffVector &diffs ); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is not provided, diffs are the delta between text1 and text2. * @param text1 Old text. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. */ - public: - TPatchVector patch_make(const std::wstring &text1, const TDiffVector &diffs); - TPatchVector patch_make(const std::string &text1, const TDiffVector &diffs); + public: + TPatchVector patch_make( const std::wstring &text1, const TDiffVector &diffs ); + TPatchVector patch_make( const std::string &text1, const TDiffVector &diffs ); - /** + /** * Given an array of patches, return another array that is identical. * @param patches Array of patch objects. * @return Array of patch objects. */ - public: - TPatchVector patch_deepCopy(const TPatchVector &patches); + public: + TPatchVector patch_deepCopy( const TPatchVector &patches ); - /** + /** * Merge a set of patches onto the text. Return a patched text, as well * as an array of true/false values indicating which patches were applied. * @param patches Array of patch objects. @@ -647,60 +648,58 @@ class diff_match_patch { * @return Two element Object array, containing the new text and an array of * boolean values. 
*/ - public: - std::pair > patch_apply(TPatchVector patches, - std::wstring text); - std::pair > patch_apply(TPatchVector patches, - std::string text); + public: + std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::wstring text ); + std::pair< std::wstring, std::vector< bool > > patch_apply( TPatchVector patches, std::string text ); - /** + /** * Add some padding on text start and end so that edges can match something. * Intended to be called only from within patch_apply. * @param patches Array of patch objects. * @return The padding string added to each side. */ - public: - std::wstring patch_addPadding(TPatchVector &patches); + public: + std::wstring patch_addPadding( TPatchVector &patches ); - /** + /** * Look through the patches and break up any which are longer than the * maximum limit of the match algorithm. * Intended to be called only from within patch_apply. * @param patches LinkedList of Patch objects. */ - public: - void patch_splitMax(TPatchVector &patches); + public: + void patch_splitMax( TPatchVector &patches ); - /** + /** * Take a list of patches and return a textual representation. * @param patches List of Patch objects. * @return Text representation of patches. */ - public: - std::wstring patch_toText(const TPatchVector &patches); + public: + std::wstring patch_toText( const TPatchVector &patches ); - /** + /** * Parse a textual representation of patches and return a List of Patch * objects. * @param textline Text representation of patches. * @return List of Patch objects. * @throws std::wstring If invalid input. */ - public: - TPatchVector patch_fromText(const std::wstring &textline); - TPatchVector patch_fromText(const std::string &textline); + public: + TPatchVector patch_fromText( const std::wstring &textline ); + TPatchVector patch_fromText( const std::string &textline ); - /** + /** * A safer version of std::wstring.mid(pos). This one returns "" instead of * null when the postion equals the string length. 
* @param str String to take a substring from. * @param pos Position to start the substring from. * @return Substring. */ - private: - static std::wstring safeMid(const std::wstring &str, std::size_t pos); + private: + static std::wstring safeMid( const std::wstring &str, std::size_t pos ); - /** + /** * A safer version of std::wstring.mid(pos, len). This one returns "" instead * of null when the postion equals the string length. * @param str String to take a substring from. @@ -708,15 +707,15 @@ class diff_match_patch { * @param len Length of substring. * @return Substring. */ - private: - static std::wstring safeMid(const std::wstring &str, std::size_t pos, - std::size_t len); -}; - -namespace NUtils { -std::wstring to_wstring(const diff_match_patch::TVariant &variant, - bool doubleQuoteEmpty = false); -std::wstring to_wstring(const Diff &diff, bool doubleQuoteEmpty = false); -std::wstring to_wstring(const Patch &patch, bool doubleQuoteEmpty = false); -} // namespace NUtils -#endif // DIFF_MATCH_PATCH_H + private: + static std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); + }; + + namespace NUtils + { + std::wstring to_wstring( const diff_match_patch::TVariant &variant, bool doubleQuoteEmpty ); + std::wstring to_wstring( const Diff &diff, bool doubleQuoteEmpty ); + std::wstring to_wstring( const Patch &patch, bool doubleQuoteEmpty ); + } // namespace NUtils +} +#endif // DIFF_MATCH_PATCH_H diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index 5dc5abd6..efd9c2fc 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -22,1343 +22,1088 @@ #include "diff_match_patch_utils.h" #ifdef USE_GTEST -#include "gtest/gtest.h" + #include "gtest/gtest.h" #endif #include #include +#include +#include -int main(int argc, char **argv) { +int main( int argc, char **argv ) +{ #ifdef USE_GTEST - ::testing::InitGoogleTest(&argc, argv); - int retVal = RUN_ALL_TESTS(); + 
::testing::InitGoogleTest( &argc, argv ); + int retVal = RUN_ALL_TESTS(); #else - diff_match_patch_test dmp_test; - std::cerr << "Starting diff_match_patch unit tests.\n"; - int retVal = dmp_test.run_all_tests(); - std::cerr << "Done.\n"; + diff_match_patch_test dmp_test; + std::cerr << "Starting diff_match_patch unit tests.\n"; + int retVal = dmp_test.run_all_tests(); + std::cerr << "Done.\n"; #endif - return retVal; + return retVal; } -static wchar_t kZero{0}; -static wchar_t kOne{1}; -static wchar_t kTwo{2}; - -diff_match_patch_test::diff_match_patch_test() {} +static wchar_t kZero{ 0 }; +static wchar_t kOne{ 1 }; +static wchar_t kTwo{ 2 }; +namespace NDiffMatchPatch +{ + void PrintTo( const Diff &value, ::std::ostream *os ) + { + std::wostringstream oss; + oss << value.toString( EStringType::eUnitTest ); + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + *os << utf8_conv.to_bytes( oss.str() ); + } + + diff_match_patch_test::diff_match_patch_test() + { + } #ifndef USE_GTEST -bool diff_match_patch_test::runTest(std::function test) { - bool retVal = false; - try { - test(); - numPassedTests++; - retVal = true; - } catch (std::string msg) { - std::cerr << "Test failed: " << msg << "\n"; - numFailedTests++; - retVal = false; - } - return retVal; -} - -int diff_match_patch_test::run_all_tests() { - auto startTime = std::chrono::high_resolution_clock::now(); - - runTest(std::bind(&diff_match_patch_test::testDiffCommonPrefix, this)); - runTest(std::bind(&diff_match_patch_test::testDiffCommonSuffix, this)); - runTest(std::bind(&diff_match_patch_test::testDiffCommonOverlap, this)); - runTest(std::bind(&diff_match_patch_test::testDiffHalfmatch, this)); - runTest(std::bind(&diff_match_patch_test::testDiffLinesToChars, this)); - runTest(std::bind(&diff_match_patch_test::testDiffCharsToLines, this)); - runTest(std::bind(&diff_match_patch_test::testDiffCleanupMerge, this)); - runTest( - std::bind(&diff_match_patch_test::testDiffCleanupSemanticLossless, 
this)); - runTest(std::bind(&diff_match_patch_test::testDiffCleanupSemantic, this)); - runTest(std::bind(&diff_match_patch_test::testDiffCleanupEfficiency, this)); - runTest(std::bind(&diff_match_patch_test::testDiffPrettyHtml, this)); - runTest(std::bind(&diff_match_patch_test::testDiffPrettyConsole, this)); - runTest(std::bind(&diff_match_patch_test::testDiffText, this)); - runTest(std::bind(&diff_match_patch_test::testDiffDelta, this)); - runTest(std::bind(&diff_match_patch_test::testDiffXIndex, this)); - runTest(std::bind(&diff_match_patch_test::testDiffLevenshtein, this)); - runTest(std::bind(&diff_match_patch_test::testDiffBisect, this)); - runTest(std::bind(&diff_match_patch_test::testDiffMain, this)); - - runTest(std::bind(&diff_match_patch_test::testMatchAlphabet, this)); - runTest(std::bind(&diff_match_patch_test::testMatchBitap, this)); - runTest(std::bind(&diff_match_patch_test::testMatchMain, this)); - - runTest(std::bind(&diff_match_patch_test::testPatchObj, this)); - runTest(std::bind(&diff_match_patch_test::testPatchFromText, this)); - runTest(std::bind(&diff_match_patch_test::testPatchToText, this)); - runTest(std::bind(&diff_match_patch_test::testPatchAddContext, this)); - runTest(std::bind(&diff_match_patch_test::testPatchMake, this)); - runTest(std::bind(&diff_match_patch_test::testPatchSplitMax, this)); - runTest(std::bind(&diff_match_patch_test::testPatchAddPadding, this)); - runTest(std::bind(&diff_match_patch_test::testPatchApply, this)); - if (numFailedTests == 0) - std::cout << numPassedTests << " Tests Passed\n" - << numFailedTests << " Tests Failed\n"; - else - std::cerr << numPassedTests << " Tests Passed\n" - << numFailedTests << " Tests Failed\n"; - auto endTime = std::chrono::high_resolution_clock::now(); - auto elapsed = - std::chrono::duration_cast(endTime - startTime) - .count(); - std::wcout << "Total time: " << elapsed << " ms\n"; - return (numFailedTests == 0) ? 
0 : 1; -} + bool diff_match_patch_test::runTest( std::function< void() > test ) + { + bool retVal = false; + try + { + test(); + numPassedTests++; + retVal = true; + } + catch ( std::string msg ) + { + std::cerr << "Test failed: " << msg << "\n"; + numFailedTests++; + retVal = false; + } + return retVal; + } + + int diff_match_patch_test::run_all_tests() + { + auto startTime = std::chrono::high_resolution_clock::now(); + + runTest( std::bind( &diff_match_patch_test::testDiffCommonPrefix, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCommonSuffix, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCommonOverlap, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffHalfmatch, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffLinesToChars, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCharsToLines, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupMerge, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupSemanticLossless, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupSemantic, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffCleanupEfficiency, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffPrettyHtml, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffPrettyConsole, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffText, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffDelta, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffXIndex, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffLevenshtein, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffBisect, this ) ); + runTest( std::bind( &diff_match_patch_test::testDiffMain, this ) ); + + runTest( std::bind( &diff_match_patch_test::testMatchAlphabet, this ) ); + runTest( std::bind( &diff_match_patch_test::testMatchBitap, this ) ); + runTest( std::bind( 
&diff_match_patch_test::testMatchMain, this ) ); + + runTest( std::bind( &diff_match_patch_test::testPatchObj, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchFromText, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchToText, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchAddContext, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchMake, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchSplitMax, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchAddPadding, this ) ); + runTest( std::bind( &diff_match_patch_test::testPatchApply, this ) ); + if ( numFailedTests == 0 ) + std::cout << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; + else + std::cerr << numPassedTests << " Tests Passed\n" << numFailedTests << " Tests Failed\n"; + auto endTime = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast< std::chrono::milliseconds >( endTime - startTime ).count(); + std::wcout << "Total time: " << elapsed << " ms\n"; + return ( numFailedTests == 0 ) ? 0 : 1; + } #endif -// DIFF TEST FUNCTIONS - -TEST_F(diff_match_patch_test, testDiffCommonPrefix) { - // Detect any common prefix. - assertEquals("diff_commonPrefix: nullptr case.", 0, - dmp.diff_commonPrefix("abc", "xyz")); - - assertEquals("diff_commonPrefix: Non-nullptr case.", 4, - dmp.diff_commonPrefix("1234abcdef", "1234xyz")); - - assertEquals("diff_commonPrefix: Whole case.", 4, - dmp.diff_commonPrefix("1234", "1234xyz")); -} - -TEST_F(diff_match_patch_test, testDiffCommonSuffix) { - // Detect any common suffix. 
- assertEquals("diff_commonSuffix: nullptr case.", 0, - dmp.diff_commonSuffix("abc", "xyz")); - - assertEquals("diff_commonSuffix: Non-nullptr case.", 4, - dmp.diff_commonSuffix("abcdef1234", "xyz1234")); - - assertEquals("diff_commonSuffix: Whole case.", 4, - dmp.diff_commonSuffix("1234", "xyz1234")); -} - -TEST_F(diff_match_patch_test, testDiffCommonOverlap) { - // Detect any suffix/prefix overlap. - assertEquals("diff_commonOverlap: nullptr case.", 0, - dmp.diff_commonOverlap("", "abcd")); + // DIFF TEST FUNCTIONS - assertEquals("diff_commonOverlap: Whole case.", 3, - dmp.diff_commonOverlap("abc", "abcd")); - - assertEquals("diff_commonOverlap: No overlap.", 0, - dmp.diff_commonOverlap("123456", "abcd")); - - assertEquals("diff_commonOverlap: Overlap.", 3, - dmp.diff_commonOverlap("123456xxx", "xxxabcd")); - - // Some overly clever languages (C#) may treat ligatures as equal to their - // component letters. E.g. U+FB01 == 'fi' - assertEquals("diff_commonOverlap: Unicode.", 0, - dmp.diff_commonOverlap(L"fi", std::wstring(L"\ufb01i"))); -} - -TEST_F(diff_match_patch_test, testDiffHalfmatch) { - // Detect a halfmatch. 
- dmp.Diff_Timeout = 1; - assertEmpty("diff_halfMatch: No match #1.", - dmp.diff_halfMatch("1234567890", "abcdef")); - - assertEmpty("diff_halfMatch: No match #2.", - dmp.diff_halfMatch("12345", "23")); - - assertEquals("diff_halfMatch: Single Match #1.", - TStringVector({L"12", L"90", L"a", L"z", L"345678"}), - dmp.diff_halfMatch("1234567890", "a345678z")); - - assertEquals("diff_halfMatch: Single Match #2.", - TStringVector({L"a", L"z", L"12", L"90", L"345678"}), - dmp.diff_halfMatch("a345678z", "1234567890")); - - assertEquals("diff_halfMatch: Single Match #3.", - TStringVector({L"abc", L"z", L"1234", L"0", L"56789"}), - dmp.diff_halfMatch("abc56789z", "1234567890")); - - assertEquals("diff_halfMatch: Single Match #4.", - TStringVector({L"a", L"xyz", L"1", L"7890", L"23456"}), - dmp.diff_halfMatch("a23456xyz", "1234567890")); - - assertEquals( - "diff_halfMatch: Multiple Matches #1.", - TStringVector({L"12123", L"123121", L"a", L"z", L"1234123451234"}), - dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); - - assertEquals( - "diff_halfMatch: Multiple Matches #2.", - TStringVector({L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-="}), - dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); - - assertEquals( - "diff_halfMatch: Multiple Matches #3.", - TStringVector({L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y"}), - dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); - - // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not - // -qHillo+x=HelloHe-w+Hulloy - assertEquals("diff_halfMatch: Non-optimal halfmatch.", - TStringVector({L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe"}), - dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); - - dmp.Diff_Timeout = 0; - assertEmpty("diff_halfMatch: Optimal no halfmatch.", - dmp.diff_halfMatch(L"qHilloHelloHew", L"xHelloHeHulloy")); -} - -TEST_F(diff_match_patch_test, testDiffLinesToChars) { - // Convert lines down to characters. 
- TStringVector tmpVector = TStringVector({L"", L"alpha\n", L"beta\n"}); - TVariantVector tmpVarList; - tmpVarList.emplace_back( - NUtils::to_wstring({1, 2, 1})); //(("\u0001\u0002\u0001")); - tmpVarList.emplace_back( - NUtils::to_wstring({2, 1, 2})); // (("\u0002\u0001\u0002")); - tmpVarList.emplace_back(tmpVector); - assertEquals( - "diff_linesToChars:", tmpVarList, - dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.emplace_back(L""); - tmpVector.emplace_back(L"alpha\r\n"); - tmpVector.emplace_back(L"beta\r\n"); - tmpVector.emplace_back(L"\r\n"); - tmpVarList.emplace_back(std::wstring()); - tmpVarList.emplace_back( - NUtils::to_wstring({1, 2, 3, 3})); // (("\u0001\u0002\u0003\u0003")); - tmpVarList.emplace_back(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, - dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.emplace_back(L""); - tmpVector.emplace_back(L"a"); - tmpVector.emplace_back(L"b"); - tmpVarList.emplace_back(NUtils::to_wstring(1)); // (("\u0001")); - tmpVarList.emplace_back(NUtils::to_wstring(2)); // (("\u0002")); - tmpVarList.emplace_back(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, - dmp.diff_linesToChars("a", "b")); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - tmpVarList.clear(); - std::wstring lines; - std::wstring chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.emplace_back(std::to_wstring(x) + L"\n"); - lines += std::to_wstring(x) + L"\n"; - chars += NUtils::to_wstring(x); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, - tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.emplace(tmpVector.begin(), L""); - tmpVarList.emplace_back(chars); - tmpVarList.emplace_back(std::wstring()); - tmpVarList.emplace_back(tmpVector); - assertEquals("diff_linesToChars: More than 256.", tmpVarList, - dmp.diff_linesToChars(lines, {})); -} - -TEST_F(diff_match_patch_test, testDiffCharsToLines) { - // First check that Diff equality works. - assertTrue("diff_charsToLines:", Diff(EQUAL, "a") == Diff(EQUAL, "a")); - - assertEquals("diff_charsToLines:", Diff(EQUAL, "a"), Diff(EQUAL, "a")); - - // Convert chars up to lines. - TDiffVector diffs; - diffs.emplace_back(EQUAL, - NUtils::to_wstring({1, 2, 1})); // ("\u0001\u0002\u0001"); - diffs.emplace_back(INSERT, - NUtils::to_wstring({2, 1, 2})); // ("\u0002\u0001\u0002"); - TStringVector tmpVector; - tmpVector.emplace_back(L""); - tmpVector.emplace_back(L"alpha\n"); - tmpVector.emplace_back(L"beta\n"); - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines:", - TDiffVector({Diff(EQUAL, "alpha\nbeta\nalpha\n"), - Diff(INSERT, "beta\nalpha\nbeta\n")}), - diffs); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - std::vector tmpVarList; - std::wstring lines; - std::wstring chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.emplace_back(std::to_wstring(x) + L"\n"); - lines += std::to_wstring(x) + L"\n"; - chars += NUtils::to_wstring(x); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, - tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.emplace(tmpVector.begin(), L""); - diffs = {Diff(DELETE, chars)}; - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines: More than 256.", - TDiffVector({Diff(DELETE, lines)}), diffs); -} - -TEST_F(diff_match_patch_test, testDiffCleanupMerge) { - // Cleanup a messy diff. - TDiffVector diffs; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: nullptr case.", TDiffVector(), diffs); - - diffs = {Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")}; - dmp.diff_cleanupMerge(diffs); - assertEquals( - "diff_cleanupMerge: No change case.", - TDiffVector({Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")}), - diffs); - - diffs = {Diff(EQUAL, "a"), Diff(EQUAL, "b"), Diff(EQUAL, "c")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge equalities.", - TDiffVector({Diff(EQUAL, "abc")}), diffs); - - diffs = {Diff(DELETE, "a"), Diff(DELETE, "b"), Diff(DELETE, "c")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge deletions.", - TDiffVector({Diff(DELETE, "abc")}), diffs); - - diffs = {Diff(INSERT, "a"), Diff(INSERT, "b"), Diff(INSERT, "c")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge insertions.", - TDiffVector({Diff(INSERT, "abc")}), diffs); - - diffs = {Diff(DELETE, "a"), Diff(INSERT, "b"), Diff(DELETE, "c"), - Diff(INSERT, "d"), Diff(EQUAL, "e"), Diff(EQUAL, "f")}; - dmp.diff_cleanupMerge(diffs); - assertEquals( - "diff_cleanupMerge: Merge interweave.", - TDiffVector({Diff(DELETE, "ac"), Diff(INSERT, "bd"), 
Diff(EQUAL, "ef")}), - diffs); - - diffs = {Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Prefix and suffix detection.", - TDiffVector({Diff(EQUAL, "a"), Diff(DELETE, "d"), - Diff(INSERT, "b"), Diff(EQUAL, "c")}), - diffs); - - diffs = {Diff(EQUAL, "x"), Diff(DELETE, "a"), Diff(INSERT, "abc"), - Diff(DELETE, "dc"), Diff(EQUAL, "y")}; - dmp.diff_cleanupMerge(diffs); - assertEquals( - "diff_cleanupMerge: Prefix and suffix detection with equalities.", - TDiffVector({Diff(EQUAL, "xa"), Diff(DELETE, "d"), Diff(INSERT, "b"), - Diff(EQUAL, "cy")}), - diffs); - - diffs = {Diff(EQUAL, "a"), Diff(INSERT, "ba"), Diff(EQUAL, "c")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left.", - TDiffVector({Diff(INSERT, "ab"), Diff(EQUAL, "ac")}), diffs); - - diffs = {Diff(EQUAL, "c"), Diff(INSERT, "ab"), Diff(EQUAL, "a")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right.", - TDiffVector({Diff(EQUAL, "ca"), Diff(INSERT, "ba")}), diffs); - - diffs = {Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(EQUAL, "c"), - Diff(DELETE, "ac"), Diff(EQUAL, "x")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left recursive.", - TDiffVector({Diff(DELETE, "abc"), Diff(EQUAL, "acx")}), diffs); - - diffs = {Diff(EQUAL, "x"), Diff(DELETE, "ca"), Diff(EQUAL, "c"), - Diff(DELETE, "b"), Diff(EQUAL, "a")}; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right recursive.", - TDiffVector({Diff(EQUAL, "xca"), Diff(DELETE, "cba")}), diffs); -} + TEST_F( diff_match_patch_test, testDiffCommonPrefix ) + { + // Detect any common prefix. + assertEquals( "diff_commonPrefix: nullptr case.", 0, dmp.diff_commonPrefix( "abc", "xyz" ) ); -TEST_F(diff_match_patch_test, testDiffCleanupSemanticLossless) { - // Slide diffs to match logical boundaries. 
- auto diffs = TDiffVector(); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs); - - diffs = {Diff(EQUAL, "AAA\r\n\r\nBBB"), Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), - Diff(EQUAL, "\r\nEEE")}; - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Blank lines.", - TDiffVector({Diff(EQUAL, "AAA\r\n\r\n"), - Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), - Diff(EQUAL, "BBB\r\nEEE")}), - diffs); - - diffs = {Diff(EQUAL, "AAA\r\nBBB"), Diff(INSERT, " DDD\r\nBBB"), - Diff(EQUAL, " EEE")}; - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Line boundaries.", - TDiffVector({Diff(EQUAL, "AAA\r\n"), Diff(INSERT, "BBB DDD\r\n"), - Diff(EQUAL, "BBB EEE")}), - diffs); - - diffs = {Diff(EQUAL, "The c"), Diff(INSERT, "ow and the c"), - Diff(EQUAL, "at.")}; - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", - TDiffVector({Diff(EQUAL, "The "), Diff(INSERT, "cow and the "), - Diff(EQUAL, "cat.")}), - diffs); - - diffs = {Diff(EQUAL, "The-c"), Diff(INSERT, "ow-and-the-c"), - Diff(EQUAL, "at.")}; - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Alphanumeric boundaries.", - TDiffVector({Diff(EQUAL, "The-"), Diff(INSERT, "cow-and-the-"), - Diff(EQUAL, "cat.")}), - diffs); - - diffs = {Diff(EQUAL, "a"), Diff(DELETE, "a"), Diff(EQUAL, "ax")}; - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the start.", - TDiffVector({Diff(DELETE, "a"), Diff(EQUAL, "aax")}), diffs); - - diffs = {Diff(EQUAL, "xa"), Diff(DELETE, "a"), Diff(EQUAL, "a")}; - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the end.", - TDiffVector({Diff(EQUAL, "xaa"), Diff(DELETE, "a")}), diffs); - - diffs = {Diff(EQUAL, "The xxx. The "), Diff(INSERT, "zzz. 
The "), - Diff(EQUAL, "yyy.")}; - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Sentence boundaries.", - TDiffVector({Diff(EQUAL, "The xxx."), Diff(INSERT, " The zzz."), - Diff(EQUAL, " The yyy.")}), - diffs); -} - -TEST_F(diff_match_patch_test, testDiffCleanupSemantic) { - // Cleanup semantically trivial equalities. - auto diffs = TDiffVector(); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs); - - diffs = {Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), - Diff(DELETE, "e")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #1.", - TDiffVector({Diff(DELETE, "ab"), Diff(INSERT, "cd"), - Diff(EQUAL, "12"), Diff(DELETE, "e")}), - diffs); - - diffs = {Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), - Diff(DELETE, "wxyz")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #2.", - TDiffVector({Diff(DELETE, "abc"), Diff(INSERT, "ABC"), - Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")}), - diffs); - - diffs = {Diff(DELETE, "a"), Diff(EQUAL, "b"), Diff(DELETE, "c")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Simple elimination.", - TDiffVector({Diff(DELETE, "abc"), Diff(INSERT, "b")}), diffs); - - diffs = {Diff(DELETE, "ab"), Diff(EQUAL, "cd"), Diff(DELETE, "e"), - Diff(EQUAL, "f"), Diff(INSERT, "g")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Backpass elimination.", - TDiffVector({Diff(DELETE, "abcdef"), Diff(INSERT, "cdfg")}), - diffs); - - diffs = {Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), - Diff(INSERT, "2"), Diff(EQUAL, "_"), Diff(INSERT, "1"), - Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Multiple elimination.", - TDiffVector({Diff(DELETE, "AB_AB"), Diff(INSERT, "1A2_1A2")}), - diffs); - - diffs = {Diff(EQUAL, "The 
c"), Diff(DELETE, "ow and the c"), - Diff(EQUAL, "at.")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", - TDiffVector({Diff(EQUAL, "The "), Diff(DELETE, "cow and the "), - Diff(EQUAL, "cat.")}), - diffs); - - diffs = {Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No overlap elimination.", - TDiffVector({Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")}), - diffs); - - diffs = {Diff(DELETE, "abcxxx"), Diff(INSERT, "xxxdef")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Overlap elimination.", - TDiffVector({Diff(DELETE, "abc"), Diff(EQUAL, "xxx"), - Diff(INSERT, "def")}), - diffs); - - diffs = {Diff(DELETE, "xxxabc"), Diff(INSERT, "defxxx")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", - TDiffVector({Diff(INSERT, "def"), Diff(EQUAL, "xxx"), - Diff(DELETE, "abc")}), - diffs); - - diffs = {Diff(DELETE, "abcd1212"), Diff(INSERT, "1212efghi"), - Diff(EQUAL, "----"), Diff(DELETE, "A3"), Diff(INSERT, "3BC")}; - dmp.diff_cleanupSemantic(diffs); - assertEquals( - "diff_cleanupSemantic: Two overlap eliminations.", - TDiffVector({Diff(DELETE, "abcd"), Diff(EQUAL, "1212"), - Diff(INSERT, "efghi"), Diff(EQUAL, "----"), - Diff(DELETE, "A"), Diff(EQUAL, "3"), Diff(INSERT, "BC")}), - diffs); -} - -TEST_F(diff_match_patch_test, testDiffCleanupEfficiency) { - // Cleanup operationally trivial equalities. 
- dmp.Diff_EditCost = 4; - auto diffs = TDiffVector(); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: nullptr case.", TDiffVector(), diffs); - - diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), - Diff(DELETE, "cd"), Diff(INSERT, "34")}; - dmp.diff_cleanupEfficiency(diffs); - assertEquals( - "diff_cleanupEfficiency: No elimination.", - TDiffVector({Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), - Diff(DELETE, "cd"), Diff(INSERT, "34")}), - diffs); - - diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xyz"), - Diff(DELETE, "cd"), Diff(INSERT, "34")}; - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Four-edit elimination.", - TDiffVector({Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xyz34")}), - diffs); - - diffs = {Diff(INSERT, "12"), Diff(EQUAL, "x"), Diff(DELETE, "cd"), - Diff(INSERT, "34")}; - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Three-edit elimination.", - TDiffVector({Diff(DELETE, "xcd"), Diff(INSERT, "12x34")}), - diffs); - - diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xy"), - Diff(INSERT, "34"), Diff(EQUAL, "z"), Diff(DELETE, "cd"), - Diff(INSERT, "56")}; - dmp.diff_cleanupEfficiency(diffs); - assertEquals( - "diff_cleanupEfficiency: Backpass elimination.", - TDiffVector({Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xy34z56")}), diffs); - - dmp.Diff_EditCost = 5; - diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), - Diff(DELETE, "cd"), Diff(INSERT, "34")}; - dmp.diff_cleanupEfficiency(diffs); - assertEquals( - "diff_cleanupEfficiency: High cost elimination.", - TDiffVector({Diff(DELETE, "abwxyzcd"), Diff(INSERT, "12wxyz34")}), diffs); - dmp.Diff_EditCost = 4; -} - -TEST_F(diff_match_patch_test, testDiffPrettyHtml) { - // Pretty print. - auto diffs = TDiffVector( - {Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")}); - assertEquals("diff_prettyHtml:", - L"
<B>b</B>c&d", - dmp.diff_prettyHtml(diffs)); -} - -TEST_F(diff_match_patch_test, testDiffPrettyConsole) { - // Pretty print. - static std::wstring kRed{L"\033[0;31m"}; - static std::wstring kGreen{L"\033[0;32m"}; - static std::wstring kYellow{L"\033[0;33m"}; - static std::wstring kReset{L"\033[m"}; - static std::wstring kEOL{NUtils::fromPercentEncoding(L"%C2%B6") + L"\n"}; - - auto diffs = TDiffVector( - {Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")}); - auto results = dmp.diff_prettyConsole(diffs); - assertEquals( - "diff_prettyConsole:", - L"a" + kEOL + kRed + L"b" + kReset + kGreen + L"c&d" + kReset, - results); -} - -TEST_F(diff_match_patch_test, testDiffText) { - // Compute the source and destination texts. - auto diffs = {Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), - Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), - Diff(EQUAL, " lazy")}; - assertEquals("diff_text1:", L"jumps over the lazy", dmp.diff_text1(diffs)); - assertEquals("diff_text2:", L"jumped over a lazy", dmp.diff_text2(diffs)); -} - -TEST_F(diff_match_patch_test, testDiffDelta) { - // Convert a diff into delta string. - auto diffs = TDiffVector({Diff(EQUAL, "jump"), Diff(DELETE, "s"), - Diff(INSERT, "ed"), Diff(EQUAL, " over "), - Diff(DELETE, "the"), Diff(INSERT, "a"), - Diff(EQUAL, " lazy"), Diff(INSERT, "old dog")}); - std::wstring text1 = dmp.diff_text1(diffs); - assertEquals("diff_text1: Base text.", L"jumps over the lazy", text1); - - std::wstring delta = dmp.diff_toDelta(diffs); - std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; - assertEquals("diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", - delta); - - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Normal.", diffs, - dmp.diff_fromDelta(text1, delta)); - - // Generates error (19 < 20). - assertThrow("diff_fromDelta: Too long.", - dmp.diff_fromDelta(text1 + L"x", delta), std::wstring); - - // Generates error (19 > 18). 
- assertThrow("diff_fromDelta: Too short.", - dmp.diff_fromDelta(text1.substr(1), delta), std::wstring); - - // Generates error (%c3%xy invalid Unicode). - assertThrow("diff_fromDelta: Invalid character.", - dmp.diff_fromDelta("", "+%c3%xy"), std::wstring); - - // Test deltas with special characters. - diffs = { - Diff(EQUAL, std::wstring(L"\u0680 ") + kZero + std::wstring(L" \t %")), - Diff(DELETE, std::wstring(L"\u0681 ") + kOne + std::wstring(L" \n ^")), - Diff(INSERT, std::wstring(L"\u0682 ") + kTwo + std::wstring(L" \\ |"))}; - - text1 = dmp.diff_text1(diffs); - golden = std::wstring(L"\u0680 ") + kZero + std::wstring(L" \t %\u0681 ") + - kOne + std::wstring(L" \n ^"); - assertEquals("diff_text1: Unicode text", golden, text1); - - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta); - - assertEquals("diff_fromDelta: Unicode", diffs, - dmp.diff_fromDelta(text1, delta)); - - // Verify pool of unchanged characters. - diffs = { - Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")}; - std::wstring text2 = dmp.diff_text2(diffs); - assertEquals("diff_text2: Unchanged characters.", - L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); - - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unchanged characters.", - L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", - delta); - - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Unchanged characters.", diffs, - dmp.diff_fromDelta({}, delta)); -} - -TEST_F(diff_match_patch_test, testDiffXIndex) { - // Translate a location in text1 to text2. 
- auto diffs = TDiffVector( - {Diff(DELETE, "a"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")}); - assertEquals("diff_xIndex: Translation on equality.", 5, - dmp.diff_xIndex(diffs, 2)); - - diffs = {Diff(EQUAL, "a"), Diff(DELETE, "1234"), Diff(EQUAL, "xyz")}; - assertEquals("diff_xIndex: Translation on deletion.", 1, - dmp.diff_xIndex(diffs, 3)); -} - -TEST_F(diff_match_patch_test, testDiffLevenshtein) { - auto diffs = TDiffVector( - {Diff(DELETE, "abc"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")}); - assertEquals("diff_levenshtein: Trailing equality.", 4, - dmp.diff_levenshtein(diffs)); - - diffs = {Diff(EQUAL, "xyz"), Diff(DELETE, "abc"), Diff(INSERT, "1234")}; - assertEquals("diff_levenshtein: Leading equality.", 4, - dmp.diff_levenshtein(diffs)); - - diffs = {Diff(DELETE, "abc"), Diff(EQUAL, "xyz"), Diff(INSERT, "1234")}; - assertEquals("diff_levenshtein: Middle equality.", 7, - dmp.diff_levenshtein(diffs)); -} - -TEST_F(diff_match_patch_test, testDiffBisect) { - // Normal. - std::wstring a = L"cat"; - std::wstring b = L"map"; - // Since the resulting diff hasn't been normalized, it would be ok if - // the insertion and deletion pairs are swapped. - // If the order changes, tweak this test as required. - auto diffs = - TDiffVector({Diff(DELETE, "c"), Diff(INSERT, "m"), Diff(EQUAL, "a"), - Diff(DELETE, "t"), Diff(INSERT, "p")}); - auto results = dmp.diff_bisect(a, b, std::numeric_limits::max()); - assertEquals("diff_bisect: Normal.", diffs, results); - - // Timeout. - diffs = {Diff(DELETE, "cat"), Diff(INSERT, "map")}; - results = dmp.diff_bisect(a, b, 0); - assertEquals("diff_bisect: Timeout.", diffs, results); -} - -TEST_F(diff_match_patch_test, testDiffMain) { - // Perform a trivial diff. 
- auto diffs = TDiffVector(); - assertEquals("diff_main: nullptr case.", diffs, dmp.diff_main("", "", false)); - - diffs = {Diff(DELETE, "abc")}; - assertEquals("diff_main: RHS side nullptr case.", diffs, - dmp.diff_main("abc", "", false)); - - diffs = {Diff(INSERT, "abc")}; - assertEquals("diff_main: LHS side nullptr case.", diffs, - dmp.diff_main("", "abc", false)); - - diffs = {Diff(EQUAL, "abc")}; - assertEquals("diff_main: Equality.", diffs, - dmp.diff_main("abc", "abc", false)); - - diffs = {Diff(EQUAL, "ab"), Diff(INSERT, "123"), Diff(EQUAL, "c")}; - assertEquals("diff_main: Simple insertion.", diffs, - dmp.diff_main("abc", "ab123c", false)); - - diffs = {Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "bc")}; - assertEquals("diff_main: Simple deletion.", diffs, - dmp.diff_main("a123bc", "abc", false)); - - diffs = {Diff(EQUAL, "a"), Diff(INSERT, "123"), Diff(EQUAL, "b"), - Diff(INSERT, "456"), Diff(EQUAL, "c")}; - assertEquals("diff_main: Two insertions.", diffs, - dmp.diff_main("abc", "a123b456c", false)); - - diffs = {Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "b"), - Diff(DELETE, "456"), Diff(EQUAL, "c")}; - assertEquals("diff_main: Two deletions.", diffs, - dmp.diff_main("a123b456c", "abc", false)); - - // Perform a real diff. - // Switch off the timeout. 
- dmp.Diff_Timeout = 0; - diffs = {Diff(DELETE, "a"), Diff(INSERT, "b")}; - assertEquals("diff_main: Simple case #1.", diffs, - dmp.diff_main("a", "b", false)); - - diffs = {Diff(DELETE, "Apple"), Diff(INSERT, "Banana"), - Diff(EQUAL, "s are a"), Diff(INSERT, "lso"), Diff(EQUAL, " fruit.")}; - assertEquals( - "diff_main: Simple case #2.", diffs, - dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); - - diffs = {Diff(DELETE, "a"), Diff(INSERT, L"\u0680"), Diff(EQUAL, "x"), - Diff(DELETE, "\t"), Diff(INSERT, NUtils::to_wstring(kZero))}; - assertEquals("diff_main: Simple case #3.", diffs, - dmp.diff_main(L"ax\t", std::wstring(L"\u0680x") + kZero, false)); - - diffs = {Diff(DELETE, "1"), Diff(EQUAL, "a"), Diff(DELETE, "y"), - Diff(EQUAL, "b"), Diff(DELETE, "2"), Diff(INSERT, "xab")}; - assertEquals("diff_main: Overlap #1.", diffs, - dmp.diff_main("1ayb2", "abxab", false)); - - diffs = {Diff(INSERT, "xaxcx"), Diff(EQUAL, "abc"), Diff(DELETE, "y")}; - assertEquals("diff_main: Overlap #2.", diffs, - dmp.diff_main("abcy", "xaxcxabc", false)); - - diffs = {Diff(DELETE, "ABCD"), - Diff(EQUAL, "a"), - Diff(DELETE, "="), - Diff(INSERT, "-"), - Diff(EQUAL, "bcd"), - Diff(DELETE, "="), - Diff(INSERT, "-"), - Diff(EQUAL, "efghijklmnopqrs"), - Diff(DELETE, "EFGHIJKLMNOefg")}; - assertEquals("diff_main: Overlap #3.", diffs, - dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", - "a-bcd-efghijklmnopqrs", false)); - - diffs = {Diff(INSERT, " "), Diff(EQUAL, "a"), Diff(INSERT, "nd"), - Diff(EQUAL, " [[Pennsylvania]]"), Diff(DELETE, " and [[New")}; - assertEquals("diff_main: Large equality.", diffs, - dmp.diff_main("a [[Pennsylvania]] and [[New", - " and [[Pennsylvania]]", false)); - - dmp.Diff_Timeout = 0.1f; // 100ms - // This test may 'fail' on extremely fast computers. If so, just increase the - // text lengths. 
- std::wstring a = - L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the " - L"wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; - std::wstring b = - L"I am the very model of a modern major general,\nI've information " - L"vegetable, animal, and mineral,\nI know the kings of England, and I " - L"quote the fights historical,\nFrom Marathon to Waterloo, in order " - L"categorical.\n"; - // Increase the text lengths by 1024 times to ensure a timeout. - for (int x = 0; x < 10; x++) { - a = a + a; - b = b + b; - } - clock_t startTime = clock(); - dmp.diff_main(a, b); - clock_t endTime = clock(); - // Test that we took at least the timeout period. - assertTrue("diff_main: Timeout min.", - (dmp.Diff_Timeout * CLOCKS_PER_SEC) <= (endTime - startTime)); - // Test that we didn't take forever (be forgiving). - // Theoretically this test could fail very occasionally if the - // OS task swaps or locks up for a second at the wrong moment. - // Java seems to overrun by ~80% (compared with 10% for other languages). - // Therefore use an upper limit of 0.5s instead of 0.2s. - assertTrue("diff_main: Timeout max.", - (dmp.Diff_Timeout * CLOCKS_PER_SEC * 2) > (endTime - startTime)); - dmp.Diff_Timeout = 0; - - // Test the linemode speedup. - // Must be long to pass the 100 char cutoff. 
- a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" - L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" - L"0\n1234567890\n"; - b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij" - L"\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghi" - L"j\nabcdefghij\n"; - assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), - dmp.diff_main(a, b, false)); - - a = L"12345678901234567890123456789012345678901234567890123456789012345678901" - L"23456789012345678901234567890123456789012345678901234567890"; - b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghija" - L"bcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; - assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), - dmp.diff_main(a, b, false)); - - a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" - L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" - L"0\n1234567890\n"; - b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890" - L"\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n123456789" - L"0\nabcdefghij\n"; - TStringVector texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); - TStringVector texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); - assertEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); -} + assertEquals( "diff_commonPrefix: Non-nullptr case.", 4, dmp.diff_commonPrefix( "1234abcdef", "1234xyz" ) ); -// MATCH TEST FUNCTIONS - -TEST_F(diff_match_patch_test, testMatchAlphabet) { - // Initialise the bitmasks for Bitap. 
- TCharPosMap bitmask; - bitmask['a'] = 4; - bitmask['b'] = 2; - bitmask['c'] = 1; - assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); - - bitmask = TCharPosMap(); - bitmask['a'] = 37; - bitmask['b'] = 18; - bitmask['c'] = 8; - assertEquals("match_alphabet: Duplicates.", bitmask, - dmp.match_alphabet("abcaba")); -} - -TEST_F(diff_match_patch_test, testMatchBitap) { - // Bitap algorithm. - dmp.Match_Distance = 100; - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Exact match #1.", 5, - dmp.match_bitap("abcdefghijk", "fgh", 5)); - - assertEquals("match_bitap: Exact match #2.", 5, - dmp.match_bitap("abcdefghijk", "fgh", 0)); - - assertEquals("match_bitap: Fuzzy match #1.", 4, - dmp.match_bitap("abcdefghijk", "efxhi", 0)); - - assertEquals("match_bitap: Fuzzy match #2.", 2, - dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); - - assertEquals("match_bitap: Fuzzy match #3.", -1, - dmp.match_bitap("abcdefghijk", "bxy", 1)); - - assertEquals("match_bitap: Overflow.", 2, - dmp.match_bitap("123456789xx0", "3456789x0", 2)); - - assertEquals("match_bitap: Before start match.", 0, - dmp.match_bitap("abcdef", "xxabc", 4)); - - assertEquals("match_bitap: Beyond end match.", 3, - dmp.match_bitap("abcdef", "defyy", 4)); - - assertEquals("match_bitap: Oversized pattern.", 0, - dmp.match_bitap("abcdef", "xabcdefy", 0)); - - dmp.Match_Threshold = 0.4f; - assertEquals("match_bitap: Threshold #1.", 4, - dmp.match_bitap("abcdefghijk", "efxyhi", 1)); - - dmp.Match_Threshold = 0.3f; - assertEquals("match_bitap: Threshold #2.", -1, - dmp.match_bitap("abcdefghijk", "efxyhi", 1)); - - dmp.Match_Threshold = 0.0f; - assertEquals("match_bitap: Threshold #3.", 1, - dmp.match_bitap("abcdefghijk", "bcdef", 1)); - - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Multiple select #1.", 0, - dmp.match_bitap("abcdexyzabcde", "abccde", 3)); - - assertEquals("match_bitap: Multiple select #2.", 8, - dmp.match_bitap("abcdexyzabcde", "abccde", 5)); - - 
dmp.Match_Distance = 10; // Strict location. - assertEquals("match_bitap: Distance test #1.", -1, - dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); - - assertEquals("match_bitap: Distance test #2.", 0, - dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); - - dmp.Match_Distance = 1000; // Loose location. - assertEquals("match_bitap: Distance test #3.", 0, - dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); -} - -TEST_F(diff_match_patch_test, testMatchMain) { - // Full match. - assertEquals("match_main: Equality.", 0, - dmp.match_main("abcdef", "abcdef", 1000)); - - assertEquals("match_main: nullptr text.", -1, - dmp.match_main("", "abcdef", 1)); - - assertEquals("match_main: nullptr pattern.", 3, - dmp.match_main("abcdef", "", 3)); - - assertEquals("match_main: Exact match.", 3, - dmp.match_main("abcdef", "de", 3)); - - dmp.Match_Threshold = 0.7f; - assertEquals("match_main: Complex match.", 4, - dmp.match_main("I am the very model of a modern major general.", - " that berry ", 5)); - dmp.Match_Threshold = 0.5f; -} - -// PATCH TEST FUNCTIONS - -TEST_F(diff_match_patch_test, testPatchObj) { - // Patch Object. 
- Patch p; - p.start1 = 20; - p.start2 = 21; - p.length1 = 18; - p.length2 = 17; - p.diffs = {Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), - Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), - Diff(EQUAL, "\nlaz")}; - std::wstring strp = - L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("patch: toString.", strp, p.toString()); -} - -TEST_F(diff_match_patch_test, testPatchFromText) { - assertTrue("patch_fromText: #0.", dmp.patch_fromText("").empty()); - - std::wstring strp = - L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("patch_fromText: #1.", strp, - dmp.patch_fromText(strp)[0].toString()); - - assertEquals("patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", - dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n")[0].toString()); - - assertEquals("patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", - dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n")[0].toString()); - - assertEquals("patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", - dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n")[0].toString()); - - // Generates error. 
- assertThrow("patch_fromText: #5.", dmp.patch_fromText("Bad\nPatch\n"), - std::wstring); -} - -TEST_F(diff_match_patch_test, testPatchToText) { - std::wstring strp = - L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - auto patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Single", strp, dmp.patch_toText(patches)); - - strp = - L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n " - L"tes\n"; - patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Dua", strp, dmp.patch_toText(patches)); -} - -TEST_F(diff_match_patch_test, testPatchAddContext) { - dmp.Patch_Margin = 4; - auto p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; - dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_addContext: Simple case.", - L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", - p.toString()); - - p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough trailing context.", - L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", - p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough leading context.", - L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; - dmp.patch_addContext( - p, "The quick brown fox jumps. The quick brown fox crashes."); - assertEquals("patch_addContext: Ambiguity.", - L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", - p.toString()); -} - -TEST_F(diff_match_patch_test, testPatchMake) { - TPatchVector patches; - patches = dmp.patch_make("", ""); - assertEquals("patch_make: nullptr case", L"", dmp.patch_toText(patches)); - - std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; - std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; - std::wstring expectedPatch = - L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n " - L"jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; - // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to - // rolling context. - patches = dmp.patch_make(text2, text1); - assertEquals("patch_make: Text2+Text1 inputs", expectedPatch, - dmp.patch_toText(patches)); - - expectedPatch = - L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n " - L"jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Text1+Text2 inputs", expectedPatch, - dmp.patch_toText(patches)); - - auto diffs = dmp.diff_main(text1, text2, false); - patches = dmp.patch_make(diffs); - assertEquals("patch_make: Diff input", expectedPatch, - dmp.patch_toText(patches)); - - patches = dmp.patch_make(text1, diffs); - assertEquals("patch_make: Text1+Diff inputs", expectedPatch, - dmp.patch_toText(patches)); - - patches = dmp.patch_make(text1, text2, diffs); - assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated)", - expectedPatch, dmp.patch_toText(patches)); - - patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); - assertEquals("patch_toText: Character encoding.", - L"@@ -1,21 +1,21 " - L"@@\n-%601234567890-=%5B%5D%5C;',./" - L"\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", - dmp.patch_toText(patches)); - - diffs = {Diff(DELETE, "`1234567890-=[]\\;',./"), - Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")}; - assertEquals( - "patch_fromText: Character decoding.", diffs, - dmp.patch_fromText("@@ -1,21 +1,21 " - "@@\n-%601234567890-=%5B%5D%5C;',./" - 
"\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0] - .diffs); - - text1 = {}; - for (int x = 0; x < 100; x++) { - text1 += L"abcdef"; - } - text2 = text1 + L"123"; - expectedPatch = - L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Long string with repeats.", expectedPatch, - dmp.patch_toText(patches)); -} - -TEST_F(diff_match_patch_test, testPatchSplitMax) { - // Confirm Match_MaxBits is 32. - TPatchVector patches; - patches = dmp.patch_make( - "abcdefghijklmnopqrstuvwxyz01234567890", - "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #1.", - L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n " - L"ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n " - L"uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n " - L"zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", - dmp.patch_toText(patches)); - - patches = dmp.patch_make( - "abcdef123456789012345678901234567890123456789012345678901234567890123456" - "7890uvwxyz", - "abcdefuvwxyz"); - std::wstring oldToText = dmp.patch_toText(patches); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); - - patches = dmp.patch_make( - "1234567890123456789012345678901234567890123456789012345678901234567890", - "abc"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #3.", - L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ " - L"-29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ " - L"-57,14 +1,3 @@\n-78901234567890\n+abc\n", - dmp.patch_toText(patches)); - - patches = dmp.patch_make( - "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : " - "0 , t : 1", - "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : " - "0 , t : 1"); - dmp.patch_splitMax(patches); - assertEquals( - "patch_splitMax: #4.", - L"@@ -2,32 +2,32 @@\n bcdefghij , h 
: \n-0\n+1\n , t : 1 abcdef\n@@ " - L"-29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", - dmp.patch_toText(patches)); -} - -TEST_F(diff_match_patch_test, testPatchAddPadding) { - TPatchVector patches; - patches = dmp.patch_make("", "test"); - assertEquals("patch_addPadding: Both edges ful", L"@@ -0,0 +1,4 @@\n+test\n", - dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges full.", - L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", - dmp.patch_toText(patches)); - - patches = dmp.patch_make("XY", "XtestY"); - assertEquals("patch_addPadding: Both edges partial.", - L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges partial.", - L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", - dmp.patch_toText(patches)); - - patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); - assertEquals("patch_addPadding: Both edges none.", - L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", - dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges none.", - L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", - dmp.patch_toText(patches)); -} - -TEST_F(diff_match_patch_test, testPatchApply) { - dmp.Match_Distance = 1000; - dmp.Match_Threshold = 0.5f; - dmp.Patch_DeleteThreshold = 0.5f; - TPatchVector patches; - patches = dmp.patch_make("", ""); - auto results = dmp.patch_apply(patches, "Hello world."); - auto &&boolArray = results.second; - - std::wstring resultStr = - results.first + L"\t" + std::to_wstring(boolArray.size()); - assertEquals("patch_apply: nullptr case.", L"Hello world.\t0", resultStr); - - patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", - "That quick brown fox jumped over a lazy dog."); - assertEquals("patch_apply: Exact match.", - L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 " - L"@@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", 
- dmp.patch_toText(patches)); - - results = - dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring(boolArray); - - assertEquals("patch_apply: Exact match.", - L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", - resultStr); - - results = dmp.patch_apply(patches, - "The quick red rabbit jumps over the tired tiger."); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring(boolArray); - assertEquals("patch_apply: Partial match.", - L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", - resultStr); - - results = dmp.patch_apply(patches, - "I am the very model of a modern major general."); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring(boolArray); - assertEquals("patch_apply: Failed match.", - L"I am the very model of a modern major general.\tfalse\tfalse", - resultStr); - - patches = dmp.patch_make( - "x1234567890123456789012345678901234567890123456789012345678901234567890" - "y", - "xabcy"); - results = dmp.patch_apply(patches, - "x123456789012345678901234567890-----++++++++++----" - "-123456789012345678901234567890y"); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring(boolArray); - assertEquals("patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", - resultStr); - - patches = dmp.patch_make( - "x1234567890123456789012345678901234567890123456789012345678901234567890" - "y", - "xabcy"); - results = dmp.patch_apply(patches, - "x12345678901234567890---------------++++++++++----" - "-----------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring(boolArray); - assertEquals("patch_apply: Big delete, large change 1.", - L"xabc12345678901234567890---------------++++++++++-------------" - L"--12345678901234567890y\tfalse\ttrue", - resultStr); - - dmp.Patch_DeleteThreshold = 0.6f; - patches = dmp.patch_make( - 
"x1234567890123456789012345678901234567890123456789012345678901234567890" - "y", - "xabcy"); - results = dmp.patch_apply(patches, - "x12345678901234567890---------------++++++++++----" - "-----------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring(boolArray); - assertEquals("patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", - resultStr); - dmp.Patch_DeleteThreshold = 0.5f; - - dmp.Match_Threshold = 0.0f; - dmp.Match_Distance = 0; - patches = - dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", - "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------" - "1234567YYYYYYYYYY890"); - results = dmp.patch_apply( - patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); - boolArray = results.second; - resultStr = results.first + NUtils::to_wstring(boolArray); - assertEquals("patch_apply: Compensate for failed patch.", - L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------" - L"1234567YYYYYYYYYY890\tfalse\ttrue", - resultStr); - dmp.Match_Threshold = 0.5f; - dmp.Match_Distance = 1000; - - patches = dmp.patch_make("", "test"); - std::wstring patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, ""); - assertEquals("patch_apply: No side effects.", patchStr, - dmp.patch_toText(patches)); - - patches = - dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); - patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_apply: No side effects with major delete.", patchStr, - dmp.patch_toText(patches)); - - patches = dmp.patch_make("", "test"); - results = dmp.patch_apply(patches, ""); - boolArray = results.second; - resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0], false); - assertEquals("patch_apply: Edge exact match.", L"test\ttrue", resultStr); - - patches = dmp.patch_make("XY", "XtestY"); - results = dmp.patch_apply(patches, "XY"); - boolArray = 
results.second; - resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0], false); - assertEquals("patch_apply: Near edge exact match.", L"XtestY\ttrue", - resultStr); - - patches = dmp.patch_make("y", "y123"); - results = dmp.patch_apply(patches, "x"); - boolArray = results.second; - resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0]); - assertEquals("patch_apply: Edge partial match.", L"x123\ttrue", resultStr); -} - -TEST_F(diff_match_patch_test, fromGitHubExamples) { - auto lhs = - L"I am the very model of a modern Major-General, I've information " - L"vegetable, animal, and mineral, I know the kings of England, and I " - L"quote the fights historical, From Marathon to Waterloo, in order " - L"categorical."; - auto rhs = - L"I am the very model of a cartoon individual, My animation's comical, " - L"unusual, and whimsical, I'm quite adept at funny gags, comedic theory " - L"I have read, From wicked puns and stupid jokes to anvils that drop on " - L"your head."; - auto diffs = dmp.diff_main(lhs, rhs); - dmp.diff_cleanupSemantic(diffs); - auto console = dmp.diff_prettyConsole(diffs); - auto html = dmp.diff_prettyHtml(diffs); - auto delta = dmp.diff_toDelta(diffs); - - auto consoleGolden = - L"I am the very model of a \x1B[0;31mmodern Major-General, I've " - L"information vegetable, animal, and mineral, I know the kings of " - L"England, and I quote the fights historical, From Marathon to Waterloo, " - L"in order categorical\x1B[m\x1B[0;32mcartoon individual, My animation's " - L"comical, unusual, and whimsical, I'm quite adept at funny gags, " - L"comedic theory I have read, From wicked puns and stupid jokes to " - L"anvils that drop on your head\x1B[m."; - assertEquals("gitHubDemos", consoleGolden, console); - - auto htmlGolden = - LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order 
categoricalcartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head.)"; - assertEquals("gitHubDemos", htmlGolden, html); - - auto deltaGolden = - L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and " - L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " - L"From wicked puns and stupid jokes to anvils that drop on your head\t=1"; - assertEquals("gitHubDemos", deltaGolden, delta); - - auto patches = dmp.patch_make(lhs, rhs); - auto patch = dmp.patch_toText(patches); - auto patchGolden = - L"@@ -22,187 +22,198 @@\n f a \n-modern Major-General, I've information " - L"vegetable, animal, and mineral, I know the kings of England, and I " - L"quote the fights historical, From Marathon to Waterloo, in order " - L"categorical\n+cartoon individual, My animation's comical, unusual, and " - L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " - L"From wicked puns and stupid jokes to anvils that drop on your head\n " - L".\n"; - assertEquals("gitHubDemos", patchGolden, patch); -} + assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( "1234", "1234xyz" ) ); + } + + TEST_F( diff_match_patch_test, testDiffCommonSuffix ) + { + // Detect any common suffix. + assertEquals( "diff_commonSuffix: nullptr case.", 0, dmp.diff_commonSuffix( "abc", "xyz" ) ); + + assertEquals( "diff_commonSuffix: Non-nullptr case.", 4, dmp.diff_commonSuffix( "abcdef1234", "xyz1234" ) ); + + assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( "1234", "xyz1234" ) ); + } + + TEST_F( diff_match_patch_test, testDiffCommonOverlap ) + { + // Detect any suffix/prefix overlap. 
+ assertEquals( "diff_commonOverlap: nullptr case.", 0, dmp.diff_commonOverlap( "", "abcd" ) ); + + assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( "abc", "abcd" ) ); + + assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( "123456", "abcd" ) ); + + assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( "123456xxx", "xxxabcd" ) ); + + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals( "diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap( L"fi", std::wstring( L"\ufb01i" ) ) ); + } + + TEST_F( diff_match_patch_test, testDiffHalfmatch ) + { + // Detect a halfmatch. + dmp.Diff_Timeout = 1; + assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( "1234567890", "abcdef" ) ); + + assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( "12345", "23" ) ); + + assertEquals( "diff_halfMatch: Single Match #1.", TStringVector( { L"12", L"90", L"a", L"z", L"345678" } ), dmp.diff_halfMatch( "1234567890", "a345678z" ) ); + + assertEquals( "diff_halfMatch: Single Match #2.", TStringVector( { L"a", L"z", L"12", L"90", L"345678" } ), dmp.diff_halfMatch( "a345678z", "1234567890" ) ); + + assertEquals( "diff_halfMatch: Single Match #3.", TStringVector( { L"abc", L"z", L"1234", L"0", L"56789" } ), dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); + + assertEquals( "diff_halfMatch: Single Match #4.", TStringVector( { L"a", L"xyz", L"1", L"7890", L"23456" } ), dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #1.", TStringVector( { L"12123", L"123121", L"a", L"z", L"1234123451234" } ), dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); + + assertEquals( "diff_halfMatch: Multiple Matches #2.", TStringVector( { L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-=" } ), dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); + + 
assertEquals( "diff_halfMatch: Multiple Matches #3.", TStringVector( { L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y" } ), dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); + + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not + // -qHillo+x=HelloHe-w+Hulloy + assertEquals( "diff_halfMatch: Non-optimal halfmatch.", TStringVector( { L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe" } ), dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); + + dmp.Diff_Timeout = 0; + assertEmpty( "diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( L"qHilloHelloHew", L"xHelloHeHulloy" ) ); + } + + TEST_F( diff_match_patch_test, testDiffLinesToChars ) + { + // Convert lines down to characters. + TStringVector tmpVector = TStringVector( { L"", L"alpha\n", L"beta\n" } ); + TVariantVector tmpVarList; + tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 1 }, false ) ); //(("\u0001\u0002\u0001")); + tmpVarList.emplace_back( NUtils::to_wstring( { 2, 1, 2 }, false ) ); // (("\u0002\u0001\u0002")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"alpha\r\n" ); + tmpVector.emplace_back( L"beta\r\n" ); + tmpVector.emplace_back( L"\r\n" ); + tmpVarList.emplace_back( std::wstring() ); + tmpVarList.emplace_back( NUtils::to_wstring( { 1, 2, 3, 3 }, false ) ); // (("\u0001\u0002\u0003\u0003")); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "", "alpha\r\nbeta\r\n\r\n\r\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"a" ); + tmpVector.emplace_back( L"b" ); + tmpVarList.emplace_back( NUtils::to_wstring( kOne ) ); // (("\u0001")); + tmpVarList.emplace_back( NUtils::to_wstring( kTwo ) ); // (("\u0002")); + 
tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "a", "b" ) ); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + std::wstring lines; + std::wstring chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); + lines += std::to_wstring( x ) + L"\n"; + chars += NUtils::to_wstring( (wchar_t)x ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.emplace( tmpVector.begin(), L"" ); + tmpVarList.emplace_back( chars ); + tmpVarList.emplace_back( std::wstring() ); + tmpVarList.emplace_back( tmpVector ); + assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, {} ) ); + } + + TEST_F( diff_match_patch_test, testDiffCharsToLines ) + { + // First check that Diff equality works. + assertTrue( "diff_charsToLines:", Diff( EOperation::eEQUAL, "a" ) == Diff( EOperation::eEQUAL, "a" ) ); + + assertEquals( "diff_charsToLines:", Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eEQUAL, "a" ) ); + + // Convert chars up to lines. + TDiffVector diffs; + diffs.emplace_back( EOperation::eEQUAL, NUtils::to_wstring( { 1, 2, 1 }, false ) ); // ("\u0001\u0002\u0001"); + diffs.emplace_back( EOperation::eINSERT, NUtils::to_wstring( { 2, 1, 2 }, false ) ); // ("\u0002\u0001\u0002"); + TStringVector tmpVector; + tmpVector.emplace_back( L"" ); + tmpVector.emplace_back( L"alpha\n" ); + tmpVector.emplace_back( L"beta\n" ); + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines:", TDiffVector( { Diff( EOperation::eEQUAL, "alpha\nbeta\nalpha\n" ), Diff( EOperation::eINSERT, "beta\nalpha\nbeta\n" ) } ), diffs ); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + std::vector< TVariant > tmpVarList; + std::wstring lines; + std::wstring chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.emplace_back( std::to_wstring( x ) + L"\n" ); + lines += std::to_wstring( x ) + L"\n"; + chars += NUtils::to_wstring( (wchar_t)x ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.emplace( tmpVector.begin(), L"" ); + diffs = { Diff( EOperation::eDELETE, chars ) }; + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines: More than 256.", TDiffVector( { Diff( EOperation::eDELETE, lines ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupMerge ) + { + // Cleanup a messy diff. + TDiffVector diffs; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: No change case.", TDiffVector( { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eEQUAL, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge equalities.", TDiffVector( { Diff( EOperation::eEQUAL, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eDELETE, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge deletions.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eINSERT, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( 
"diff_cleanupMerge: Merge insertions.", TDiffVector( { Diff( EOperation::eINSERT, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eDELETE, "c" ), Diff( EOperation::eINSERT, "d" ), Diff( EOperation::eEQUAL, "e" ), Diff( EOperation::eEQUAL, "f" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge interweave.", TDiffVector( { Diff( EOperation::eDELETE, "ac" ), Diff( EOperation::eINSERT, "bd" ), Diff( EOperation::eEQUAL, "ef" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "abc" ), Diff( EOperation::eDELETE, "dc" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", TDiffVector( { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "d" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eEQUAL, "c" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "abc" ), Diff( EOperation::eDELETE, "dc" ), Diff( EOperation::eEQUAL, "y" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", TDiffVector( { Diff( EOperation::eEQUAL, "xa" ), Diff( EOperation::eDELETE, "d" ), Diff( EOperation::eINSERT, "b" ), Diff( EOperation::eEQUAL, "cy" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eINSERT, "ba" ), Diff( EOperation::eEQUAL, "c" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left.", TDiffVector( { Diff( EOperation::eINSERT, "ab" ), Diff( EOperation::eEQUAL, "ac" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "c" ), Diff( EOperation::eINSERT, "ab" ), Diff( EOperation::eEQUAL, "a" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right.", TDiffVector( { Diff( EOperation::eEQUAL, "ca" ), Diff( EOperation::eINSERT, "ba" ) } ), diffs 
); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eEQUAL, "c" ), Diff( EOperation::eDELETE, "ac" ), Diff( EOperation::eEQUAL, "x" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left recursive.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eEQUAL, "acx" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "ca" ), Diff( EOperation::eEQUAL, "c" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eEQUAL, "a" ) }; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right recursive.", TDiffVector( { Diff( EOperation::eEQUAL, "xca" ), Diff( EOperation::eDELETE, "cba" ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupSemanticLossless ) + { + // Slide diffs to match logical boundaries. + auto diffs = TDiffVector(); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "AAA\r\n\r\nBBB" ), Diff( EOperation::eINSERT, "\r\nDDD\r\n\r\nBBB" ), Diff( EOperation::eEQUAL, "\r\nEEE" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Blank lines.", TDiffVector( { Diff( EOperation::eEQUAL, "AAA\r\n\r\n" ), Diff( EOperation::eINSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EOperation::eEQUAL, "BBB\r\nEEE" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "AAA\r\nBBB" ), Diff( EOperation::eINSERT, " DDD\r\nBBB" ), Diff( EOperation::eEQUAL, " EEE" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "AAA\r\n" ), Diff( EOperation::eINSERT, "BBB DDD\r\n" ), Diff( EOperation::eEQUAL, "BBB EEE" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The c" ), Diff( EOperation::eINSERT, "ow and the c" ), Diff( EOperation::eEQUAL, "at." 
) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The " ), Diff( EOperation::eINSERT, "cow and the " ), Diff( EOperation::eEQUAL, "cat." ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The-c" ), Diff( EOperation::eINSERT, "ow-and-the-c" ), Diff( EOperation::eEQUAL, "at." ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The-" ), Diff( EOperation::eINSERT, "cow-and-the-" ), Diff( EOperation::eEQUAL, "cat." ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "ax" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the start.", TDiffVector( { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "aax" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "xa" ), Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "a" ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the end.", TDiffVector( { Diff( EOperation::eEQUAL, "xaa" ), Diff( EOperation::eDELETE, "a" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The xxx. The " ), Diff( EOperation::eINSERT, "zzz. The " ), Diff( EOperation::eEQUAL, "yyy." ) }; + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Sentence boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The xxx." ), Diff( EOperation::eINSERT, " The zzz." ), Diff( EOperation::eEQUAL, " The yyy." ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupSemantic ) + { + // Cleanup semantically trivial equalities. 
+ auto diffs = TDiffVector(); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "cd" ), Diff( EOperation::eEQUAL, "12" ), Diff( EOperation::eDELETE, "e" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #1.", TDiffVector( { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "cd" ), Diff( EOperation::eEQUAL, "12" ), Diff( EOperation::eDELETE, "e" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "ABC" ), Diff( EOperation::eEQUAL, "1234" ), Diff( EOperation::eDELETE, "wxyz" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #2.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "ABC" ), Diff( EOperation::eEQUAL, "1234" ), Diff( EOperation::eDELETE, "wxyz" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eDELETE, "c" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Simple elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "b" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eEQUAL, "cd" ), Diff( EOperation::eDELETE, "e" ), Diff( EOperation::eEQUAL, "f" ), Diff( EOperation::eINSERT, "g" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Backpass elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abcdef" ), Diff( EOperation::eINSERT, "cdfg" ) } ), diffs ); + + diffs = { Diff( EOperation::eINSERT, "1" ), Diff( EOperation::eEQUAL, "A" ), Diff( EOperation::eDELETE, "B" ), Diff( EOperation::eINSERT, "2" ), Diff( EOperation::eEQUAL, "_" ), Diff( EOperation::eINSERT, "1" ), Diff( EOperation::eEQUAL, "A" ), Diff( EOperation::eDELETE, "B" ), Diff( 
EOperation::eINSERT, "2" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Multiple elimination.", TDiffVector( { Diff( EOperation::eDELETE, "AB_AB" ), Diff( EOperation::eINSERT, "1A2_1A2" ) } ), diffs ); + + diffs = { Diff( EOperation::eEQUAL, "The c" ), Diff( EOperation::eDELETE, "ow and the c" ), Diff( EOperation::eEQUAL, "at." ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", TDiffVector( { Diff( EOperation::eEQUAL, "The " ), Diff( EOperation::eDELETE, "cow and the " ), Diff( EOperation::eEQUAL, "cat." ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abcxx" ), Diff( EOperation::eINSERT, "xxdef" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No overlap elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abcxx" ), Diff( EOperation::eINSERT, "xxdef" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abcxxx" ), Diff( EOperation::eINSERT, "xxxdef" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Overlap elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eEQUAL, "xxx" ), Diff( EOperation::eINSERT, "def" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "xxxabc" ), Diff( EOperation::eINSERT, "defxxx" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Reverse overlap elimination.", TDiffVector( { Diff( EOperation::eINSERT, "def" ), Diff( EOperation::eEQUAL, "xxx" ), Diff( EOperation::eDELETE, "abc" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "abcd1212" ), Diff( EOperation::eINSERT, "1212efghi" ), Diff( EOperation::eEQUAL, "----" ), Diff( EOperation::eDELETE, "A3" ), Diff( EOperation::eINSERT, "3BC" ) }; + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", TDiffVector( { Diff( EOperation::eDELETE, "abcd" ), Diff( EOperation::eEQUAL, "1212" ), Diff( EOperation::eINSERT, 
"efghi" ), Diff( EOperation::eEQUAL, "----" ), Diff( EOperation::eDELETE, "A" ), Diff( EOperation::eEQUAL, "3" ), Diff( EOperation::eINSERT, "BC" ) } ), diffs ); + } + + TEST_F( diff_match_patch_test, testDiffCleanupEfficiency ) + { + // Cleanup operationally trivial equalities. + dmp.Diff_EditCost = 4; + auto diffs = TDiffVector(); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: nullptr case.", TDiffVector(), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "wxyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: No elimination.", TDiffVector( { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "wxyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "xyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abxyzcd" ), Diff( EOperation::eINSERT, "12xyz34" ) } ), diffs ); + + diffs = { Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", TDiffVector( { Diff( EOperation::eDELETE, "xcd" ), Diff( EOperation::eINSERT, "12x34" ) } ), diffs ); + + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "xy" ), Diff( EOperation::eINSERT, "34" ), Diff( EOperation::eEQUAL, "z" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "56" ) }; + 
dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Backpass elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abxyzcd" ), Diff( EOperation::eINSERT, "12xy34z56" ) } ), diffs ); + + dmp.Diff_EditCost = 5; + diffs = { Diff( EOperation::eDELETE, "ab" ), Diff( EOperation::eINSERT, "12" ), Diff( EOperation::eEQUAL, "wxyz" ), Diff( EOperation::eDELETE, "cd" ), Diff( EOperation::eINSERT, "34" ) }; + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: High cost elimination.", TDiffVector( { Diff( EOperation::eDELETE, "abwxyzcd" ), Diff( EOperation::eINSERT, "12wxyz34" ) } ), diffs ); + dmp.Diff_EditCost = 4; + } + + TEST_F( diff_match_patch_test, testDiffPrettyHtml ) + { + // Pretty print. + auto diffs = TDiffVector( { Diff( EOperation::eEQUAL, "a\n" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c&d" ) } ); + assertEquals( + "diff_prettyHtml:", + L"
<B>b</B>c&d", + dmp.diff_prettyHtml( diffs ) ); + } + + TEST_F( diff_match_patch_test, testDiffPrettyConsole ) + { + // Pretty print. + static std::wstring kRed{ L"\033[0;31m" }; + static std::wstring kGreen{ L"\033[0;32m" }; + static std::wstring kYellow{ L"\033[0;33m" }; + static std::wstring kReset{ L"\033[m" }; + static std::wstring kEOL{ NUtils::fromPercentEncoding( L"%C2%B6" ) + L"\n" }; + + auto diffs = TDiffVector( { Diff( EOperation::eEQUAL, "a\n" ), Diff( EOperation::eDELETE, "b" ), Diff( EOperation::eINSERT, "c&d" ) } ); + auto results = dmp.diff_prettyConsole( diffs ); + assertEquals( "diff_prettyConsole:", L"a" + kEOL + kRed + L"b" + kReset + kGreen + L"c&d" + kReset, results ); + } + + TEST_F( diff_match_patch_test, testDiffText ) + { + // Compute the source and destination texts. + auto diffs = { Diff( EOperation::eEQUAL, "jump" ), Diff( EOperation::eDELETE, "s" ), Diff( EOperation::eINSERT, "ed" ), Diff( EOperation::eEQUAL, " over " ), Diff( EOperation::eDELETE, "the" ), Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eEQUAL, " lazy" ) }; + assertEquals( "diff_text1:", L"jumps over the lazy", dmp.diff_text1( diffs ) ); + assertEquals( "diff_text2:", L"jumped over a lazy", dmp.diff_text2( diffs ) ); + } + + TEST_F( diff_match_patch_test, testDiffDelta ) + { + // Convert a diff into delta string. 
+ auto diffs = TDiffVector( { Diff( EOperation::eEQUAL, "jump" ), Diff( EOperation::eDELETE, "s" ), Diff( EOperation::eINSERT, "ed" ), Diff( EOperation::eEQUAL, " over " ), Diff( EOperation::eDELETE, "the" ), Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eEQUAL, " lazy" ), Diff( EOperation::eINSERT, "old dog" ) } ); + std::wstring text1 = dmp.diff_text1( diffs ); + assertEquals( "diff_text1: Base text.", L"jumps over the lazy", text1 ); + + std::wstring delta = dmp.diff_toDelta( diffs ); + std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; + assertEquals( "diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); + + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); + + // Generates error (19 < 20). + assertThrow( "diff_fromDelta: Too long.", dmp.diff_fromDelta( text1 + L"x", delta ), std::wstring ); + + // Generates error (19 > 18). + assertThrow( "diff_fromDelta: Too short.", dmp.diff_fromDelta( text1.substr( 1 ), delta ), std::wstring ); + + // Generates error (%c3%xy invalid Unicode). + assertThrow( "diff_fromDelta: Invalid character.", dmp.diff_fromDelta( "", "+%c3%xy" ), std::wstring ); + + // Test deltas with special characters. 
+ diffs = { Diff( EOperation::eEQUAL, std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %" ) ), Diff( EOperation::eDELETE, std::wstring( L"\u0681 " ) + kOne + std::wstring( L" \n ^" ) ), Diff( EOperation::eINSERT, std::wstring( L"\u0682 " ) + kTwo + std::wstring( L" \\ |" ) ) }; + + text1 = dmp.diff_text1( diffs ); + golden = std::wstring( L"\u0680 " ) + kZero + std::wstring( L" \t %\u0681 " ) + kOne + std::wstring( L" \n ^" ); + assertEquals( "diff_text1: Unicode text", golden, text1 ); + + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta ); + + assertEquals( "diff_fromDelta: Unicode", diffs, dmp.diff_fromDelta( text1, delta ) ); + + // Verify pool of unchanged characters. + diffs = { Diff( EOperation::eINSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # " ) }; + std::wstring text2 = dmp.diff_text2( diffs ); + assertEquals( "diff_text2: Unchanged characters.", L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); + + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unchanged characters.", L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); + + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( {}, delta ) ); + } + + TEST_F( diff_match_patch_test, testDiffXIndex ) + { + // Translate a location in text1 to text2. 
+ auto diffs = TDiffVector( { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "1234" ), Diff( EOperation::eEQUAL, "xyz" ) } ); + assertEquals( "diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex( diffs, 2 ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "1234" ), Diff( EOperation::eEQUAL, "xyz" ) }; + assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); + } + + TEST_F( diff_match_patch_test, testDiffLevenshtein ) + { + auto diffs = TDiffVector( { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "1234" ), Diff( EOperation::eEQUAL, "xyz" ) } ); + assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); + + diffs = { Diff( EOperation::eEQUAL, "xyz" ), Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eINSERT, "1234" ) }; + assertEquals( "diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein( diffs ) ); + + diffs = { Diff( EOperation::eDELETE, "abc" ), Diff( EOperation::eEQUAL, "xyz" ), Diff( EOperation::eINSERT, "1234" ) }; + assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); + } + + TEST_F( diff_match_patch_test, testDiffBisect ) + { + // Normal. + std::wstring a = L"cat"; + std::wstring b = L"map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + auto diffs = TDiffVector( { Diff( EOperation::eDELETE, "c" ), Diff( EOperation::eINSERT, "m" ), Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "t" ), Diff( EOperation::eINSERT, "p" ) } ); + auto results = dmp.diff_bisect( a, b, std::numeric_limits< clock_t >::max() ); + assertEquals( "diff_bisect: Normal.", diffs, results ); + + // Timeout. 
+ diffs = { Diff( EOperation::eDELETE, "cat" ), Diff( EOperation::eINSERT, "map" ) }; + results = dmp.diff_bisect( a, b, 0 ); + assertEquals( "diff_bisect: Timeout.", diffs, results ); + } + + TEST_F( diff_match_patch_test, testDiffMain ) + { + // Perform a trivial diff. + auto diffs = TDiffVector(); + assertEquals( "diff_main: nullptr case.", diffs, dmp.diff_main( "", "", false ) ); + + diffs = { Diff( EOperation::eDELETE, "abc" ) }; + assertEquals( "diff_main: RHS side nullptr case.", diffs, dmp.diff_main( "abc", "", false ) ); + + diffs = { Diff( EOperation::eINSERT, "abc" ) }; + assertEquals( "diff_main: LHS side nullptr case.", diffs, dmp.diff_main( "", "abc", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "abc" ) }; + assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( "abc", "abc", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "ab" ), Diff( EOperation::eINSERT, "123" ), Diff( EOperation::eEQUAL, "c" ) }; + assertEquals( "diff_main: Simple insertion.", diffs, dmp.diff_main( "abc", "ab123c", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "123" ), Diff( EOperation::eEQUAL, "bc" ) }; + assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( "a123bc", "abc", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eINSERT, "123" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eINSERT, "456" ), Diff( EOperation::eEQUAL, "c" ) }; + assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( "abc", "a123b456c", false ) ); + + diffs = { Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "123" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eDELETE, "456" ), Diff( EOperation::eEQUAL, "c" ) }; + assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( "a123b456c", "abc", false ) ); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0; + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, "b" ) }; + assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( "a", "b", false ) ); + + diffs = { Diff( EOperation::eDELETE, "Apple" ), Diff( EOperation::eINSERT, "Banana" ), Diff( EOperation::eEQUAL, "s are a" ), Diff( EOperation::eINSERT, "lso" ), Diff( EOperation::eEQUAL, " fruit." ) }; + assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( "Apples are a fruit.", "Bananas are also fruit.", false ) ); + + diffs = { Diff( EOperation::eDELETE, "a" ), Diff( EOperation::eINSERT, L"\u0680" ), Diff( EOperation::eEQUAL, "x" ), Diff( EOperation::eDELETE, "\t" ), Diff( EOperation::eINSERT, NUtils::to_wstring( kZero ) ) }; + assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( L"ax\t", std::wstring( L"\u0680x" ) + kZero, false ) ); + + diffs = { Diff( EOperation::eDELETE, "1" ), Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "y" ), Diff( EOperation::eEQUAL, "b" ), Diff( EOperation::eDELETE, "2" ), Diff( EOperation::eINSERT, "xab" ) }; + assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( "1ayb2", "abxab", false ) ); + + diffs = { Diff( EOperation::eINSERT, "xaxcx" ), Diff( EOperation::eEQUAL, "abc" ), Diff( EOperation::eDELETE, "y" ) }; + assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( "abcy", "xaxcxabc", false ) ); + + diffs = { Diff( EOperation::eDELETE, "ABCD" ), Diff( EOperation::eEQUAL, "a" ), Diff( EOperation::eDELETE, "=" ), Diff( EOperation::eINSERT, "-" ), Diff( EOperation::eEQUAL, "bcd" ), Diff( EOperation::eDELETE, "=" ), Diff( EOperation::eINSERT, "-" ), Diff( EOperation::eEQUAL, "efghijklmnopqrs" ), Diff( EOperation::eDELETE, "EFGHIJKLMNOefg" ) }; + assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false ) ); + + diffs = { Diff( EOperation::eINSERT, " " ), Diff( EOperation::eEQUAL, "a" ), Diff( 
EOperation::eINSERT, "nd" ), Diff( EOperation::eEQUAL, " [[Pennsylvania]]" ), Diff( EOperation::eDELETE, " and [[New" ) }; + assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( "a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false ) ); + + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the + // text lengths. + std::wstring a = L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the " + L"wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + std::wstring b = L"I am the very model of a modern major general,\nI've information " + L"vegetable, animal, and mineral,\nI know the kings of England, and I " + L"quote the fights historical,\nFrom Marathon to Waterloo, in order " + L"categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for ( int x = 0; x < 10; x++ ) + { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main( a, b ); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue( "diff_main: Timeout min.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC ) <= ( endTime - startTime ) ); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue( "diff_main: Timeout max.", ( dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 ) > ( endTime - startTime ) ); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij" + L"\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghi" + L"j\nabcdefghij\n"; + assertEquals( "diff_main: Simple line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = L"12345678901234567890123456789012345678901234567890123456789012345678901" + L"23456789012345678901234567890123456789012345678901234567890"; + b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghija" + L"bcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals( "diff_main: Single line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890" + L"\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n123456789" + L"0\nabcdefghij\n"; + TStringVector texts_linemode = diff_rebuildtexts( dmp.diff_main( a, b, true ) ); + TStringVector texts_textmode = diff_rebuildtexts( dmp.diff_main( a, b, false ) ); + assertEquals( "diff_main: Overlap line-mode.", texts_textmode, texts_linemode ); + } + + // MATCH TEST FUNCTIONS + + TEST_F( diff_match_patch_test, testMatchAlphabet ) + { + // Initialise the bitmasks for Bitap. 
+ TCharPosMap bitmask; + bitmask[ 'a' ] = 4; + bitmask[ 'b' ] = 2; + bitmask[ 'c' ] = 1; + assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( "abc" ) ); + + bitmask = TCharPosMap(); + bitmask[ 'a' ] = 37; + bitmask[ 'b' ] = 18; + bitmask[ 'c' ] = 8; + assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( "abcaba" ) ); + } + + TEST_F( diff_match_patch_test, testMatchBitap ) + { + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 5 ) ); + + assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 0 ) ); + + assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( "abcdefghijk", "efxhi", 0 ) ); + + assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( "abcdefghijk", "cdefxyhijk", 5 ) ); + + assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( "abcdefghijk", "bxy", 1 ) ); + + assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( "123456789xx0", "3456789x0", 2 ) ); + + assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( "abcdef", "xxabc", 4 ) ); + + assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( "abcdef", "defyy", 4 ) ); + + assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( "abcdef", "xabcdefy", 0 ) ); + + dmp.Match_Threshold = 0.4f; + assertEquals( "match_bitap: Threshold #1.", 4, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); + + dmp.Match_Threshold = 0.3f; + assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); + + dmp.Match_Threshold = 0.0f; + assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( "abcdefghijk", "bcdef", 1 ) ); + + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( "abcdexyzabcde", "abccde", 3 ) ); + + assertEquals( "match_bitap: Multiple select #2.", 8, 
dmp.match_bitap( "abcdexyzabcde", "abccde", 5 ) ); + + dmp.Match_Distance = 10; // Strict location. + assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); + + assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1 ) ); + + dmp.Match_Distance = 1000; // Loose location. + assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); + } + + TEST_F( diff_match_patch_test, testMatchMain ) + { + // Full match. + assertEquals( "match_main: Equality.", 0, dmp.match_main( "abcdef", "abcdef", 1000 ) ); + + assertEquals( "match_main: nullptr text.", -1, dmp.match_main( "", "abcdef", 1 ) ); + + assertEquals( "match_main: nullptr pattern.", 3, dmp.match_main( "abcdef", "", 3 ) ); + + assertEquals( "match_main: Exact match.", 3, dmp.match_main( "abcdef", "de", 3 ) ); + + dmp.Match_Threshold = 0.7f; + assertEquals( "match_main: Complex match.", 4, dmp.match_main( "I am the very model of a modern major general.", " that berry ", 5 ) ); + dmp.Match_Threshold = 0.5f; + } + + // PATCH TEST FUNCTIONS + + TEST_F( diff_match_patch_test, testPatchObj ) + { + // Patch Object. 
+ Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = { Diff( EOperation::eEQUAL, "jump" ), Diff( EOperation::eDELETE, "s" ), Diff( EOperation::eINSERT, "ed" ), Diff( EOperation::eEQUAL, " over " ), Diff( EOperation::eDELETE, "the" ), Diff( EOperation::eINSERT, "a" ), Diff( EOperation::eEQUAL, "\nlaz" ) }; + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "patch: toString.", strp, p.toString() ); + } + + TEST_F( diff_match_patch_test, testPatchFromText ) + { + assertTrue( "patch_fromText: #0.", dmp.patch_fromText( "" ).empty() ); + + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" )[ 0 ].toString() ); + + assertEquals( "patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" )[ 0 ].toString() ); + + // Generates error. 
+ assertThrow( "patch_fromText: #5.", dmp.patch_fromText( "Bad\nPatch\n" ), std::wstring ); + } + + TEST_F( diff_match_patch_test, testPatchToText ) + { + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + auto patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Single", strp, dmp.patch_toText( patches ) ); + + strp = L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n " + L"tes\n"; + patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Dua", strp, dmp.patch_toText( patches ) ); + } + + TEST_F( diff_match_patch_test, testPatchAddContext ) + { + dmp.Patch_Margin = 4; + auto p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" )[ 0 ]; + dmp.patch_addContext( p, "The quick brown fox jumps. The quick brown fox crashes." ); + assertEquals( "patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString() ); + } + + TEST_F( diff_match_patch_test, testPatchMake ) + { + TPatchVector patches; + patches = dmp.patch_make( "", "" ); + assertEquals( "patch_make: nullptr case", L"", dmp.patch_toText( patches ) ); + + std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; + std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; + std::wstring expectedPatch = L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n " + L"jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to + // rolling context. + patches = dmp.patch_make( text2, text1 ); + assertEquals( "patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText( patches ) ); + + expectedPatch = L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n " + L"jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText( patches ) ); + + auto diffs = dmp.diff_main( text1, text2, false ); + patches = dmp.patch_make( diffs ); + assertEquals( "patch_make: Diff input", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( text1, diffs ); + assertEquals( "patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( text1, text2, diffs ); + assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?" ); + assertEquals( + "patch_toText: Character encoding.", + L"@@ -1,21 +1,21 " + L"@@\n-%601234567890-=%5B%5D%5C;',./" + L"\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", + dmp.patch_toText( patches ) ); + + diffs = { Diff( EOperation::eDELETE, "`1234567890-=[]\\;',./" ), Diff( EOperation::eINSERT, "~!@#$%^&*()_+{}|:\"<>?" 
) }; + assertEquals( + "patch_fromText: Character decoding.", diffs, + dmp.patch_fromText( "@@ -1,21 +1,21 " + "@@\n-%601234567890-=%5B%5D%5C;',./" + "\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" )[ 0 ] + .diffs ); + + text1 = {}; + for ( int x = 0; x < 100; x++ ) + { + text1 += L"abcdef"; + } + text2 = text1 + L"123"; + expectedPatch = L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText( patches ) ); + } + + TEST_F( diff_match_patch_test, testPatchSplitMax ) + { + dmp.Diff_Timeout = 0; + // Confirm Match_MaxBits is 32. + TPatchVector patches; + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); + dmp.patch_splitMax( patches ); + assertEquals( + "patch_splitMax: #1.", + L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n " + L"ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n " + L"uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n " + L"zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", + dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( + "abcdef123456789012345678901234567890123456789012345678901234567890123456" + "7890uvwxyz", + "abcdefuvwxyz" ); + std::wstring oldToText = dmp.patch_toText( patches ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #2.", oldToText, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc" ); + dmp.patch_splitMax( patches ); + auto golden = L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ " + L"-29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ " + L"-57,14 +1,3 @@\n-78901234567890\n+abc\n"; + auto results = dmp.patch_toText( patches ); + assertEquals( "patch_splitMax: #3.", golden, results ); + + patches = dmp.patch_make( + "abcdefghij , h : 0 , t : 1 
abcdefghij , h : 0 , t : 1 abcdefghij , h : " + "0 , t : 1", + "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : " + "0 , t : 1" ); + dmp.patch_splitMax( patches ); + assertEquals( + "patch_splitMax: #4.", + L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ " + L"-29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", + dmp.patch_toText( patches ) ); + } + + TEST_F( diff_match_patch_test, testPatchAddPadding ) + { + TPatchVector patches; + patches = dmp.patch_make( "", "test" ); + assertEquals( "patch_addPadding: Both edges ful", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges full.", L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "XY", "XtestY" ); + assertEquals( "patch_addPadding: Both edges partial.", L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges partial.", L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "XXXXYYYY", "XXXXtestYYYY" ); + assertEquals( "patch_addPadding: Both edges none.", L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges none.", L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + } + + TEST_F( diff_match_patch_test, testPatchApply ) + { + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + TPatchVector patches; + patches = dmp.patch_make( "", "" ); + auto results = dmp.patch_apply( patches, "Hello world." 
); + auto &&boolArray = results.second; + + std::wstring resultStr = results.first + L"\t" + std::to_wstring( boolArray.size() ); + assertEquals( "patch_apply: nullptr case.", L"Hello world.\t0", resultStr ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog." ); + assertEquals( + "patch_apply: Exact match.", + L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 " + L"@@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", + dmp.patch_toText( patches ) ); + + results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + + assertEquals( "patch_apply: Exact match.", L"That quick brown fox jumped over a lazy dog.(true, true)", resultStr ); + + results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Partial match.", L"That quick red rabbit jumped over a tired tiger.(true, true)", resultStr ); + + results = dmp.patch_apply( patches, "I am the very model of a modern major general." 
); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Failed match.", L"I am the very model of a modern major general.(false, false)", resultStr ); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy" ); + results = dmp.patch_apply( + patches, "x123456789012345678901234567890-----++++++++++----" + "-123456789012345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Big delete, small change.", L"xabcy(true, true)", resultStr ); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy" ); + results = dmp.patch_apply( + patches, "x12345678901234567890---------------++++++++++----" + "-----------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( + "patch_apply: Big delete, large change 1.", + L"xabc12345678901234567890---------------++++++++++-------------" + L"--12345678901234567890y(false, true)", + resultStr ); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy" ); + results = dmp.patch_apply( + patches, "x12345678901234567890---------------++++++++++----" + "-----------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( "patch_apply: Big delete, large change 2.", L"xabcy(true, true)", resultStr ); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make( + "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------" + "1234567YYYYYYYYYY890" ); + results = 
dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring( boolArray, false ); + assertEquals( + "patch_apply: Compensate for failed patch.", + L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------" + L"1234567YYYYYYYYYY890(false, true)", + resultStr ); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make( "", "test" ); + std::wstring patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "" ); + assertEquals( "patch_apply: No side effects.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "Woof" ); + patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "", "test" ); + results = dmp.patch_apply( patches, "" ); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Edge exact match.", L"test\ttrue", resultStr ); + + patches = dmp.patch_make( "XY", "XtestY" ); + results = dmp.patch_apply( patches, "XY" ); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Near edge exact match.", L"XtestY\ttrue", resultStr ); + + patches = dmp.patch_make( "y", "y123" ); + results = dmp.patch_apply( patches, "x" ); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring( boolArray[ 0 ], false ); + assertEquals( "patch_apply: Edge partial match.", L"x123\ttrue", resultStr ); + } + + TEST_F( diff_match_patch_test, fromGitHubExamples ) + { + auto lhs = L"I am the very model of a modern Major-General, I've information " + L"vegetable, animal, and mineral, I know 
the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical."; + auto rhs = L"I am the very model of a cartoon individual, My animation's comical, " + L"unusual, and whimsical, I'm quite adept at funny gags, comedic theory " + L"I have read, From wicked puns and stupid jokes to anvils that drop on " + L"your head."; + auto diffs = dmp.diff_main( lhs, rhs ); + dmp.diff_cleanupSemantic( diffs ); + auto console = dmp.diff_prettyConsole( diffs ); + auto html = dmp.diff_prettyHtml( diffs ); + auto delta = dmp.diff_toDelta( diffs ); + + auto consoleGolden = L"I am the very model of a \x1B[0;31mmodern Major-General, I've " + L"information vegetable, animal, and mineral, I know the kings of " + L"England, and I quote the fights historical, From Marathon to Waterloo, " + L"in order categorical\x1B[m\x1B[0;32mcartoon individual, My animation's " + L"comical, unusual, and whimsical, I'm quite adept at funny gags, " + L"comedic theory I have read, From wicked puns and stupid jokes to " + L"anvils that drop on your head\x1B[m."; + assertEquals( "gitHubDemos", consoleGolden, console ); + + auto htmlGolden = LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categoricalcartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head.)"; + assertEquals( "gitHubDemos", htmlGolden, html ); + + auto deltaGolden = L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\t=1"; + assertEquals( "gitHubDemos", deltaGolden, delta ); + + auto patches = dmp.patch_make( lhs, rhs ); + auto patch = 
dmp.patch_toText( patches ); + auto patchGolden = L"@@ -22,187 +22,198 @@\n f a \n-modern Major-General, I've information " + L"vegetable, animal, and mineral, I know the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical\n+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\n " + L".\n"; + assertEquals( "gitHubDemos", patchGolden, patch ); + } +} \ No newline at end of file diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index 07678d2a..b808b26a 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -20,150 +20,155 @@ #define DIFF_MATCH_PATCH_TEST_H #ifdef USE_GTEST -#include "gtest/gtest.h" -#define assertEquals(msg, GOLDEN, COMPUTED) EXPECT_EQ(GOLDEN, COMPUTED) << msg -#define assertEmpty(msg, COMPUTED) EXPECT_TRUE(COMPUTED.empty()) << msg -#define assertTrue(msg, COMPUTED) EXPECT_TRUE(COMPUTED) << msg -#define assertFalse(msg, COMPUTED) EXPECT_FALSE(COMPUTED) << msg -#define PUBLIC_TESTING : public testing::Test -#define assertThrow(msg, STATEMENT, EXCEPTION_TYPE) \ - EXPECT_THROW(STATEMENT, EXCEPTION_TYPE) << msg + #include "gtest/gtest.h" + #define assertEquals( msg, GOLDEN, COMPUTED ) EXPECT_EQ( GOLDEN, COMPUTED ) << msg + #define assertEmpty( msg, COMPUTED ) EXPECT_TRUE( COMPUTED.empty() ) << msg + #define assertTrue( msg, COMPUTED ) EXPECT_TRUE( COMPUTED ) << msg + #define assertFalse( msg, COMPUTED ) EXPECT_FALSE( COMPUTED ) << msg + #define PUBLIC_TESTING : public testing::Test + #define assertThrow( msg, STATEMENT, EXCEPTION_TYPE ) EXPECT_THROW( STATEMENT, EXCEPTION_TYPE ) << msg #else -#include -#define PUBLIC_TESTING -#define TEST_F(className, funcName) void diff_match_patch_test::funcName() + #include + #define PUBLIC_TESTING + #define TEST_F( className, funcName ) void 
diff_match_patch_test::funcName() #endif #include "diff_match_patch_utils.h" -class diff_match_patch_test PUBLIC_TESTING { - public: - using TStringVector = diff_match_patch::TStringVector; - using TCharPosMap = diff_match_patch::TCharPosMap; - using TVariant = diff_match_patch::TVariant; - using TVariantVector = diff_match_patch::TVariantVector; +namespace NDiffMatchPatch +{ + class diff_match_patch_test PUBLIC_TESTING + { + public: + using TStringVector = NDiffMatchPatch::diff_match_patch::TStringVector; + using TCharPosMap = NDiffMatchPatch::diff_match_patch::TCharPosMap; + using TVariant = NDiffMatchPatch::diff_match_patch::TVariant; + using TVariantVector = NDiffMatchPatch::diff_match_patch::TVariantVector; - diff_match_patch_test(); + diff_match_patch_test(); #ifndef USE_GTEST - public: - int run_all_tests(); - - // DIFF TEST FUNCTIONS - void testDiffCommonPrefix(); - void testDiffCommonSuffix(); - void testDiffCommonOverlap(); - void testDiffHalfmatch(); - void testDiffLinesToChars(); - void testDiffCharsToLines(); - void testDiffCleanupMerge(); - void testDiffCleanupSemanticLossless(); - void testDiffCleanupSemantic(); - void testDiffCleanupEfficiency(); - void testDiffPrettyHtml(); - void testDiffPrettyConsole(); - void testDiffText(); - void testDiffDelta(); - void testDiffXIndex(); - void testDiffLevenshtein(); - void testDiffBisect(); - void testDiffMain(); - - // MATCH TEST FUNCTIONS - void testMatchAlphabet(); - void testMatchBitap(); - void testMatchMain(); - - // PATCH TEST FUNCTIONS - void testPatchObj(); - void testPatchFromText(); - void testPatchToText(); - void testPatchAddContext(); - void testPatchMake(); - void testPatchSplitMax(); - void testPatchAddPadding(); - void testPatchApply(); - - void fromGitHubExamples(); - - private: - bool runTest(std::function test); - std::size_t numPassedTests{0}; - std::size_t numFailedTests{0}; - - // Define equality. 
- template - void assertEquals(const std::string &strCase, const T &lhs, const T &rhs) { - bool failed = (lhs.size() != rhs.size()); - if (!failed) { - for (auto ii = 0ULL; !failed && (ii < lhs.size()); ++ii) { - auto &&t1 = lhs[ii]; - auto &&t2 = rhs[ii]; - failed = t1 != t2; - } - } - - if (failed) { - // Build human readable description of both lists. - auto lhsString = NUtils::to_wstring(lhs, true); - auto rhsString = NUtils::to_wstring(rhs, true); - reportFailure(strCase, lhsString, rhsString); - return; - } - reportPassed(strCase); - } - - void assertEquals(const std::string &strCase, bool lhs, bool rhs); - void assertEquals(const std::string &strCase, std::size_t n1, std::size_t n2); - void assertEquals(const std::string &strCase, const std::wstring &s1, - const std::wstring &s2); - void assertEquals(const std::string &strCase, const std::string &s1, - const std::string &s2); - void assertEquals(const std::string &strCase, const Diff &d1, const Diff &d2); - void assertEquals(const std::string &strCase, const TVariant &var1, - const TVariant &var2); - void assertEquals(const std::string &strCase, const TCharPosMap &m1, - const TCharPosMap &m2); - - void assertTrue(const std::string &strCase, bool value); - void assertFalse(const std::string &strCase, bool value); - void assertEmpty(const std::string &strCase, const TStringVector &list); - - void reportFailure(const std::string &strCase, const std::wstring &expected, - const std::wstring &actual); - void reportPassed(const std::string &strCase); - -#define assertThrow(msg, COMMAND, EXCEPTION_TYPE) \ - { \ - bool exceptionTriggered = false; \ - try { \ - COMMAND; \ - assertFalse(msg, true); \ - } catch (const EXCEPTION_TYPE &ex) { \ - exceptionTriggered = true; \ - } \ - assertTrue(std::string(msg) + std::string(" - Exception triggered"), \ - exceptionTriggered); \ - } + public: + int run_all_tests(); + + // DIFF TEST FUNCTIONS + void testDiffCommonPrefix(); + void testDiffCommonSuffix(); + void 
testDiffCommonOverlap(); + void testDiffHalfmatch(); + void testDiffLinesToChars(); + void testDiffCharsToLines(); + void testDiffCleanupMerge(); + void testDiffCleanupSemanticLossless(); + void testDiffCleanupSemantic(); + void testDiffCleanupEfficiency(); + void testDiffPrettyHtml(); + void testDiffPrettyConsole(); + void testDiffText(); + void testDiffDelta(); + void testDiffXIndex(); + void testDiffLevenshtein(); + void testDiffBisect(); + void testDiffMain(); + + // MATCH TEST FUNCTIONS + void testMatchAlphabet(); + void testMatchBitap(); + void testMatchMain(); + + // PATCH TEST FUNCTIONS + void testPatchObj(); + void testPatchFromText(); + void testPatchToText(); + void testPatchAddContext(); + void testPatchMake(); + void testPatchSplitMax(); + void testPatchAddPadding(); + void testPatchApply(); + + void fromGitHubExamples(); + + private: + bool runTest( std::function< void() > test ); + std::size_t numPassedTests{ 0 }; + std::size_t numFailedTests{ 0 }; + + // Define equality. + template< typename T > + void assertEquals( const std::string &strCase, const T &lhs, const T &rhs ) + { + bool failed = ( lhs.size() != rhs.size() ); + if ( !failed ) + { + for ( auto ii = 0ULL; !failed && ( ii < lhs.size() ); ++ii ) + { + auto &&t1 = lhs[ ii ]; + auto &&t2 = rhs[ ii ]; + failed = t1 != t2; + } + } + + if ( failed ) + { + // Build human readable description of both lists. 
+ auto lhsString = NUtils::to_wstring( lhs, true ); + auto rhsString = NUtils::to_wstring( rhs, true ); + reportFailure( strCase, lhsString, rhsString ); + return; + } + reportPassed( strCase ); + } + + void assertEquals( const std::string &strCase, bool lhs, bool rhs ); + void assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ); + void assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ); + void assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ); + void assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ); + void assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ); + void assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ); + + void assertTrue( const std::string &strCase, bool value ); + void assertFalse( const std::string &strCase, bool value ); + void assertEmpty( const std::string &strCase, const TStringVector &list ); + + void reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ); + void reportPassed( const std::string &strCase ); + + #define assertThrow( msg, COMMAND, EXCEPTION_TYPE ) \ + { \ + bool exceptionTriggered = false; \ + try \ + { \ + COMMAND; \ + assertFalse( msg, true ); \ + } \ + catch ( const EXCEPTION_TYPE &ex ) \ + { \ + exceptionTriggered = true; \ + } \ + assertTrue( std::string( msg ) + std::string( " - Exception triggered" ), exceptionTriggered ); \ + } #endif - public: - bool equals(const TVariant &var1, const TVariant &var2); - - template - bool equals(const T &lhs, const T &rhs) { - bool equal = (lhs.size() == rhs.size()); - for (auto ii = 0ULL; equal && (ii < lhs.size()); ++ii) { - auto &&t1 = lhs[ii]; - auto &&t2 = rhs[ii]; - equal = t1 == t2; - } - return equal; - } - diff_match_patch dmp; - - // Construct the two texts which made up the diff originally. 
- TStringVector diff_rebuildtexts(const TDiffVector &diffs); -}; - -#endif // DIFF_MATCH_PATCH_TEST_H + public: + bool equals( const TVariant &var1, const TVariant &var2 ); + + template< typename T > + bool equals( const T &lhs, const T &rhs ) + { + bool equal = ( lhs.size() == rhs.size() ); + for ( auto ii = 0ULL; equal && ( ii < lhs.size() ); ++ii ) + { + auto &&t1 = lhs[ ii ]; + auto &&t2 = rhs[ ii ]; + equal = t1 == t2; + } + return equal; + } + NDiffMatchPatch::diff_match_patch dmp; + + // Construct the two texts which made up the diff originally. + TStringVector diff_rebuildtexts( const NDiffMatchPatch::TDiffVector &diffs ); + }; +} +#endif // DIFF_MATCH_PATCH_TEST_H diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp index 4e01f687..c1cfd1ac 100644 --- a/cpp17/diff_match_patch_test_assertEquals.cpp +++ b/cpp17/diff_match_patch_test_assertEquals.cpp @@ -21,132 +21,138 @@ #include "diff_match_patch.h" #include "diff_match_patch_test.h" #include "diff_match_patch_utils.h" +namespace NDiffMatchPatch +{ #ifndef USE_GTEST -void diff_match_patch_test::reportFailure(const std::string &strCase, - const std::wstring &expected, - const std::wstring &actual) { - std::cout << "FAILED : " + strCase + "\n"; - std::wcerr << " Expected: " << expected << "\n Actual: " << actual - << "\n"; - numFailedTests++; - // throw strCase; -} - -void diff_match_patch_test::reportPassed(const std::string &strCase) { - std::cout << "PASSED: " + strCase + "\n"; -} - -void diff_match_patch_test::assertEquals(const std::string &strCase, - std::size_t n1, std::size_t n2) { - if (n1 != n2) { - reportFailure(strCase, std::to_wstring(n1), std::to_wstring(n2)); - } - reportPassed(strCase); -} - -void diff_match_patch_test::assertEquals(const std::string &strCase, - const std::wstring &s1, - const std::wstring &s2) { - if (s1 != s2) { - reportFailure(strCase, s1, s2); - } - reportPassed(strCase); -} - -void 
diff_match_patch_test::assertEquals(const std::string &strCase, - const std::string &s1, - const std::string &s2) { - return assertEquals(strCase, NUtils::to_wstring(s1), NUtils::to_wstring(s2)); -} - -void diff_match_patch_test::assertEquals(const std::string &strCase, - const Diff &d1, const Diff &d2) { - if (d1 != d2) { - reportFailure(strCase, d1.toString(), d2.toString()); - } - reportPassed(strCase); -} - -void diff_match_patch_test::assertEquals(const std::string &strCase, - const TVariant &var1, - const TVariant &var2) { - if (var1 != var2) { - reportFailure(strCase, NUtils::to_wstring(var1), NUtils::to_wstring(var2)); - } - reportPassed(strCase); -} - -void diff_match_patch_test::assertEquals(const std::string &strCase, - const TCharPosMap &m1, - const TCharPosMap &m2) { - for (auto &&ii : m1) { - auto rhs = m2.find(ii.first); - if (rhs == m2.end()) { - reportFailure(strCase, - L"(" + NUtils::to_wstring(ii.first) + L"," + - std::to_wstring(ii.second) + L")", - L""); + void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) + { + std::cout << "FAILED : " + strCase + "\n"; + std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n"; + numFailedTests++; + // throw strCase; } - } - - for (auto &&ii : m2) { - auto rhs = m1.find(ii.first); - if (rhs == m1.end()) { - reportFailure(strCase, - L"(" + NUtils::to_wstring(ii.first) + L"," + - std::to_wstring(ii.second) + L")", - L""); + + void diff_match_patch_test::reportPassed( const std::string &strCase ) + { + std::cout << "PASSED: " + strCase + "\n"; + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ) + { + if ( n1 != n2 ) + { + reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ) + { + if ( 
s1 != s2 ) + { + reportFailure( strCase, s1, s2 ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) + { + return assertEquals( strCase, NUtils::to_wstring( s1 ), NUtils::to_wstring( s2 ) ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) + { + if ( d1 != d2 ) + { + reportFailure( strCase, d1.toString(), d2.toString() ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ) + { + if ( var1 != var2 ) + { + reportFailure( strCase, NUtils::to_wstring( var1 ), NUtils::to_wstring( var2 ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ) + { + for ( auto &&ii : m1 ) + { + auto rhs = m2.find( ii.first ); + if ( rhs == m2.end() ) + { + reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + for ( auto &&ii : m2 ) + { + auto rhs = m1.find( ii.first ); + if ( rhs == m1.end() ) + { + reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + reportPassed( strCase ); } - } - - reportPassed(strCase); -} - -void diff_match_patch_test::assertEquals(const std::string &strCase, bool lhs, - bool rhs) { - if (lhs != rhs) { - reportFailure(strCase, NUtils::to_wstring(lhs, false), - NUtils::to_wstring(rhs, false)); - } - reportPassed(strCase); -} - -void diff_match_patch_test::assertTrue(const std::string &strCase, bool value) { - if (!value) { - reportFailure(strCase, NUtils::to_wstring(true, false), - NUtils::to_wstring(false, false)); - } - reportPassed(strCase); -} - -void diff_match_patch_test::assertFalse(const std::string &strCase, - bool value) { - if (value) { - 
reportFailure(strCase, NUtils::to_wstring(false, false), - NUtils::to_wstring(true, false)); - } - reportPassed(strCase); -} - -void diff_match_patch_test::assertEmpty(const std::string &strCase, - const TStringVector &list) { - if (!list.empty()) { - throw strCase; - } -} -#endif -// Construct the two texts which made up the diff originally. -diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( - const TDiffVector &diffs) { - TStringVector text(2, std::wstring()); - for (auto &&myDiff : diffs) { - if (myDiff.operation != INSERT) { - text[0] += myDiff.text; + void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs ) + { + if ( lhs != rhs ) + { + reportFailure( strCase, NUtils::to_wstring( lhs, false ), NUtils::to_wstring( rhs, false ) ); + } + reportPassed( strCase ); } - if (myDiff.operation != DELETE) { - text[1] += myDiff.text; + + void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) + { + if ( !value ) + { + reportFailure( strCase, NUtils::to_wstring( true, false ), NUtils::to_wstring( false, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertFalse( const std::string &strCase, bool value ) + { + if ( value ) + { + reportFailure( strCase, NUtils::to_wstring( false, false ), NUtils::to_wstring( true, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) + { + if ( !list.empty() ) + { + throw strCase; + } + } +#endif + + // Construct the two texts which made up the diff originally. 
+ diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) + { + TStringVector text( 2, std::wstring() ); + for ( auto &&myDiff : diffs ) + { + if ( !myDiff.isInsert() ) + { + text[ 0 ] += myDiff.text(); + } + + if ( !myDiff.isDelete() ) + { + text[ 1 ] += myDiff.text(); + } + } + return text; } - } - return text; -} +} \ No newline at end of file diff --git a/cpp17/diff_match_patch_test_utils.cpp b/cpp17/diff_match_patch_test_utils.cpp new file mode 100644 index 00000000..14deb663 --- /dev/null +++ b/cpp17/diff_match_patch_test_utils.cpp @@ -0,0 +1,157 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "diff_match_patch.h" +#include "diff_match_patch_test.h" +#include "diff_match_patch_utils.h" +namespace NDiffMatchPatch +{ +#ifndef USE_GTEST + void diff_match_patch_test::reportFailure( const std::string &strCase, const std::wstring &expected, const std::wstring &actual ) + { + std::cout << "FAILED : " + strCase + "\n"; + std::wcerr << " Expected: " << expected << "\n Actual: " << actual << "\n"; + numFailedTests++; + // throw strCase; + } + + void diff_match_patch_test::reportPassed( const std::string &strCase ) + { + std::cout << "PASSED: " + strCase + "\n"; + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, std::size_t n1, std::size_t n2 ) + { + if ( n1 != n2 ) + { + reportFailure( strCase, std::to_wstring( n1 ), std::to_wstring( n2 ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const std::wstring &s1, const std::wstring &s2 ) + { + if ( s1 != s2 ) + { + reportFailure( strCase, s1, s2 ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const std::string &s1, const std::string &s2 ) + { + return assertEquals( strCase, NUtils::to_wstring( s1 ), NUtils::to_wstring( s2 ) ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const Diff &d1, const Diff &d2 ) + { + if ( d1 != d2 ) + { + reportFailure( strCase, d1.toString(), d2.toString() ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const TVariant &var1, const TVariant &var2 ) + { + if ( var1 != var2 ) + { + reportFailure( strCase, NUtils::to_wstring( var1 ), NUtils::to_wstring( var2 ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, const TCharPosMap &m1, const TCharPosMap &m2 ) + { + for ( auto &&ii : m1 ) + { + auto rhs = m2.find( ii.first ); + if ( rhs == m2.end() ) + { + 
reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + for ( auto &&ii : m2 ) + { + auto rhs = m1.find( ii.first ); + if ( rhs == m1.end() ) + { + reportFailure( strCase, L"(" + NUtils::to_wstring( ii.first ) + L"," + std::to_wstring( ii.second ) + L")", L"" ); + } + } + + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEquals( const std::string &strCase, bool lhs, bool rhs ) + { + if ( lhs != rhs ) + { + reportFailure( strCase, NUtils::to_wstring( lhs, false ), NUtils::to_wstring( rhs, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertTrue( const std::string &strCase, bool value ) + { + if ( !value ) + { + reportFailure( strCase, NUtils::to_wstring( true, false ), NUtils::to_wstring( false, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertFalse( const std::string &strCase, bool value ) + { + if ( value ) + { + reportFailure( strCase, NUtils::to_wstring( false, false ), NUtils::to_wstring( true, false ) ); + } + reportPassed( strCase ); + } + + void diff_match_patch_test::assertEmpty( const std::string &strCase, const TStringVector &list ) + { + if ( !list.empty() ) + { + throw strCase; + } + } +#endif + + // Construct the two texts which made up the diff originally. 
+ diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts( const TDiffVector &diffs ) + { + TStringVector text( 2, std::wstring() ); + for ( auto &&myDiff : diffs ) + { + if ( !myDiff.isInsert() ) + { + text[ 0 ] += myDiff.text(); + } + if ( !myDiff.isDelete() ) + { + text[ 1 ] += myDiff.text(); + } + } + return text; + } +} \ No newline at end of file diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp index dfd89519..c916edc4 100644 --- a/cpp17/diff_match_patch_utils.cpp +++ b/cpp17/diff_match_patch_utils.cpp @@ -21,148 +21,181 @@ #include #include -namespace NUtils { -std::wstring safeMid(const std::wstring &str, std::size_t pos) { - return safeMid(str, pos, std::string::npos); -} - -std::wstring safeMid(const std::wstring &str, std::size_t pos, - std::size_t len) { - return (pos == str.length()) ? std::wstring() : str.substr(pos, len); -} - -void replace(std::wstring &inString, const std::wstring &from, - const std::wstring &to) { - std::size_t pos = inString.find(from); - while (pos != std::wstring::npos) { - inString.replace(pos, from.length(), to); - pos = inString.find(from, pos + to.length()); - } -} - -wchar_t toHexUpper(wchar_t value) { return L"0123456789ABCDEF"[value & 0xF]; } - -std::wstring toPercentEncoding(wchar_t c, const std::wstring &exclude, - const std::wstring &include) { - std::wstring retVal; - - if (((c >= 0x61 && c <= 0x7A) // ALPHA - || (c >= 0x41 && c <= 0x5A) // ALPHA - || (c >= 0x30 && c <= 0x39) // DIGIT - || c == 0x2D // - - || c == 0x2E // . 
- || c == 0x5F // _ - || c == 0x7E // ~ - || (exclude.find(c) != std::string::npos)) && - (include.find(c) == std::string::npos)) { - retVal = std::wstring(1, c); - } else { - retVal = L'%'; - retVal += toHexUpper((c & 0xf0) >> 4); - retVal += toHexUpper(c & 0xf); - } - return retVal; -} - -std::wstring toPercentEncoding( - const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, - const std::wstring &include /*= std::wstring() */) { - if (input.empty()) return {}; - std::wstring retVal; - retVal.reserve(input.length() * 3); - - static_assert(sizeof(wchar_t) <= 4, "wchar_t is greater that 32 bit"); - - std::wstring_convert > utf8_conv; - for (auto &&c : input) { - auto currStr = std::wstring(1, c); - auto asBytes = utf8_conv.to_bytes(currStr); - for (auto &&ii : asBytes) { - if (ii) retVal += toPercentEncoding(ii, exclude, include); - } - } - return retVal; -} - -wchar_t getValue(wchar_t ch) { - if (ch >= '0' && ch <= '9') - ch -= '0'; - else if (ch >= 'a' && ch <= 'f') - ch = ch - 'a' + 10; - else if (ch >= 'A' && ch <= 'F') - ch = ch - 'A' + 10; - else - throw std::wstring(L"Invalid Character %") + ch; - - return ch; -} - -std::wstring fromPercentEncoding(const std::wstring &input) { - if (input.empty()) return {}; - std::string retVal; - retVal.reserve(input.length()); - for (auto ii = 0ULL; ii < input.length(); ++ii) { - auto c = input[ii]; - if (c == L'%' && (ii + 2) < input.length()) { - auto a = input[++ii]; - auto b = input[++ii]; - a = getValue(a); - b = getValue(b); - a = a << 4; - auto value = a | b; - retVal += std::string(1, value); - } else if (c == '+') - retVal += ' '; - else { - retVal += c; - } - } - std::wstring_convert > utf8_conv; - auto asBytes = utf8_conv.from_bytes(retVal); - - return asBytes; -} - -bool endsWith(const std::wstring &string, const std::wstring &suffix) { - if (suffix.length() > string.length()) return false; - - return string.compare(string.length() - suffix.length(), suffix.length(), - suffix) == 0; -} - 
-TStringVector splitString(const std::wstring &string, - const std::wstring &separator, bool skipEmptyParts) { - if (separator.empty()) { - if (!skipEmptyParts || !string.empty()) return {string}; - return {}; - } - - TStringVector strings; - auto prevPos = 0ULL; - auto startPos = string.find_first_of(separator); - while (startPos != std::string::npos) { - auto start = prevPos ? prevPos + 1 : prevPos; - auto len = prevPos ? (startPos - prevPos - 1) : startPos; - auto curr = string.substr(start, len); - prevPos = startPos; - if (!skipEmptyParts || !curr.empty()) strings.emplace_back(curr); - startPos = string.find_first_of(separator, prevPos + 1); - } - auto remainder = string.substr(prevPos ? prevPos + 1 : prevPos); - if (!skipEmptyParts || !remainder.empty()) strings.emplace_back(remainder); - - return strings; -} - -int64_t toInt(const std::wstring &string) { - int64_t retVal = 0; - try { - std::size_t lastPos{}; - retVal = std::stoul(string, &lastPos); - if (lastPos != string.length()) return 0; - } catch (...) { - } - return retVal; -} - -} // namespace NUtils +namespace NDiffMatchPatch +{ + namespace NUtils + { + std::wstring safeMid( const std::wstring &str, std::size_t pos ) + { + return safeMid( str, pos, std::string::npos ); + } + + std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ) + { + return ( pos == str.length() ) ? 
std::wstring() : str.substr( pos, len ); + } + + void replace( std::wstring &inString, const std::wstring &from, const std::wstring &to ) + { + std::size_t pos = inString.find( from ); + while ( pos != std::wstring::npos ) + { + inString.replace( pos, from.length(), to ); + pos = inString.find( from, pos + to.length() ); + } + } + + wchar_t toHexUpper( wchar_t value ) + { + return L"0123456789ABCDEF"[ value & 0xF ]; + } + + std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude, const std::wstring &include ) + { + std::wstring retVal; + + if ( ( ( c >= 0x61 && c <= 0x7A ) // ALPHA + || ( c >= 0x41 && c <= 0x5A ) // ALPHA + || ( c >= 0x30 && c <= 0x39 ) // DIGIT + || c == 0x2D // - + || c == 0x2E // . + || c == 0x5F // _ + || c == 0x7E // ~ + || ( exclude.find( c ) != std::string::npos ) ) + && ( include.find( c ) == std::string::npos ) ) + { + retVal = std::wstring( 1, c ); + } + else + { + retVal = L'%'; + retVal += toHexUpper( ( c & 0xf0 ) >> 4 ); + retVal += toHexUpper( c & 0xf ); + } + return retVal; + } + + std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/, const std::wstring &include /*= std::wstring() */ ) + { + if ( input.empty() ) + return {}; + std::wstring retVal; + retVal.reserve( input.length() * 3 ); + + static_assert( sizeof( wchar_t ) <= 4, "wchar_t is greater that 32 bit" ); + + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + for ( auto &&c : input ) + { + auto currStr = std::wstring( 1, c ); + auto asBytes = utf8_conv.to_bytes( currStr ); + for ( auto &&ii : asBytes ) + { + if ( ii ) + retVal += toPercentEncoding( ii, exclude, include ); + } + } + return retVal; + } + + wchar_t getValue( wchar_t ch ) + { + if ( ch >= '0' && ch <= '9' ) + ch -= '0'; + else if ( ch >= 'a' && ch <= 'f' ) + ch = ch - 'a' + 10; + else if ( ch >= 'A' && ch <= 'F' ) + ch = ch - 'A' + 10; + else + throw std::wstring( L"Invalid Character %" ) + ch; + + return ch; + } + + 
std::wstring fromPercentEncoding( const std::wstring &input ) + { + if ( input.empty() ) + return {}; + std::string retVal; + retVal.reserve( input.length() ); + for ( auto ii = 0ULL; ii < input.length(); ++ii ) + { + auto c = input[ ii ]; + if ( c == L'%' && ( ii + 2 ) < input.length() ) + { + auto a = input[ ++ii ]; + auto b = input[ ++ii ]; + a = getValue( a ); + b = getValue( b ); + a = a << 4; + auto value = a | b; + retVal += std::string( 1, value ); + } + else if ( c == '+' ) + retVal += ' '; + else + { + retVal += c; + } + } + std::wstring_convert< std::codecvt_utf8< wchar_t > > utf8_conv; + auto asBytes = utf8_conv.from_bytes( retVal ); + + return asBytes; + } + + bool endsWith( const std::wstring &string, const std::wstring &suffix ) + { + if ( suffix.length() > string.length() ) + return false; + + return string.compare( string.length() - suffix.length(), suffix.length(), suffix ) == 0; + } + + TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ) + { + if ( separator.empty() ) + { + if ( !skipEmptyParts || !string.empty() ) + return { string }; + return {}; + } + + TStringVector strings; + auto prevPos = 0ULL; + auto startPos = string.find_first_of( separator ); + while ( startPos != std::string::npos ) + { + auto start = prevPos ? prevPos + 1 : prevPos; + auto len = prevPos ? ( startPos - prevPos - 1 ) : startPos; + auto curr = string.substr( start, len ); + prevPos = startPos; + if ( !skipEmptyParts || !curr.empty() ) + strings.emplace_back( curr ); + startPos = string.find_first_of( separator, prevPos + 1 ); + } + auto remainder = string.substr( prevPos ? 
prevPos + 1 : prevPos ); + if ( !skipEmptyParts || !remainder.empty() ) + strings.emplace_back( remainder ); + + return strings; + } + + int64_t toInt( const std::wstring &string ) + { + int64_t retVal = 0; + try + { + std::size_t lastPos{}; + retVal = std::stoul( string, &lastPos ); + if ( lastPos != string.length() ) + return 0; + } + catch ( ... ) + { + } + return retVal; + } + + } // namespace NUtils +} \ No newline at end of file diff --git a/cpp17/diff_match_patch_utils.h b/cpp17/diff_match_patch_utils.h index 7362a32f..12a1dfce 100644 --- a/cpp17/diff_match_patch_utils.h +++ b/cpp17/diff_match_patch_utils.h @@ -23,23 +23,26 @@ #include #include #include -namespace NUtils { -using TStringVector = std::vector; +namespace NDiffMatchPatch +{ + namespace NUtils + { + using TStringVector = std::vector< std::wstring >; -/* + /* * Utility functions to replace Qt built in methods */ -/** + /** * A safer version of std::wstring.mid(pos). This one returns "" instead of * null when the postion equals the string length. * @param str String to take a substring from. * @param pos Position to start the substring from. * @return Substring. */ -std::wstring safeMid(const std::wstring &str, std::size_t pos); + std::wstring safeMid( const std::wstring &str, std::size_t pos ); -/** + /** * A safer version of std::wstring.mid(pos, len). This one returns "" instead * of null when the postion equals the string length. * @param str String to take a substring from. @@ -47,82 +50,76 @@ std::wstring safeMid(const std::wstring &str, std::size_t pos); * @param len Length of substring. * @return Substring. */ -std::wstring safeMid(const std::wstring &str, std::size_t pos, std::size_t len); + std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); -/** + /** * replaces QString::replace * @param haystack String to replace all needles with to * @param needle Substring to search for in the haystack * @param to replacement string * @return void. 
*/ -void replace(std::wstring &haystack, const std::wstring &needle, - const std::wstring &to); + void replace( std::wstring &haystack, const std::wstring &needle, const std::wstring &to ); -/** + /** * replaces returns the html percent encoded character equivalent * @param c the input Character to return the encoded string of * @param exclude The list of chars that are NOT to be encoded * @param include The list of chars that are to be encoded * @return the encoded string */ -std::wstring toPercentEncoding(wchar_t c, - const std::wstring &exclude = std::wstring(), - const std::wstring &include = std::wstring()); + std::wstring toPercentEncoding( wchar_t c, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); -/** + /** * return the html percent encoded string equivalent * @param input the input String to return the encoded string of * @param exclude The list of chars that are NOT to be encoded * @param include The list of chars that are to be encoded * @return the encoded string */ -std::wstring toPercentEncoding(const std::wstring &input, - const std::wstring &exclude = std::wstring(), - const std::wstring &include = std::wstring()); + std::wstring toPercentEncoding( const std::wstring &input, const std::wstring &exclude = std::wstring(), const std::wstring &include = std::wstring() ); -/** + /** * returns the string equivalent removing any percent encoding and replacing it * with the correct character * @param input the input String to return the encoded string of * @return the decoded string */ -std::wstring fromPercentEncoding(const std::wstring &input); + std::wstring fromPercentEncoding( const std::wstring &input ); -/** + /** * replaces returns integer value of the character, '0'-'9' = 0-9, 'A'-'F' = * 10-15, 'a'-'f' = 10-15 * @param input the value to return the integer value of * @return the integer value of the character */ -wchar_t getIntValue(wchar_t ch); + wchar_t getIntValue( wchar_t ch ); -/** + /** * return 
the integer value of the string * @param string the String to be converted to an integer * @return the integer version, on an invalid input returns 0 */ -int64_t toInt(const std::wstring &string); + int64_t toInt( const std::wstring &string ); -/** + /** * return true if the string has the suffix * @param string the String to check to see if it ends with suffix * @param suffix the String to see if the input string ends with * @return True if the string ends with suffix */ -bool endsWith(const std::wstring &string, const std::wstring &suffix); + bool endsWith( const std::wstring &string, const std::wstring &suffix ); -/** + /** * return a TStringVector of the string split by separator * @param string the String to be split * @param separator the String to search in the input string to split on * @param if true, empty values will be removed * @return the split string */ -TStringVector splitString(const std::wstring &string, - const std::wstring &separator, bool skipEmptyParts); + TStringVector splitString( const std::wstring &string, const std::wstring &separator, bool skipEmptyParts ); -/** + /** * splices the objects vector into the input vector * @param input The input vector to splice out from * @param start The position of the first item to remove from the input vector @@ -131,19 +128,17 @@ TStringVector splitString(const std::wstring &string, * removed * @return the character as a single character string */ -template -static std::vector Splice(std::vector &input, std::size_t start, - std::size_t count, - const std::vector &objects = {}) { - auto deletedRange = - std::vector({input.begin() + start, input.begin() + start + count}); - input.erase(input.begin() + start, input.begin() + start + count); - input.insert(input.begin() + start, objects.begin(), objects.end()); - - return deletedRange; -} + template< typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const std::vector< T > &objects = {} ) + { + 
auto deletedRange = std::vector< T >( { input.begin() + start, input.begin() + start + count } ); + input.erase( input.begin() + start, input.begin() + start + count ); + input.insert( input.begin() + start, objects.begin(), objects.end() ); + + return deletedRange; + } -/** + /** * splices the objects vector into the input vector * @param input The input vector to splice out from * @param start The position of the first item to remove from the input vector @@ -152,135 +147,203 @@ static std::vector Splice(std::vector &input, std::size_t start, * removed * @return the character as a single character string */ -template -static std::vector Splice(std::vector &input, std::size_t start, - std::size_t count, const T &object) { - return Splice(input, start, count, std::vector({object})); -} - -template -std::wstring to_wstring(const T & /*value*/, bool /*doubleQuoteEmpty*/) { - assert(false); - return {}; -} - -/** - * return the single character wide string for the given character - * @param value the char to be converted to an wstring - * @param doubleQuoteEmpty, if the return value would be empty, return "" - * @return the character as a single character string - */ -inline std::wstring to_wstring(const char &value, bool doubleQuoteEmpty) { - if (doubleQuoteEmpty && (value == 0)) return LR"("")"; + template< typename T1 > + using base_type = typename std::remove_reference< std::remove_cv_t< std::decay_t< T1 > > >; - return std::wstring(1, static_cast(value)); -} + template< typename T1 > + using base_type_t = typename base_type< T1 >::type; -template <> -inline std::wstring to_wstring(const bool &value, bool /*doubleQuoteOnEmpty*/) { - std::wstring retVal = std::wstring(value ? 
L"true" : L"false"); - return retVal; -} + template< typename STRING_TYPE > + using is_wstring = typename std::is_same< std::wstring, base_type_t< STRING_TYPE > >; -template <> -inline std::wstring to_wstring(const std::vector::reference &value, - bool /*doubleQuoteOnEmpty*/) { - std::wstring retVal = std::wstring(value ? L"true" : L"false"); - return retVal; -} + template< typename STRING_TYPE > + using is_wcharstar = typename std::is_same< wchar_t *, base_type_t< STRING_TYPE > >; -template <> -inline std::wstring to_wstring(const std::string &string, - bool doubleQuoteEmpty) { - if (doubleQuoteEmpty && string.empty()) return LR"("")"; + template< typename STRING_TYPE > + using is_string = typename std::is_same< std::string, base_type_t< STRING_TYPE > >; - std::wstring wstring(string.size(), - L' '); // Overestimate number of code points. - wstring.resize(std::mbstowcs(&wstring[0], string.c_str(), - string.size())); // Shrink to fit. - return wstring; -} + template< typename STRING_TYPE > + using is_charstar = typename std::is_same< char *, base_type_t< STRING_TYPE > >; -template <> -inline std::wstring to_wstring(const wchar_t &value, bool doubleQuoteEmpty) { - if (doubleQuoteEmpty && (value == 0)) return LR"("")"; + template< typename T > + static std::vector< T > Splice( std::vector< T > &input, std::size_t start, std::size_t count, const T &object ) + { + return Splice( input, start, count, std::vector< T >( { object } ) ); + } - return std::wstring(1, value); -} + template< typename T > + std::wstring to_wstring( const T & /*value*/, bool /*doubleQuoteEmpty*/ ) + { + assert( false ); + return {}; + } -template <> -inline std::wstring to_wstring(const int &value, bool doubleQuoteEmpty) { - return to_wstring(static_cast(value), doubleQuoteEmpty); -} - -template <> -inline std::wstring to_wstring(const std::wstring &value, - bool doubleQuoteEmpty) { - if (doubleQuoteEmpty && value.empty()) return LR"("")"; - - return value; -} - -template -inline std::wstring 
to_wstring(const std::vector &values, - bool doubleQuoteEmpty) { - std::wstring retVal = L"("; - bool first = true; - for (auto &&curr : values) { - if (!first) { - retVal += L", "; - } - retVal += to_wstring(curr, doubleQuoteEmpty); - first = false; - } - retVal += L")"; - return retVal; -} - -template <> -inline std::wstring to_wstring(const std::vector &boolArray, - bool doubleQuoteOnEmpty) { - std::wstring retVal; - for (auto &&curr : boolArray) { - retVal += L"\t" + to_wstring(curr, doubleQuoteOnEmpty); - } - return retVal; -} - -template -inline typename std::enable_if_t, std::wstring> -to_wstring(const std::initializer_list &values, - bool doubleQuoteEmpty = false) { - if (doubleQuoteEmpty && (values.size() == 0)) return LR"(\"\")"; - - std::wstring retVal; - for (auto &&curr : values) { - retVal += to_wstring(curr, false); - } - return retVal; -} + /** + * return the single character wide string for the given character + * @param value the char to be converted to an wstring + * @param doubleQuoteEmpty, if the return value would be empty, return "" + * @return the character as a single character string + */ + inline std::wstring to_wstring( const char &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, static_cast< wchar_t >( value ) ); + } + + template<> + inline std::wstring to_wstring( const bool &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? L"true" : L"false" ); + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::vector< bool >::reference &value, bool /*doubleQuoteOnEmpty*/ ) + { + std::wstring retVal = std::wstring( value ? 
L"true" : L"false" ); + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::string &string, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && string.empty() ) + return LR"("")"; + + std::wstring wstring( string.size(), + L' ' ); // Overestimate number of code points. + wstring.resize( std::mbstowcs( &wstring[ 0 ], string.c_str(), + string.size() ) ); // Shrink to fit. + return wstring; + } + + template<> + inline std::wstring to_wstring( const wchar_t &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( value == 0 ) ) + return LR"("")"; + + return std::wstring( 1, value ); + } + + template<> + inline std::wstring to_wstring( const int &value, bool doubleQuoteEmpty ) + { + return to_wstring( static_cast< wchar_t >( value ), doubleQuoteEmpty ); + } + + template<> + inline std::wstring to_wstring( const std::wstring &value, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && value.empty() ) + return LR"("")"; + + return value; + } + + template< typename T > + inline std::wstring to_wstring( const std::vector< T > &values, bool doubleQuoteEmpty ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; + } + + template<> + inline std::wstring to_wstring( const std::vector< bool > &boolArray, bool doubleQuoteOnEmpty ) + { + if ( boolArray.empty() && doubleQuoteOnEmpty ) + return LR"("")"; + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : boolArray ) + { + if ( !first ) + retVal += L", "; + first = false; + retVal += to_wstring( curr, doubleQuoteOnEmpty ); + } + retVal += L")"; + return retVal; + } + + template< typename T > + inline typename std::enable_if_t< std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty ) + { + if ( doubleQuoteEmpty && ( values.size() 
== 0 ) ) + return LR"("")"; + + std::wstring retVal; + for ( auto &&curr : values ) + { + retVal += to_wstring( curr, false ); + } + return retVal; + } + + template< typename T > + inline typename std::enable_if_t< !std::is_integral_v< T >, std::wstring > to_wstring( const std::initializer_list< T > &values, bool doubleQuoteEmpty ) + { + std::wstring retVal = L"("; + bool first = true; + for ( auto &&curr : values ) + { + if ( !first ) + { + retVal += L", "; + } + retVal += to_wstring( curr, doubleQuoteEmpty ); + first = false; + } + retVal += L")"; + return retVal; + } + + //template< typename T > + //std::wstring to_wstring( const T &value ) + //{ + // return to_wstring( value, false ); + //} + + template< typename STRING_TYPE > + std::wstring to_wstring( const STRING_TYPE &string ) + { + static_assert( + is_wstring< STRING_TYPE >::value // + || is_wcharstar< STRING_TYPE >::value // + || is_string< STRING_TYPE >::value // + || is_charstar< STRING_TYPE >::value // + || std::is_same_v< char, STRING_TYPE > // + || std::is_same_v< wchar_t, STRING_TYPE > // + ); + + if constexpr ( is_wstring< STRING_TYPE >::value ) + return string; + else if constexpr ( is_wcharstar< STRING_TYPE >::value ) + return std::wstring( string ? string : L"" ); + else if constexpr ( is_string< STRING_TYPE >::value ) + return to_wstring( string, false ); + else if constexpr ( is_charstar< STRING_TYPE >::value ) + return to_wstring( std::string( string ? 
string : "" ), false ); + else if constexpr( std::is_same_v< char, STRING_TYPE > ) + return to_wstring( string, false ); + else if constexpr( std::is_same_v< wchar_t, STRING_TYPE > ) + return to_wstring( string, false ); + } -template -inline typename std::enable_if_t, std::wstring> -to_wstring(const std::initializer_list &values, - bool doubleQuoteEmpty = false) { - std::wstring retVal = L"("; - bool first = true; - for (auto &&curr : values) { - if (!first) { - retVal += L", "; } - retVal += to_wstring(curr, doubleQuoteEmpty); - first = false; - } - retVal += L")"; - return retVal; -} - -template -std::wstring to_wstring(const T &value) { - return to_wstring(value, false); } -}; // namespace NUtils #endif From be7ccecc11d5371ce986aefccbc701ee036bc715 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 14 Jan 2025 09:14:35 -0800 Subject: [PATCH 14/15] Reformating --- cpp17/diff_match_patch.cpp | 3 +-- cpp17/diff_match_patch_test.cpp | 2 +- cpp17/diff_match_patch_test.h | 1 + cpp17/diff_match_patch_test_assertEquals.cpp | 2 +- cpp17/diff_match_patch_utils.cpp | 2 +- cpp17/diff_match_patch_utils.h | 2 ++ 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp index eaeb8cdf..b410845f 100644 --- a/cpp17/diff_match_patch.cpp +++ b/cpp17/diff_match_patch.cpp @@ -72,7 +72,6 @@ namespace NDiffMatchPatch std::wstring Diff::toString( EStringType stringType ) const { std::wstring retVal; - if ( stringType == EStringType::ePatch ) { switch ( fOperation ) @@ -2394,4 +2393,4 @@ namespace NDiffMatchPatch return LR"("")"; return retVal; } -} \ No newline at end of file +} diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp index efd9c2fc..a71faeb5 100644 --- a/cpp17/diff_match_patch_test.cpp +++ b/cpp17/diff_match_patch_test.cpp @@ -1106,4 +1106,4 @@ namespace NDiffMatchPatch L".\n"; assertEquals( "gitHubDemos", patchGolden, patch ); } -} \ No newline at end of file +} diff --git 
a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h index b808b26a..7114ecf4 100644 --- a/cpp17/diff_match_patch_test.h +++ b/cpp17/diff_match_patch_test.h @@ -171,4 +171,5 @@ namespace NDiffMatchPatch TStringVector diff_rebuildtexts( const NDiffMatchPatch::TDiffVector &diffs ); }; } + #endif // DIFF_MATCH_PATCH_TEST_H diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp index c1cfd1ac..0ae6453d 100644 --- a/cpp17/diff_match_patch_test_assertEquals.cpp +++ b/cpp17/diff_match_patch_test_assertEquals.cpp @@ -155,4 +155,4 @@ namespace NDiffMatchPatch } return text; } -} \ No newline at end of file +} diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp index c916edc4..324fc526 100644 --- a/cpp17/diff_match_patch_utils.cpp +++ b/cpp17/diff_match_patch_utils.cpp @@ -198,4 +198,4 @@ namespace NDiffMatchPatch } } // namespace NUtils -} \ No newline at end of file +} diff --git a/cpp17/diff_match_patch_utils.h b/cpp17/diff_match_patch_utils.h index 12a1dfce..326abf5f 100644 --- a/cpp17/diff_match_patch_utils.h +++ b/cpp17/diff_match_patch_utils.h @@ -53,6 +53,7 @@ namespace NDiffMatchPatch std::wstring safeMid( const std::wstring &str, std::size_t pos, std::size_t len ); /** + * replaces QString::replace * @param haystack String to replace all needles with to * @param needle Substring to search for in the haystack @@ -346,4 +347,5 @@ namespace NDiffMatchPatch } } + #endif From 173157beb9058f9a8e9cbd5ff976afad76b57b78 Mon Sep 17 00:00:00 2001 From: Scott Aron Bloom Date: Tue, 14 Jan 2025 10:19:22 -0800 Subject: [PATCH 15/15] Formatting --- cpp/diff_match_patch.cpp | 3925 ++++++++++-------- cpp/diff_match_patch.h | 423 +- cpp/diff_match_patch_test.cpp | 1953 ++++----- cpp/diff_match_patch_test.h | 117 +- cpp17/diff_match_patch_test_assertEquals.cpp | 2 +- cpp17/diff_match_patch_utils.h | 5 +- objectivec/DiffMatchPatchCFUtilities.h | 38 +- objectivec/MinMaxMacros.h | 34 +- 8 files 
changed, 3417 insertions(+), 3080 deletions(-) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index 64f270c3..3e93ac95 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -23,50 +23,52 @@ #include #include "diff_match_patch.h" - ////////////////////////// // // Diff Class // ////////////////////////// - /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL * @param text The text being applied */ -Diff::Diff(Operation _operation, const QString &_text) : - operation(_operation), text(_text) { - // Construct a diff with the specified operation and text. +Diff::Diff( Operation _operation, const QString &_text ) : + operation( _operation ), + text( _text ) +{ + // Construct a diff with the specified operation and text. } -Diff::Diff() { +Diff::Diff() +{ } - -QString Diff::strOperation(Operation op) { - switch (op) { - case INSERT: - return "INSERT"; - case DELETE: - return "DELETE"; - case EQUAL: - return "EQUAL"; - } - throw "Invalid operation."; +QString Diff::strOperation( Operation op ) +{ + switch ( op ) + { + case INSERT: + return "INSERT"; + case DELETE: + return "DELETE"; + case EQUAL: + return "EQUAL"; + } + throw "Invalid operation."; } /** * Display a human-readable version of this Diff. * @return text version */ -QString Diff::toString() const { - QString prettyText = text; - // Replace linebreaks with Pilcrow signs. - prettyText.replace('\n', L'\u00b6'); - return QString("Diff(") + strOperation(operation) + QString(",\"") - + prettyText + QString("\")"); +QString Diff::toString() const +{ + QString prettyText = text; + // Replace linebreaks with Pilcrow signs. 
+ prettyText.replace( '\n', L'\u00b6' ); + return QString( "Diff(" ) + strOperation( operation ) + QString( ",\"" ) + prettyText + QString( "\")" ); } /** @@ -74,86 +76,97 @@ QString Diff::toString() const { * @param d Another Diff to compare against * @return true or false */ -bool Diff::operator==(const Diff &d) const { - return (d.operation == this->operation) && (d.text == this->text); +bool Diff::operator==( const Diff &d ) const +{ + return ( d.operation == this->operation ) && ( d.text == this->text ); } -bool Diff::operator!=(const Diff &d) const { - return !(operator == (d)); +bool Diff::operator!=( const Diff &d ) const +{ + return !( operator==( d ) ); } - ///////////////////////////////////////////// // // Patch Class // ///////////////////////////////////////////// - /** * Constructor. Initializes with an empty list of diffs. */ Patch::Patch() : - start1(0), start2(0), - length1(0), length2(0) { + start1( 0 ), + start2( 0 ), + length1( 0 ), + length2( 0 ) +{ } -bool Patch::isNull() const { - if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 - && diffs.size() == 0) { - return true; - } - return false; +bool Patch::isNull() const +{ + if ( start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 && diffs.size() == 0 ) + { + return true; + } + return false; } - /** * Emulate GNU diff's format. * Header: @@ -382,8 +481,9 @@ * Indices are printed as 1-based, not 0-based. 
* @return The GNU diff string */ -QString Patch::toString() { - QString coords1, coords2; - if (length1 == 0) { - coords1 = QString::number(start1) + QString(",0"); - } else if (length1 == 1) { - coords1 = QString::number(start1 + 1); - } else { - coords1 = QString::number(start1 + 1) + QString(",") - + QString::number(length1); - } - if (length2 == 0) { - coords2 = QString::number(start2) + QString(",0"); - } else if (length2 == 1) { - coords2 = QString::number(start2 + 1); - } else { - coords2 = QString::number(start2 + 1) + QString(",") - + QString::number(length2); - } - QString text; - text = QString("@@ -") + coords1 + QString(" +") + coords2 - + QString(" @@\n"); - // Escape the body of the patch with %xx notation. - foreach (Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: - text += QString('+'); - break; - case DELETE: - text += QString('-'); - break; - case EQUAL: - text += QString(' '); - break; - } - text += QString(QUrl::toPercentEncoding(aDiff.text, " !~*'();/?:@&=+$,#")) - + QString("\n"); - } - - return text; -} +QString Patch::toString() +{ + QString coords1, coords2; + if ( length1 == 0 ) + { + coords1 = QString::number( start1 ) + QString( ",0" ); + } + else if ( length1 == 1 ) + { + coords1 = QString::number( start1 + 1 ); + } + else + { + coords1 = QString::number( start1 + 1 ) + QString( "," ) + QString::number( length1 ); + } + if ( length2 == 0 ) + { + coords2 = QString::number( start2 ) + QString( ",0" ); + } + else if ( length2 == 1 ) + { + coords2 = QString::number( start2 + 1 ); + } + else + { + coords2 = QString::number( start2 + 1 ) + QString( "," ) + QString::number( length2 ); + } + QString text; + text = QString( "@@ -" ) + coords1 + QString( " +" ) + coords2 + QString( " @@\n" ); + // Escape the body of the patch with %xx notation. 
+ foreach( Diff aDiff, diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + text += QString( '+' ); + break; + case DELETE: + text += QString( '-' ); + break; + case EQUAL: + text += QString( ' ' ); + break; + } + text += QString( QUrl::toPercentEncoding( aDiff.text, " !~*'();/?:@&=+$,#" ) ) + QString( "\n" ); + } + return text; +} ///////////////////////////////////////////// // @@ -162,870 +175,974 @@ QString Patch::toString() { ///////////////////////////////////////////// diff_match_patch::diff_match_patch() : - Diff_Timeout(1.0f), - Diff_EditCost(4), - Match_Threshold(0.5f), - Match_Distance(1000), - Patch_DeleteThreshold(0.5f), - Patch_Margin(4), - Match_MaxBits(32) { + Diff_Timeout( 1.0f ), + Diff_EditCost( 4 ), + Match_Threshold( 0.5f ), + Match_Distance( 1000 ), + Patch_DeleteThreshold( 0.5f ), + Patch_Margin( 4 ), + Match_MaxBits( 32 ) +{ } - -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2) { - return diff_main(text1, text2, true); +QList< Diff > diff_match_patch::diff_main( const QString &text1, const QString &text2 ) +{ + return diff_main( text1, text2, true ); } -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2, bool checklines) { - // Set a deadline by which time the diff must be complete. - clock_t deadline; - if (Diff_Timeout <= 0) { - deadline = std::numeric_limits::max(); - } else { - deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); - } - return diff_main(text1, text2, checklines, deadline); +QList< Diff > diff_match_patch::diff_main( const QString &text1, const QString &text2, bool checklines ) +{ + // Set a deadline by which time the diff must be complete. 
+ clock_t deadline; + if ( Diff_Timeout <= 0 ) + { + deadline = std::numeric_limits< clock_t >::max(); + } + else + { + deadline = clock() + (clock_t)( Diff_Timeout * CLOCKS_PER_SEC ); + } + return diff_main( text1, text2, checklines, deadline ); } -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2, bool checklines, clock_t deadline) { - // Check for null inputs. - if (text1.isNull() || text2.isNull()) { - throw "Null inputs. (diff_main)"; - } +QList< Diff > diff_match_patch::diff_main( const QString &text1, const QString &text2, bool checklines, clock_t deadline ) +{ + // Check for null inputs. + if ( text1.isNull() || text2.isNull() ) + { + throw "Null inputs. (diff_main)"; + } + + // Check for equality (speedup). + QList< Diff > diffs; + if ( text1 == text2 ) + { + if ( !text1.isEmpty() ) + { + diffs.append( Diff( EQUAL, text1 ) ); + } + return diffs; + } - // Check for equality (speedup). - QList diffs; - if (text1 == text2) { - if (!text1.isEmpty()) { - diffs.append(Diff(EQUAL, text1)); + // Trim off common prefix (speedup). + int commonlength = diff_commonPrefix( text1, text2 ); + const QString &commonprefix = text1.left( commonlength ); + QString textChopped1 = text1.mid( commonlength ); + QString textChopped2 = text2.mid( commonlength ); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix( textChopped1, textChopped2 ); + const QString &commonsuffix = textChopped1.right( commonlength ); + textChopped1 = textChopped1.left( textChopped1.length() - commonlength ); + textChopped2 = textChopped2.left( textChopped2.length() - commonlength ); + + // Compute the diff on the middle block. + diffs = diff_compute( textChopped1, textChopped2, checklines, deadline ); + + // Restore the prefix and suffix. 
+ if ( !commonprefix.isEmpty() ) + { + diffs.prepend( Diff( EQUAL, commonprefix ) ); } + if ( !commonsuffix.isEmpty() ) + { + diffs.append( Diff( EQUAL, commonsuffix ) ); + } + + diff_cleanupMerge( diffs ); + return diffs; - } - - // Trim off common prefix (speedup). - int commonlength = diff_commonPrefix(text1, text2); - const QString &commonprefix = text1.left(commonlength); - QString textChopped1 = text1.mid(commonlength); - QString textChopped2 = text2.mid(commonlength); - - // Trim off common suffix (speedup). - commonlength = diff_commonSuffix(textChopped1, textChopped2); - const QString &commonsuffix = textChopped1.right(commonlength); - textChopped1 = textChopped1.left(textChopped1.length() - commonlength); - textChopped2 = textChopped2.left(textChopped2.length() - commonlength); - - // Compute the diff on the middle block. - diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); - - // Restore the prefix and suffix. - if (!commonprefix.isEmpty()) { - diffs.prepend(Diff(EQUAL, commonprefix)); - } - if (!commonsuffix.isEmpty()) { - diffs.append(Diff(EQUAL, commonsuffix)); - } - - diff_cleanupMerge(diffs); - - return diffs; } +QList< Diff > diff_match_patch::diff_compute( QString text1, QString text2, bool checklines, clock_t deadline ) +{ + QList< Diff > diffs; -QList diff_match_patch::diff_compute(QString text1, QString text2, - bool checklines, clock_t deadline) { - QList diffs; + if ( text1.isEmpty() ) + { + // Just add some text (speedup). + diffs.append( Diff( INSERT, text2 ) ); + return diffs; + } - if (text1.isEmpty()) { - // Just add some text (speedup). - diffs.append(Diff(INSERT, text2)); - return diffs; - } + if ( text2.isEmpty() ) + { + // Just delete some text (speedup). + diffs.append( Diff( DELETE, text1 ) ); + return diffs; + } - if (text2.isEmpty()) { - // Just delete some text (speedup). - diffs.append(Diff(DELETE, text1)); - return diffs; - } + { + const QString longtext = text1.length() > text2.length() ? 
text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + const int i = longtext.indexOf( shorttext ); + if ( i != -1 ) + { + // Shorter text is inside the longer text (speedup). + const Operation op = ( text1.length() > text2.length() ) ? DELETE : INSERT; + diffs.append( Diff( op, longtext.left( i ) ) ); + diffs.append( Diff( EQUAL, shorttext ) ); + diffs.append( Diff( op, safeMid( longtext, i + shorttext.length() ) ) ); + return diffs; + } - { - const QString longtext = text1.length() > text2.length() ? text1 : text2; - const QString shorttext = text1.length() > text2.length() ? text2 : text1; - const int i = longtext.indexOf(shorttext); - if (i != -1) { - // Shorter text is inside the longer text (speedup). - const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; - diffs.append(Diff(op, longtext.left(i))); - diffs.append(Diff(EQUAL, shorttext)); - diffs.append(Diff(op, safeMid(longtext, i + shorttext.length()))); - return diffs; - } - - if (shorttext.length() == 1) { - // Single character string. - // After the previous speedup, the character can't be an equality. - diffs.append(Diff(DELETE, text1)); - diffs.append(Diff(INSERT, text2)); - return diffs; - } - // Garbage collect longtext and shorttext by scoping out. - } - - // Check to see if the problem can be split in two. - const QStringList hm = diff_halfMatch(text1, text2); - if (hm.count() > 0) { - // A half-match was found, sort out the return data. - const QString text1_a = hm[0]; - const QString text1_b = hm[1]; - const QString text2_a = hm[2]; - const QString text2_b = hm[3]; - const QString mid_common = hm[4]; - // Send both pairs off for separate processing. - const QList diffs_a = diff_main(text1_a, text2_a, - checklines, deadline); - const QList diffs_b = diff_main(text1_b, text2_b, - checklines, deadline); - // Merge the results. 
- diffs = diffs_a; - diffs.append(Diff(EQUAL, mid_common)); - diffs += diffs_b; - return diffs; - } + if ( shorttext.length() == 1 ) + { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.append( Diff( DELETE, text1 ) ); + diffs.append( Diff( INSERT, text2 ) ); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } - // Perform a real diff. - if (checklines && text1.length() > 100 && text2.length() > 100) { - return diff_lineMode(text1, text2, deadline); - } + // Check to see if the problem can be split in two. + const QStringList hm = diff_halfMatch( text1, text2 ); + if ( hm.count() > 0 ) + { + // A half-match was found, sort out the return data. + const QString text1_a = hm[ 0 ]; + const QString text1_b = hm[ 1 ]; + const QString text2_a = hm[ 2 ]; + const QString text2_b = hm[ 3 ]; + const QString mid_common = hm[ 4 ]; + // Send both pairs off for separate processing. + const QList< Diff > diffs_a = diff_main( text1_a, text2_a, checklines, deadline ); + const QList< Diff > diffs_b = diff_main( text1_b, text2_b, checklines, deadline ); + // Merge the results. + diffs = diffs_a; + diffs.append( Diff( EQUAL, mid_common ) ); + diffs += diffs_b; + return diffs; + } - return diff_bisect(text1, text2, deadline); -} + // Perform a real diff. + if ( checklines && text1.length() > 100 && text2.length() > 100 ) + { + return diff_lineMode( text1, text2, deadline ); + } + return diff_bisect( text1, text2, deadline ); +} -QList diff_match_patch::diff_lineMode(QString text1, QString text2, - clock_t deadline) { - // Scan the text on a line-by-line basis first. - const QList b = diff_linesToChars(text1, text2); - text1 = b[0].toString(); - text2 = b[1].toString(); - QStringList linearray = b[2].toStringList(); - - QList diffs = diff_main(text1, text2, false, deadline); - - // Convert the diff back to original text. - diff_charsToLines(diffs, linearray); - // Eliminate freak matches (e.g. 
blank lines) - diff_cleanupSemantic(diffs); - - // Rediff any replacement blocks, this time character-by-character. - // Add a dummy entry at the end. - diffs.append(Diff(EQUAL, "")); - int count_delete = 0; - int count_insert = 0; - QString text_delete = ""; - QString text_insert = ""; - - QMutableListIterator pointer(diffs); - Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - while (thisDiff != NULL) { - switch (thisDiff->operation) { - case INSERT: - count_insert++; - text_insert += thisDiff->text; - break; - case DELETE: - count_delete++; - text_delete += thisDiff->text; - break; - case EQUAL: - // Upon reaching an equality, check for prior redundancies. - if (count_delete >= 1 && count_insert >= 1) { - // Delete the offending records and add the merged ones. - pointer.previous(); - for (int j = 0; j < count_delete + count_insert; j++) { - pointer.previous(); - pointer.remove(); - } - foreach(Diff newDiff, - diff_main(text_delete, text_insert, false, deadline)) { - pointer.insert(newDiff); - } +QList< Diff > diff_match_patch::diff_lineMode( QString text1, QString text2, clock_t deadline ) +{ + // Scan the text on a line-by-line basis first. + const QList< QVariant > b = diff_linesToChars( text1, text2 ); + text1 = b[ 0 ].toString(); + text2 = b[ 1 ].toString(); + QStringList linearray = b[ 2 ].toStringList(); + + QList< Diff > diffs = diff_main( text1, text2, false, deadline ); + + // Convert the diff back to original text. + diff_charsToLines( diffs, linearray ); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic( diffs ); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.append( Diff( EQUAL, "" ) ); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + + QMutableListIterator< Diff > pointer( diffs ); + Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; + while ( thisDiff != NULL ) + { + switch ( thisDiff->operation ) + { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if ( count_delete >= 1 && count_insert >= 1 ) + { + // Delete the offending records and add the merged ones. + pointer.previous(); + for ( int j = 0; j < count_delete + count_insert; j++ ) + { + pointer.previous(); + pointer.remove(); + } + foreach( Diff newDiff, diff_main( text_delete, text_insert, false, deadline ) ) + { + pointer.insert( newDiff ); + } + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + break; } - count_insert = 0; - count_delete = 0; - text_delete = ""; - text_insert = ""; - break; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - diffs.removeLast(); // Remove the dummy entry at the end. + diffs.removeLast(); // Remove the dummy entry at the end. - return diffs; + return diffs; } +QList< Diff > diff_match_patch::diff_bisect( const QString &text1, const QString &text2, clock_t deadline ) +{ + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int max_d = ( text1_length + text2_length + 1 ) / 2; + const int v_offset = max_d; + const int v_length = 2 * max_d; + int *v1 = new int[ v_length ]; + int *v2 = new int[ v_length ]; + for ( int x = 0; x < v_length; x++ ) + { + v1[ x ] = -1; + v2[ x ] = -1; + } + v1[ v_offset + 1 ] = 0; + v2[ v_offset + 1 ] = 0; + const int delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + const bool front = ( delta % 2 != 0 ); + // Offsets for start and end of k loop. 
+ // Prevents mapping of space beyond the grid. + int k1start = 0; + int k1end = 0; + int k2start = 0; + int k2end = 0; + for ( int d = 0; d < max_d; d++ ) + { + // Bail out if deadline is reached. + if ( clock() > deadline ) + { + break; + } -QList diff_match_patch::diff_bisect(const QString &text1, - const QString &text2, clock_t deadline) { - // Cache the text lengths to prevent multiple calls. - const int text1_length = text1.length(); - const int text2_length = text2.length(); - const int max_d = (text1_length + text2_length + 1) / 2; - const int v_offset = max_d; - const int v_length = 2 * max_d; - int *v1 = new int[v_length]; - int *v2 = new int[v_length]; - for (int x = 0; x < v_length; x++) { - v1[x] = -1; - v2[x] = -1; - } - v1[v_offset + 1] = 0; - v2[v_offset + 1] = 0; - const int delta = text1_length - text2_length; - // If the total number of characters is odd, then the front path will - // collide with the reverse path. - const bool front = (delta % 2 != 0); - // Offsets for start and end of k loop. - // Prevents mapping of space beyond the grid. - int k1start = 0; - int k1end = 0; - int k2start = 0; - int k2end = 0; - for (int d = 0; d < max_d; d++) { - // Bail out if deadline is reached. - if (clock() > deadline) { - break; - } - - // Walk the front path one step. - for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { - const int k1_offset = v_offset + k1; - int x1; - if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { - x1 = v1[k1_offset + 1]; - } else { - x1 = v1[k1_offset - 1] + 1; - } - int y1 = x1 - k1; - while (x1 < text1_length && y1 < text2_length - && text1[x1] == text2[y1]) { - x1++; - y1++; - } - v1[k1_offset] = x1; - if (x1 > text1_length) { - // Ran off the right of the graph. - k1end += 2; - } else if (y1 > text2_length) { - // Ran off the bottom of the graph. 
- k1start += 2; - } else if (front) { - int k2_offset = v_offset + delta - k1; - if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { - // Mirror x2 onto top-left coordinate system. - int x2 = text1_length - v2[k2_offset]; - if (x1 >= x2) { - // Overlap detected. - delete [] v1; - delete [] v2; - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } + // Walk the front path one step. + for ( int k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ) + { + const int k1_offset = v_offset + k1; + int x1; + if ( k1 == -d || ( k1 != d && v1[ k1_offset - 1 ] < v1[ k1_offset + 1 ] ) ) + { + x1 = v1[ k1_offset + 1 ]; + } + else + { + x1 = v1[ k1_offset - 1 ] + 1; + } + int y1 = x1 - k1; + while ( x1 < text1_length && y1 < text2_length && text1[ x1 ] == text2[ y1 ] ) + { + x1++; + y1++; + } + v1[ k1_offset ] = x1; + if ( x1 > text1_length ) + { + // Ran off the right of the graph. + k1end += 2; + } + else if ( y1 > text2_length ) + { + // Ran off the bottom of the graph. + k1start += 2; + } + else if ( front ) + { + int k2_offset = v_offset + delta - k1; + if ( k2_offset >= 0 && k2_offset < v_length && v2[ k2_offset ] != -1 ) + { + // Mirror x2 onto top-left coordinate system. + int x2 = text1_length - v2[ k2_offset ]; + if ( x1 >= x2 ) + { + // Overlap detected. + delete[] v1; + delete[] v2; + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } } - } - } - - // Walk the reverse path one step. - for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { - const int k2_offset = v_offset + k2; - int x2; - if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { - x2 = v2[k2_offset + 1]; - } else { - x2 = v2[k2_offset - 1] + 1; - } - int y2 = x2 - k2; - while (x2 < text1_length && y2 < text2_length - && text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1]) { - x2++; - y2++; - } - v2[k2_offset] = x2; - if (x2 > text1_length) { - // Ran off the left of the graph. 
- k2end += 2; - } else if (y2 > text2_length) { - // Ran off the top of the graph. - k2start += 2; - } else if (!front) { - int k1_offset = v_offset + delta - k2; - if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { - int x1 = v1[k1_offset]; - int y1 = v_offset + x1 - k1_offset; - // Mirror x2 onto top-left coordinate system. - x2 = text1_length - x2; - if (x1 >= x2) { - // Overlap detected. - delete [] v1; - delete [] v2; - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } + + // Walk the reverse path one step. + for ( int k2 = -d + k2start; k2 <= d - k2end; k2 += 2 ) + { + const int k2_offset = v_offset + k2; + int x2; + if ( k2 == -d || ( k2 != d && v2[ k2_offset - 1 ] < v2[ k2_offset + 1 ] ) ) + { + x2 = v2[ k2_offset + 1 ]; + } + else + { + x2 = v2[ k2_offset - 1 ] + 1; + } + int y2 = x2 - k2; + while ( x2 < text1_length && y2 < text2_length && text1[ text1_length - x2 - 1 ] == text2[ text2_length - y2 - 1 ] ) + { + x2++; + y2++; + } + v2[ k2_offset ] = x2; + if ( x2 > text1_length ) + { + // Ran off the left of the graph. + k2end += 2; + } + else if ( y2 > text2_length ) + { + // Ran off the top of the graph. + k2start += 2; + } + else if ( !front ) + { + int k1_offset = v_offset + delta - k2; + if ( k1_offset >= 0 && k1_offset < v_length && v1[ k1_offset ] != -1 ) + { + int x1 = v1[ k1_offset ]; + int y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if ( x1 >= x2 ) + { + // Overlap detected. + delete[] v1; + delete[] v2; + return diff_bisectSplit( text1, text2, x1, y1, deadline ); + } + } + } } - } - } - } - delete [] v1; - delete [] v2; - // Diff took too long and hit the deadline or - // number of diffs equals number of characters, no commonality at all. 
- QList diffs; - diffs.append(Diff(DELETE, text1)); - diffs.append(Diff(INSERT, text2)); - return diffs; + } + delete[] v1; + delete[] v2; + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + QList< Diff > diffs; + diffs.append( Diff( DELETE, text1 ) ); + diffs.append( Diff( INSERT, text2 ) ); + return diffs; } -QList diff_match_patch::diff_bisectSplit(const QString &text1, - const QString &text2, int x, int y, clock_t deadline) { - QString text1a = text1.left(x); - QString text2a = text2.left(y); - QString text1b = safeMid(text1, x); - QString text2b = safeMid(text2, y); +QList< Diff > diff_match_patch::diff_bisectSplit( const QString &text1, const QString &text2, int x, int y, clock_t deadline ) +{ + QString text1a = text1.left( x ); + QString text2a = text2.left( y ); + QString text1b = safeMid( text1, x ); + QString text2b = safeMid( text2, y ); - // Compute both diffs serially. - QList diffs = diff_main(text1a, text2a, false, deadline); - QList diffsb = diff_main(text1b, text2b, false, deadline); + // Compute both diffs serially. + QList< Diff > diffs = diff_main( text1a, text2a, false, deadline ); + QList< Diff > diffsb = diff_main( text1b, text2b, false, deadline ); - return diffs + diffsb; + return diffs + diffsb; } -QList diff_match_patch::diff_linesToChars(const QString &text1, - const QString &text2) { - QStringList lineArray; - QMap lineHash; - // e.g. linearray[4] == "Hello\n" - // e.g. linehash.get("Hello\n") == 4 - - // "\x00" is a valid character, but various debuggers don't like it. - // So we'll insert a junk entry to avoid generating a null character. 
- lineArray.append(""); - - const QString chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); - const QString chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); - - QList listRet; - listRet.append(QVariant::fromValue(chars1)); - listRet.append(QVariant::fromValue(chars2)); - listRet.append(QVariant::fromValue(lineArray)); - return listRet; -} +QList< QVariant > diff_match_patch::diff_linesToChars( const QString &text1, const QString &text2 ) +{ + QStringList lineArray; + QMap< QString, int > lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray.append( "" ); -QString diff_match_patch::diff_linesToCharsMunge(const QString &text, - QStringList &lineArray, - QMap &lineHash) { - int lineStart = 0; - int lineEnd = -1; - QString line; - QString chars; - // Walk the text, pulling out a substring for each line. - // text.split('\n') would would temporarily double our memory footprint. - // Modifying text would create many large strings to garbage collect. 
- while (lineEnd < text.length() - 1) { - lineEnd = text.indexOf('\n', lineStart); - if (lineEnd == -1) { - lineEnd = text.length() - 1; - } - line = safeMid(text, lineStart, lineEnd + 1 - lineStart); - lineStart = lineEnd + 1; - - if (lineHash.contains(line)) { - chars += QChar(static_cast(lineHash.value(line))); - } else { - lineArray.append(line); - lineHash.insert(line, lineArray.size() - 1); - chars += QChar(static_cast(lineArray.size() - 1)); - } - } - return chars; -} + const QString chars1 = diff_linesToCharsMunge( text1, lineArray, lineHash ); + const QString chars2 = diff_linesToCharsMunge( text2, lineArray, lineHash ); + QList< QVariant > listRet; + listRet.append( QVariant::fromValue( chars1 ) ); + listRet.append( QVariant::fromValue( chars2 ) ); + listRet.append( QVariant::fromValue( lineArray ) ); + return listRet; +} +QString diff_match_patch::diff_linesToCharsMunge( const QString &text, QStringList &lineArray, QMap< QString, int > &lineHash ) +{ + int lineStart = 0; + int lineEnd = -1; + QString line; + QString chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + while ( lineEnd < text.length() - 1 ) + { + lineEnd = text.indexOf( '\n', lineStart ); + if ( lineEnd == -1 ) + { + lineEnd = text.length() - 1; + } + line = safeMid( text, lineStart, lineEnd + 1 - lineStart ); + lineStart = lineEnd + 1; -void diff_match_patch::diff_charsToLines(QList &diffs, - const QStringList &lineArray) { - // Qt has no mutable foreach construct. 
- QMutableListIterator i(diffs); - while (i.hasNext()) { - Diff &diff = i.next(); - QString text; - for (int y = 0; y < diff.text.length(); y++) { - text += lineArray.value(static_cast(diff.text[y].unicode())); + if ( lineHash.contains( line ) ) + { + chars += QChar( static_cast< ushort >( lineHash.value( line ) ) ); + } + else + { + lineArray.append( line ); + lineHash.insert( line, lineArray.size() - 1 ); + chars += QChar( static_cast< ushort >( lineArray.size() - 1 ) ); + } } - diff.text = text; - } + return chars; } - -int diff_match_patch::diff_commonPrefix(const QString &text1, - const QString &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const int n = std::min(text1.length(), text2.length()); - for (int i = 0; i < n; i++) { - if (text1[i] != text2[i]) { - return i; +void diff_match_patch::diff_charsToLines( QList< Diff > &diffs, const QStringList &lineArray ) +{ + // Qt has no mutable foreach construct. + QMutableListIterator< Diff > i( diffs ); + while ( i.hasNext() ) + { + Diff &diff = i.next(); + QString text; + for ( int y = 0; y < diff.text.length(); y++ ) + { + text += lineArray.value( static_cast< ushort >( diff.text[ y ].unicode() ) ); + } + diff.text = text; } - } - return n; } - -int diff_match_patch::diff_commonSuffix(const QString &text1, - const QString &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const int text1_length = text1.length(); - const int text2_length = text2.length(); - const int n = std::min(text1_length, text2_length); - for (int i = 1; i <= n; i++) { - if (text1[text1_length - i] != text2[text2_length - i]) { - return i - 1; +int diff_match_patch::diff_commonPrefix( const QString &text1, const QString &text2 ) +{ + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int n = std::min( text1.length(), text2.length() ); + for ( int i = 0; i < n; i++ ) + { + if ( text1[ i ] != text2[ i ] ) + { + return i; + } } - } - return n; + return n; } -int 
diff_match_patch::diff_commonOverlap(const QString &text1, - const QString &text2) { - // Cache the text lengths to prevent multiple calls. - const int text1_length = text1.length(); - const int text2_length = text2.length(); - // Eliminate the null case. - if (text1_length == 0 || text2_length == 0) { - return 0; - } - // Truncate the longer string. - QString text1_trunc = text1; - QString text2_trunc = text2; - if (text1_length > text2_length) { - text1_trunc = text1.right(text2_length); - } else if (text1_length < text2_length) { - text2_trunc = text2.left(text1_length); - } - const int text_length = std::min(text1_length, text2_length); - // Quick check for the worst case. - if (text1_trunc == text2_trunc) { - return text_length; - } - - // Start by looking for a single character match - // and increase length until no match is found. - // Performance analysis: http://neil.fraser.name/news/2010/11/04/ - int best = 0; - int length = 1; - while (true) { - QString pattern = text1_trunc.right(length); - int found = text2_trunc.indexOf(pattern); - if (found == -1) { - return best; - } - length += found; - if (found == 0 || text1_trunc.right(length) == text2_trunc.left(length)) { - best = length; - length++; - } - } +int diff_match_patch::diff_commonSuffix( const QString &text1, const QString &text2 ) +{ + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int n = std::min( text1_length, text2_length ); + for ( int i = 1; i <= n; i++ ) + { + if ( text1[ text1_length - i ] != text2[ text2_length - i ] ) + { + return i - 1; + } + } + return n; } -QStringList diff_match_patch::diff_halfMatch(const QString &text1, - const QString &text2) { - if (Diff_Timeout <= 0) { - // Don't risk returning a non-optimal diff if we have unlimited time. - return QStringList(); - } - const QString longtext = text1.length() > text2.length() ? 
text1 : text2; - const QString shorttext = text1.length() > text2.length() ? text2 : text1; - if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { - return QStringList(); // Pointless. - } - - // First check if the second quarter is the seed for a half-match. - const QStringList hm1 = diff_halfMatchI(longtext, shorttext, - (longtext.length() + 3) / 4); - // Check again based on the third quarter. - const QStringList hm2 = diff_halfMatchI(longtext, shorttext, - (longtext.length() + 1) / 2); - QStringList hm; - if (hm1.isEmpty() && hm2.isEmpty()) { - return QStringList(); - } else if (hm2.isEmpty()) { - hm = hm1; - } else if (hm1.isEmpty()) { - hm = hm2; - } else { - // Both matched. Select the longest. - hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; - } - - // A half-match was found, sort out the return data. - if (text1.length() > text2.length()) { - return hm; - } else { - QStringList listRet; - listRet << hm[2] << hm[3] << hm[0] << hm[1] << hm[4]; - return listRet; - } +int diff_match_patch::diff_commonOverlap( const QString &text1, const QString &text2 ) +{ + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + // Eliminate the null case. + if ( text1_length == 0 || text2_length == 0 ) + { + return 0; + } + // Truncate the longer string. + QString text1_trunc = text1; + QString text2_trunc = text2; + if ( text1_length > text2_length ) + { + text1_trunc = text1.right( text2_length ); + } + else if ( text1_length < text2_length ) + { + text2_trunc = text2.left( text1_length ); + } + const int text_length = std::min( text1_length, text2_length ); + // Quick check for the worst case. + if ( text1_trunc == text2_trunc ) + { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. 
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + int best = 0; + int length = 1; + while ( true ) + { + QString pattern = text1_trunc.right( length ); + int found = text2_trunc.indexOf( pattern ); + if ( found == -1 ) + { + return best; + } + length += found; + if ( found == 0 || text1_trunc.right( length ) == text2_trunc.left( length ) ) + { + best = length; + length++; + } + } } +QStringList diff_match_patch::diff_halfMatch( const QString &text1, const QString &text2 ) +{ + if ( Diff_Timeout <= 0 ) + { + // Don't risk returning a non-optimal diff if we have unlimited time. + return QStringList(); + } + const QString longtext = text1.length() > text2.length() ? text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + if ( longtext.length() < 4 || shorttext.length() * 2 < longtext.length() ) + { + return QStringList(); // Pointless. + } -QStringList diff_match_patch::diff_halfMatchI(const QString &longtext, - const QString &shorttext, - int i) { - // Start with a 1/4 length substring at position i as a seed. 
- const QString seed = safeMid(longtext, i, longtext.length() / 4); - int j = -1; - QString best_common; - QString best_longtext_a, best_longtext_b; - QString best_shorttext_a, best_shorttext_b; - while ((j = shorttext.indexOf(seed, j + 1)) != -1) { - const int prefixLength = diff_commonPrefix(safeMid(longtext, i), - safeMid(shorttext, j)); - const int suffixLength = diff_commonSuffix(longtext.left(i), - shorttext.left(j)); - if (best_common.length() < suffixLength + prefixLength) { - best_common = safeMid(shorttext, j - suffixLength, suffixLength) - + safeMid(shorttext, j, prefixLength); - best_longtext_a = longtext.left(i - suffixLength); - best_longtext_b = safeMid(longtext, i + prefixLength); - best_shorttext_a = shorttext.left(j - suffixLength); - best_shorttext_b = safeMid(shorttext, j + prefixLength); - } - } - if (best_common.length() * 2 >= longtext.length()) { - QStringList listRet; - listRet << best_longtext_a << best_longtext_b << best_shorttext_a - << best_shorttext_b << best_common; - return listRet; - } else { - return QStringList(); - } + // First check if the second quarter is the seed for a half-match. + const QStringList hm1 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 3 ) / 4 ); + // Check again based on the third quarter. + const QStringList hm2 = diff_halfMatchI( longtext, shorttext, ( longtext.length() + 1 ) / 2 ); + QStringList hm; + if ( hm1.isEmpty() && hm2.isEmpty() ) + { + return QStringList(); + } + else if ( hm2.isEmpty() ) + { + hm = hm1; + } + else if ( hm1.isEmpty() ) + { + hm = hm2; + } + else + { + // Both matched. Select the longest. + hm = hm1[ 4 ].length() > hm2[ 4 ].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if ( text1.length() > text2.length() ) + { + return hm; + } + else + { + QStringList listRet; + listRet << hm[ 2 ] << hm[ 3 ] << hm[ 0 ] << hm[ 1 ] << hm[ 4 ]; + return listRet; + } } +QStringList diff_match_patch::diff_halfMatchI( const QString &longtext, const QString &shorttext, int i ) +{ + // Start with a 1/4 length substring at position i as a seed. + const QString seed = safeMid( longtext, i, longtext.length() / 4 ); + int j = -1; + QString best_common; + QString best_longtext_a, best_longtext_b; + QString best_shorttext_a, best_shorttext_b; + while ( ( j = shorttext.indexOf( seed, j + 1 ) ) != -1 ) + { + const int prefixLength = diff_commonPrefix( safeMid( longtext, i ), safeMid( shorttext, j ) ); + const int suffixLength = diff_commonSuffix( longtext.left( i ), shorttext.left( j ) ); + if ( best_common.length() < suffixLength + prefixLength ) + { + best_common = safeMid( shorttext, j - suffixLength, suffixLength ) + safeMid( shorttext, j, prefixLength ); + best_longtext_a = longtext.left( i - suffixLength ); + best_longtext_b = safeMid( longtext, i + prefixLength ); + best_shorttext_a = shorttext.left( j - suffixLength ); + best_shorttext_b = safeMid( shorttext, j + prefixLength ); + } + } + if ( best_common.length() * 2 >= longtext.length() ) + { + QStringList listRet; + listRet << best_longtext_a << best_longtext_b << best_shorttext_a << best_shorttext_b << best_common; + return listRet; + } + else + { + return QStringList(); + } +} -void diff_match_patch::diff_cleanupSemantic(QList &diffs) { - if (diffs.isEmpty()) { - return; - } - bool changes = false; - QStack equalities; // Stack of equalities. - QString lastequality; // Always equal to equalities.lastElement().text - QMutableListIterator pointer(diffs); - // Number of characters that changed prior to the equality. - int length_insertions1 = 0; - int length_deletions1 = 0; - // Number of characters that changed after the equality. 
- int length_insertions2 = 0; - int length_deletions2 = 0; - Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - while (thisDiff != NULL) { - if (thisDiff->operation == EQUAL) { - // Equality found. - equalities.push(*thisDiff); - length_insertions1 = length_insertions2; - length_deletions1 = length_deletions2; - length_insertions2 = 0; - length_deletions2 = 0; - lastequality = thisDiff->text; - } else { - // An insertion or deletion. - if (thisDiff->operation == INSERT) { - length_insertions2 += thisDiff->text.length(); - } else { - length_deletions2 += thisDiff->text.length(); - } - // Eliminate an equality that is smaller or equal to the edits on both - // sides of it. - if (!lastequality.isNull() - && (lastequality.length() - <= std::max(length_insertions1, length_deletions1)) - && (lastequality.length() - <= std::max(length_insertions2, length_deletions2))) { - // printf("Splitting: '%s'\n", qPrintable(lastequality)); - // Walk back to offending equality. - while (*thisDiff != equalities.top()) { - thisDiff = &pointer.previous(); +void diff_match_patch::diff_cleanupSemantic( QList< Diff > &diffs ) +{ + if ( diffs.isEmpty() ) + { + return; + } + bool changes = false; + QStack< Diff > equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator< Diff > pointer( diffs ); + // Number of characters that changed prior to the equality. + int length_insertions1 = 0; + int length_deletions1 = 0; + // Number of characters that changed after the equality. + int length_insertions2 = 0; + int length_deletions2 = 0; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + while ( thisDiff != NULL ) + { + if ( thisDiff->operation == EQUAL ) + { + // Equality found. 
+ equalities.push( *thisDiff ); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = thisDiff->text; } - pointer.next(); + else + { + // An insertion or deletion. + if ( thisDiff->operation == INSERT ) + { + length_insertions2 += thisDiff->text.length(); + } + else + { + length_deletions2 += thisDiff->text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if ( !lastequality.isNull() && ( lastequality.length() <= std::max( length_insertions1, length_deletions1 ) ) && ( lastequality.length() <= std::max( length_insertions2, length_deletions2 ) ) ) + { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while ( *thisDiff != equalities.top() ) + { + thisDiff = &pointer.previous(); + } + pointer.next(); - // Replace equality with a delete. - pointer.setValue(Diff(DELETE, lastequality)); - // Insert a corresponding an insert. - pointer.insert(Diff(INSERT, lastequality)); + // Replace equality with a delete. + pointer.setValue( Diff( DELETE, lastequality ) ); + // Insert a corresponding an insert. + pointer.insert( Diff( INSERT, lastequality ) ); - equalities.pop(); // Throw away the equality we just deleted. - if (!equalities.isEmpty()) { - // Throw away the previous equality (it needs to be reevaluated). - equalities.pop(); - } - if (equalities.isEmpty()) { - // There are no previous equalities, walk back to the start. - while (pointer.hasPrevious()) { - pointer.previous(); - } - } else { - // There is a safe equality we can fall back to. - thisDiff = &equalities.top(); - while (*thisDiff != pointer.previous()) { - // Intentionally empty loop. - } + equalities.pop(); // Throw away the equality we just deleted. + if ( !equalities.isEmpty() ) + { + // Throw away the previous equality (it needs to be reevaluated). 
+ equalities.pop(); + } + if ( equalities.isEmpty() ) + { + // There are no previous equalities, walk back to the start. + while ( pointer.hasPrevious() ) + { + pointer.previous(); + } + } + else + { + // There is a safe equality we can fall back to. + thisDiff = &equalities.top(); + while ( *thisDiff != pointer.previous() ) + { + // Intentionally empty loop. + } + } + + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = QString(); + changes = true; + } } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } - length_insertions1 = 0; // Reset the counters. - length_deletions1 = 0; - length_insertions2 = 0; - length_deletions2 = 0; - lastequality = QString(); - changes = true; - } + // Normalize the diff. + if ( changes ) + { + diff_cleanupMerge( diffs ); } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - - // Normalize the diff. - if (changes) { - diff_cleanupMerge(diffs); - } - diff_cleanupSemanticLossless(diffs); - - // Find any overlaps between deletions and insertions. - // e.g: abcxxxxxxdef - // -> abcxxxdef - // e.g: xxxabcdefxxx - // -> defxxxabc - // Only extract an overlap if it is as big as the edit ahead or behind it. - pointer.toFront(); - Diff *prevDiff = NULL; - thisDiff = NULL; - if (pointer.hasNext()) { - prevDiff = &pointer.next(); - if (pointer.hasNext()) { - thisDiff = &pointer.next(); - } - } - while (thisDiff != NULL) { - if (prevDiff->operation == DELETE && - thisDiff->operation == INSERT) { - QString deletion = prevDiff->text; - QString insertion = thisDiff->text; - int overlap_length1 = diff_commonOverlap(deletion, insertion); - int overlap_length2 = diff_commonOverlap(insertion, deletion); - if (overlap_length1 >= overlap_length2) { - if (overlap_length1 >= deletion.length() / 2.0 || - overlap_length1 >= insertion.length() / 2.0) { - // Overlap found. Insert an equality and trim the surrounding edits. 
- pointer.previous(); - pointer.insert(Diff(EQUAL, insertion.left(overlap_length1))); - prevDiff->text = - deletion.left(deletion.length() - overlap_length1); - thisDiff->text = safeMid(insertion, overlap_length1); - // pointer.insert inserts the element before the cursor, so there is - // no need to step past the new element. + diff_cleanupSemanticLossless( diffs ); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer.toFront(); + Diff *prevDiff = NULL; + thisDiff = NULL; + if ( pointer.hasNext() ) + { + prevDiff = &pointer.next(); + if ( pointer.hasNext() ) + { + thisDiff = &pointer.next(); } - } else { - if (overlap_length2 >= deletion.length() / 2.0 || - overlap_length2 >= insertion.length() / 2.0) { - // Reverse overlap found. - // Insert an equality and swap and trim the surrounding edits. - pointer.previous(); - pointer.insert(Diff(EQUAL, deletion.left(overlap_length2))); - prevDiff->operation = INSERT; - prevDiff->text = - insertion.left(insertion.length() - overlap_length2); - thisDiff->operation = DELETE; - thisDiff->text = safeMid(deletion, overlap_length2); - // pointer.insert inserts the element before the cursor, so there is - // no need to step past the new element. + } + while ( thisDiff != NULL ) + { + if ( prevDiff->operation == DELETE && thisDiff->operation == INSERT ) + { + QString deletion = prevDiff->text; + QString insertion = thisDiff->text; + int overlap_length1 = diff_commonOverlap( deletion, insertion ); + int overlap_length2 = diff_commonOverlap( insertion, deletion ); + if ( overlap_length1 >= overlap_length2 ) + { + if ( overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0 ) + { + // Overlap found. Insert an equality and trim the surrounding edits. 
+ pointer.previous(); + pointer.insert( Diff( EQUAL, insertion.left( overlap_length1 ) ) ); + prevDiff->text = deletion.left( deletion.length() - overlap_length1 ); + thisDiff->text = safeMid( insertion, overlap_length1 ); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + else + { + if ( overlap_length2 >= deletion.length() / 2.0 || overlap_length2 >= insertion.length() / 2.0 ) + { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + pointer.previous(); + pointer.insert( Diff( EQUAL, deletion.left( overlap_length2 ) ) ); + prevDiff->operation = INSERT; + prevDiff->text = insertion.left( insertion.length() - overlap_length2 ); + thisDiff->operation = DELETE; + thisDiff->text = safeMid( deletion, overlap_length2 ); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + prevDiff = thisDiff; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - prevDiff = thisDiff; - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } } +void diff_match_patch::diff_cleanupSemanticLossless( QList< Diff > &diffs ) +{ + QString equality1, edit, equality2; + QString commonString; + int commonOffset; + int score, bestScore; + QString bestEquality1, bestEdit, bestEquality2; + // Create a new iterator at the start. + QMutableListIterator< Diff > pointer( diffs ); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). 
+ while ( nextDiff != NULL ) + { + if ( prevDiff->operation == EQUAL && nextDiff->operation == EQUAL ) + { + // This is a single edit surrounded by equalities. + equality1 = prevDiff->text; + edit = thisDiff->text; + equality2 = nextDiff->text; + + // First, shift the edit as far left as possible. + commonOffset = diff_commonSuffix( equality1, edit ); + if ( commonOffset != 0 ) + { + commonString = safeMid( edit, edit.length() - commonOffset ); + equality1 = equality1.left( equality1.length() - commonOffset ); + edit = commonString + edit.left( edit.length() - commonOffset ); + equality2 = commonString + equality2; + } -void diff_match_patch::diff_cleanupSemanticLossless(QList &diffs) { - QString equality1, edit, equality2; - QString commonString; - int commonOffset; - int score, bestScore; - QString bestEquality1, bestEdit, bestEquality2; - // Create a new iterator at the start. - QMutableListIterator pointer(diffs); - Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - - // Intentionally ignore the first and last element (don't need checking). - while (nextDiff != NULL) { - if (prevDiff->operation == EQUAL && - nextDiff->operation == EQUAL) { - // This is a single edit surrounded by equalities. - equality1 = prevDiff->text; - edit = thisDiff->text; - equality2 = nextDiff->text; - - // First, shift the edit as far left as possible. - commonOffset = diff_commonSuffix(equality1, edit); - if (commonOffset != 0) { - commonString = safeMid(edit, edit.length() - commonOffset); - equality1 = equality1.left(equality1.length() - commonOffset); - edit = commonString + edit.left(edit.length() - commonOffset); - equality2 = commonString + equality2; - } - - // Second, step character by character right, looking for the best fit. 
- bestEquality1 = equality1; - bestEdit = edit; - bestEquality2 = equality2; - bestScore = diff_cleanupSemanticScore(equality1, edit) - + diff_cleanupSemanticScore(edit, equality2); - while (!edit.isEmpty() && !equality2.isEmpty() - && edit[0] == equality2[0]) { - equality1 += edit[0]; - edit = safeMid(edit, 1) + equality2[0]; - equality2 = safeMid(equality2, 1); - score = diff_cleanupSemanticScore(equality1, edit) - + diff_cleanupSemanticScore(edit, equality2); - // The >= encourages trailing rather than leading whitespace on edits. - if (score >= bestScore) { - bestScore = score; + // Second, step character by character right, looking for the best fit. bestEquality1 = equality1; bestEdit = edit; bestEquality2 = equality2; - } - } + bestScore = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + while ( !edit.isEmpty() && !equality2.isEmpty() && edit[ 0 ] == equality2[ 0 ] ) + { + equality1 += edit[ 0 ]; + edit = safeMid( edit, 1 ) + equality2[ 0 ]; + equality2 = safeMid( equality2, 1 ); + score = diff_cleanupSemanticScore( equality1, edit ) + diff_cleanupSemanticScore( edit, equality2 ); + // The >= encourages trailing rather than leading whitespace on edits. + if ( score >= bestScore ) + { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } - if (prevDiff->text != bestEquality1) { - // We have an improvement, save it back to the diff. - if (!bestEquality1.isEmpty()) { - prevDiff->text = bestEquality1; - } else { - pointer.previous(); // Walk past nextDiff. - pointer.previous(); // Walk past thisDiff. - pointer.previous(); // Walk past prevDiff. - pointer.remove(); // Delete prevDiff. - pointer.next(); // Walk past thisDiff. - pointer.next(); // Walk past nextDiff. - } - thisDiff->text = bestEdit; - if (!bestEquality2.isEmpty()) { - nextDiff->text = bestEquality2; - } else { - pointer.remove(); // Delete nextDiff. 
- nextDiff = thisDiff; - thisDiff = prevDiff; - } + if ( prevDiff->text != bestEquality1 ) + { + // We have an improvement, save it back to the diff. + if ( !bestEquality1.isEmpty() ) + { + prevDiff->text = bestEquality1; + } + else + { + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + pointer.next(); // Walk past nextDiff. + } + thisDiff->text = bestEdit; + if ( !bestEquality2.isEmpty() ) + { + nextDiff->text = bestEquality2; + } + else + { + pointer.remove(); // Delete nextDiff. + nextDiff = thisDiff; + thisDiff = prevDiff; + } + } } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; } - prevDiff = thisDiff; - thisDiff = nextDiff; - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - } } +int diff_match_patch::diff_cleanupSemanticScore( const QString &one, const QString &two ) +{ + if ( one.isEmpty() || two.isEmpty() ) + { + // Edges are the best. + return 6; + } -int diff_match_patch::diff_cleanupSemanticScore(const QString &one, - const QString &two) { - if (one.isEmpty() || two.isEmpty()) { - // Edges are the best. - return 6; - } - - // Each port of this function behaves slightly differently due to - // subtle differences in each language's definition of things like - // 'whitespace'. Since this function's purpose is largely cosmetic, - // the choice has been made to use each language's native features - // rather than force total conformity. 
- QChar char1 = one[one.length() - 1]; - QChar char2 = two[0]; - bool nonAlphaNumeric1 = !char1.isLetterOrNumber(); - bool nonAlphaNumeric2 = !char2.isLetterOrNumber(); - bool whitespace1 = nonAlphaNumeric1 && char1.isSpace(); - bool whitespace2 = nonAlphaNumeric2 && char2.isSpace(); - bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control; - bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control; - bool blankLine1 = lineBreak1 && BLANKLINEEND.indexIn(one) != -1; - bool blankLine2 = lineBreak2 && BLANKLINESTART.indexIn(two) != -1; - - if (blankLine1 || blankLine2) { - // Five points for blank lines. - return 5; - } else if (lineBreak1 || lineBreak2) { - // Four points for line breaks. - return 4; - } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { - // Three points for end of sentences. - return 3; - } else if (whitespace1 || whitespace2) { - // Two points for whitespace. - return 2; - } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { - // One point for non-alphanumeric. - return 1; - } - return 0; + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. 
+ QChar char1 = one[ one.length() - 1 ]; + QChar char2 = two[ 0 ]; + bool nonAlphaNumeric1 = !char1.isLetterOrNumber(); + bool nonAlphaNumeric2 = !char2.isLetterOrNumber(); + bool whitespace1 = nonAlphaNumeric1 && char1.isSpace(); + bool whitespace2 = nonAlphaNumeric2 && char2.isSpace(); + bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control; + bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control; + bool blankLine1 = lineBreak1 && BLANKLINEEND.indexIn( one ) != -1; + bool blankLine2 = lineBreak2 && BLANKLINESTART.indexIn( two ) != -1; + + if ( blankLine1 || blankLine2 ) + { + // Five points for blank lines. + return 5; + } + else if ( lineBreak1 || lineBreak2 ) + { + // Four points for line breaks. + return 4; + } + else if ( nonAlphaNumeric1 && !whitespace1 && whitespace2 ) + { + // Three points for end of sentences. + return 3; + } + else if ( whitespace1 || whitespace2 ) + { + // Two points for whitespace. + return 2; + } + else if ( nonAlphaNumeric1 || nonAlphaNumeric2 ) + { + // One point for non-alphanumeric. + return 1; + } + return 0; } - // Define some regex patterns for matching boundaries. -QRegExp diff_match_patch::BLANKLINEEND = QRegExp("\\n\\r?\\n$"); -QRegExp diff_match_patch::BLANKLINESTART = QRegExp("^\\r?\\n\\r?\\n"); - - -void diff_match_patch::diff_cleanupEfficiency(QList &diffs) { - if (diffs.isEmpty()) { - return; - } - bool changes = false; - QStack equalities; // Stack of equalities. - QString lastequality; // Always equal to equalities.lastElement().text - QMutableListIterator pointer(diffs); - // Is there an insertion operation before the last equality. - bool pre_ins = false; - // Is there a deletion operation before the last equality. - bool pre_del = false; - // Is there an insertion operation after the last equality. - bool post_ins = false; - // Is there a deletion operation after the last equality. - bool post_del = false; - - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - Diff *safeDiff = thisDiff; - - while (thisDiff != NULL) { - if (thisDiff->operation == EQUAL) { - // Equality found. - if (thisDiff->text.length() < Diff_EditCost && (post_ins || post_del)) { - // Candidate found. - equalities.push(*thisDiff); - pre_ins = post_ins; - pre_del = post_del; - lastequality = thisDiff->text; - } else { - // Not a candidate, and can never become one. - equalities.clear(); - lastequality = QString(); - safeDiff = thisDiff; - } - post_ins = post_del = false; - } else { - // An insertion or deletion. - if (thisDiff->operation == DELETE) { - post_del = true; - } else { - post_ins = true; - } - /* +QRegExp diff_match_patch::BLANKLINEEND = QRegExp( "\\n\\r?\\n$" ); +QRegExp diff_match_patch::BLANKLINESTART = QRegExp( "^\\r?\\n\\r?\\n" ); + +void diff_match_patch::diff_cleanupEfficiency( QList< Diff > &diffs ) +{ + if ( diffs.isEmpty() ) + { + return; + } + bool changes = false; + QStack< Diff > equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator< Diff > pointer( diffs ); + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *safeDiff = thisDiff; + + while ( thisDiff != NULL ) + { + if ( thisDiff->operation == EQUAL ) + { + // Equality found. + if ( thisDiff->text.length() < Diff_EditCost && ( post_ins || post_del ) ) + { + // Candidate found. + equalities.push( *thisDiff ); + pre_ins = post_ins; + pre_del = post_del; + lastequality = thisDiff->text; + } + else + { + // Not a candidate, and can never become one. 
+ equalities.clear(); + lastequality = QString(); + safeDiff = thisDiff; + } + post_ins = post_del = false; + } + else + { + // An insertion or deletion. + if ( thisDiff->operation == DELETE ) + { + post_del = true; + } + else + { + post_ins = true; + } + /* * Five types to be split: * ABXYCD * AXCD @@ -1033,1073 +1150,1205 @@ void diff_match_patch::diff_cleanupEfficiency(QList &diffs) { * AXCD * ABXC */ - if (!lastequality.isNull() - && ((pre_ins && pre_del && post_ins && post_del) - || ((lastequality.length() < Diff_EditCost / 2) - && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) - + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { - // printf("Splitting: '%s'\n", qPrintable(lastequality)); - // Walk back to offending equality. - while (*thisDiff != equalities.top()) { - thisDiff = &pointer.previous(); - } - pointer.next(); - - // Replace equality with a delete. - pointer.setValue(Diff(DELETE, lastequality)); - // Insert a corresponding an insert. - pointer.insert(Diff(INSERT, lastequality)); - thisDiff = &pointer.previous(); - pointer.next(); - - equalities.pop(); // Throw away the equality we just deleted. - lastequality = QString(); - if (pre_ins && pre_del) { - // No changes made which could affect previous entry, keep going. - post_ins = post_del = true; - equalities.clear(); - safeDiff = thisDiff; - } else { - if (!equalities.isEmpty()) { - // Throw away the previous equality (it needs to be reevaluated). - equalities.pop(); - } - if (equalities.isEmpty()) { - // There are no previous questionable equalities, - // walk back to the last known safe diff. - thisDiff = safeDiff; - } else { - // There is an equality we can fall back to. - thisDiff = &equalities.top(); - } - while (*thisDiff != pointer.previous()) { - // Intentionally empty loop. - } - post_ins = post_del = false; - } + if ( !lastequality.isNull() && ( ( pre_ins && pre_del && post_ins && post_del ) || ( ( lastequality.length() < Diff_EditCost / 2 ) && ( ( pre_ins ? 1 : 0 ) + ( pre_del ? 
1 : 0 ) + ( post_ins ? 1 : 0 ) + ( post_del ? 1 : 0 ) ) == 3 ) ) ) + { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while ( *thisDiff != equalities.top() ) + { + thisDiff = &pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.setValue( Diff( DELETE, lastequality ) ); + // Insert a corresponding an insert. + pointer.insert( Diff( INSERT, lastequality ) ); + thisDiff = &pointer.previous(); + pointer.next(); - changes = true; - } + equalities.pop(); // Throw away the equality we just deleted. + lastequality = QString(); + if ( pre_ins && pre_del ) + { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities.clear(); + safeDiff = thisDiff; + } + else + { + if ( !equalities.isEmpty() ) + { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop(); + } + if ( equalities.isEmpty() ) + { + // There are no previous questionable equalities, + // walk back to the last known safe diff. + thisDiff = safeDiff; + } + else + { + // There is an equality we can fall back to. + thisDiff = &equalities.top(); + } + while ( *thisDiff != pointer.previous() ) + { + // Intentionally empty loop. + } + post_ins = post_del = false; + } + + changes = true; + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - if (changes) { - diff_cleanupMerge(diffs); - } + if ( changes ) + { + diff_cleanupMerge( diffs ); + } } - -void diff_match_patch::diff_cleanupMerge(QList &diffs) { - diffs.append(Diff(EQUAL, "")); // Add a dummy entry at the end. - QMutableListIterator pointer(diffs); - int count_delete = 0; - int count_insert = 0; - QString text_delete = ""; - QString text_insert = ""; - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - Diff *prevEqual = NULL; - int commonlength; - while (thisDiff != NULL) { - switch (thisDiff->operation) { - case INSERT: - count_insert++; - text_insert += thisDiff->text; - prevEqual = NULL; - break; - case DELETE: - count_delete++; - text_delete += thisDiff->text; - prevEqual = NULL; - break; - case EQUAL: - if (count_delete + count_insert > 1) { - bool both_types = count_delete != 0 && count_insert != 0; - // Delete the offending records. - pointer.previous(); // Reverse direction. - while (count_delete-- > 0) { - pointer.previous(); - pointer.remove(); - } - while (count_insert-- > 0) { - pointer.previous(); - pointer.remove(); - } - if (both_types) { - // Factor out any common prefixies. - commonlength = diff_commonPrefix(text_insert, text_delete); - if (commonlength != 0) { - if (pointer.hasPrevious()) { - thisDiff = &pointer.previous(); - if (thisDiff->operation != EQUAL) { - throw "Previous diff should have been an equality."; +void diff_match_patch::diff_cleanupMerge( QList< Diff > &diffs ) +{ + diffs.append( Diff( EQUAL, "" ) ); // Add a dummy entry at the end. + QMutableListIterator< Diff > pointer( diffs ); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *prevEqual = NULL; + int commonlength; + while ( thisDiff != NULL ) + { + switch ( thisDiff->operation ) + { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + prevEqual = NULL; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + prevEqual = NULL; + break; + case EQUAL: + if ( count_delete + count_insert > 1 ) + { + bool both_types = count_delete != 0 && count_insert != 0; + // Delete the offending records. + pointer.previous(); // Reverse direction. 
+ while ( count_delete-- > 0 ) + { + pointer.previous(); + pointer.remove(); + } + while ( count_insert-- > 0 ) + { + pointer.previous(); + pointer.remove(); + } + if ( both_types ) + { + // Factor out any common prefixies. + commonlength = diff_commonPrefix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + if ( pointer.hasPrevious() ) + { + thisDiff = &pointer.previous(); + if ( thisDiff->operation != EQUAL ) + { + throw "Previous diff should have been an equality."; + } + thisDiff->text += text_insert.left( commonlength ); + pointer.next(); + } + else + { + pointer.insert( Diff( EQUAL, text_insert.left( commonlength ) ) ); + } + text_insert = safeMid( text_insert, commonlength ); + text_delete = safeMid( text_delete, commonlength ); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix( text_insert, text_delete ); + if ( commonlength != 0 ) + { + thisDiff = &pointer.next(); + thisDiff->text = safeMid( text_insert, text_insert.length() - commonlength ) + thisDiff->text; + text_insert = text_insert.left( text_insert.length() - commonlength ); + text_delete = text_delete.left( text_delete.length() - commonlength ); + pointer.previous(); + } + } + // Insert the merged records. + if ( !text_delete.isEmpty() ) + { + pointer.insert( Diff( DELETE, text_delete ) ); + } + if ( !text_insert.isEmpty() ) + { + pointer.insert( Diff( INSERT, text_insert ) ); + } + // Step forward to the equality. + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; } - thisDiff->text += text_insert.left(commonlength); - pointer.next(); - } else { - pointer.insert(Diff(EQUAL, text_insert.left(commonlength))); - } - text_insert = safeMid(text_insert, commonlength); - text_delete = safeMid(text_delete, commonlength); - } - // Factor out any common suffixies. 
- commonlength = diff_commonSuffix(text_insert, text_delete); - if (commonlength != 0) { - thisDiff = &pointer.next(); - thisDiff->text = safeMid(text_insert, text_insert.length() - - commonlength) + thisDiff->text; - text_insert = text_insert.left(text_insert.length() - - commonlength); - text_delete = text_delete.left(text_delete.length() - - commonlength); - pointer.previous(); - } - } - // Insert the merged records. - if (!text_delete.isEmpty()) { - pointer.insert(Diff(DELETE, text_delete)); - } - if (!text_insert.isEmpty()) { - pointer.insert(Diff(INSERT, text_insert)); - } - // Step forward to the equality. - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - - } else if (prevEqual != NULL) { - // Merge this equality with the previous one. - prevEqual->text += thisDiff->text; - pointer.remove(); - thisDiff = &pointer.previous(); - pointer.next(); // Forward direction + else if ( prevEqual != NULL ) + { + // Merge this equality with the previous one. + prevEqual->text += thisDiff->text; + pointer.remove(); + thisDiff = &pointer.previous(); + pointer.next(); // Forward direction + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + prevEqual = thisDiff; + break; } - count_insert = 0; - count_delete = 0; - text_delete = ""; - text_insert = ""; - prevEqual = thisDiff; - break; - } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - if (diffs.back().text.isEmpty()) { - diffs.removeLast(); // Remove the dummy entry at the end. - } - - /* + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + if ( diffs.back().text.isEmpty() ) + { + diffs.removeLast(); // Remove the dummy entry at the end. + } + + /* * Second pass: look for single edits surrounded on both sides by equalities * which can be shifted sideways to eliminate an equality. * e.g: ABAC -> ABAC */ - bool changes = false; - // Create a new iterator at the start. - // (As opposed to walking the current one back.) 
- pointer.toFront(); - Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - - // Intentionally ignore the first and last element (don't need checking). - while (nextDiff != NULL) { - if (prevDiff->operation == EQUAL && - nextDiff->operation == EQUAL) { - // This is a single edit surrounded by equalities. - if (thisDiff->text.endsWith(prevDiff->text)) { - // Shift the edit over the previous equality. - thisDiff->text = prevDiff->text - + thisDiff->text.left(thisDiff->text.length() - - prevDiff->text.length()); - nextDiff->text = prevDiff->text + nextDiff->text; - pointer.previous(); // Walk past nextDiff. - pointer.previous(); // Walk past thisDiff. - pointer.previous(); // Walk past prevDiff. - pointer.remove(); // Delete prevDiff. - pointer.next(); // Walk past thisDiff. - thisDiff = &pointer.next(); // Walk past nextDiff. - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - changes = true; - } else if (thisDiff->text.startsWith(nextDiff->text)) { - // Shift the edit over the next equality. - prevDiff->text += nextDiff->text; - thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length()) - + nextDiff->text; - pointer.remove(); // Delete nextDiff. - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - changes = true; + bool changes = false; + // Create a new iterator at the start. + // (As opposed to walking the current one back.) + pointer.toFront(); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). + while ( nextDiff != NULL ) + { + if ( prevDiff->operation == EQUAL && nextDiff->operation == EQUAL ) + { + // This is a single edit surrounded by equalities. 
+ if ( thisDiff->text.endsWith( prevDiff->text ) ) + { + // Shift the edit over the previous equality. + thisDiff->text = prevDiff->text + thisDiff->text.left( thisDiff->text.length() - prevDiff->text.length() ); + nextDiff->text = prevDiff->text + nextDiff->text; + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + thisDiff = &pointer.next(); // Walk past nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } + else if ( thisDiff->text.startsWith( nextDiff->text ) ) + { + // Shift the edit over the next equality. + prevDiff->text += nextDiff->text; + thisDiff->text = safeMid( thisDiff->text, nextDiff->text.length() ) + nextDiff->text; + pointer.remove(); // Delete nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if ( changes ) + { + diff_cleanupMerge( diffs ); } - prevDiff = thisDiff; - thisDiff = nextDiff; - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - // If shifts were made, the diff needs reordering and another shift sweep. - if (changes) { - diff_cleanupMerge(diffs); - } } - -int diff_match_patch::diff_xIndex(const QList &diffs, int loc) { - int chars1 = 0; - int chars2 = 0; - int last_chars1 = 0; - int last_chars2 = 0; - Diff lastDiff; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != INSERT) { - // Equality or deletion. - chars1 += aDiff.text.length(); - } - if (aDiff.operation != DELETE) { - // Equality or insertion. - chars2 += aDiff.text.length(); - } - if (chars1 > loc) { - // Overshot the location. 
- lastDiff = aDiff; - break; - } - last_chars1 = chars1; - last_chars2 = chars2; - } - if (lastDiff.operation == DELETE) { - // The location was deleted. - return last_chars2; - } - // Add the remaining character length. - return last_chars2 + (loc - last_chars1); +int diff_match_patch::diff_xIndex( const QList< Diff > &diffs, int loc ) +{ + int chars1 = 0; + int chars2 = 0; + int last_chars1 = 0; + int last_chars2 = 0; + Diff lastDiff; + foreach( Diff aDiff, diffs ) + { + if ( aDiff.operation != INSERT ) + { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if ( aDiff.operation != DELETE ) + { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if ( chars1 > loc ) + { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if ( lastDiff.operation == DELETE ) + { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + ( loc - last_chars1 ); } - -QString diff_match_patch::diff_prettyHtml(const QList &diffs) { - QString html; - QString text; - foreach(Diff aDiff, diffs) { - text = aDiff.text; - text.replace("&", "&").replace("<", "<") - .replace(">", ">").replace("\n", "¶
"); - switch (aDiff.operation) { - case INSERT: - html += QString("") + text - + QString(""); - break; - case DELETE: - html += QString("") + text - + QString(""); - break; - case EQUAL: - html += QString("") + text + QString(""); - break; - } - } - return html; +QString diff_match_patch::diff_prettyHtml( const QList< Diff > &diffs ) +{ + QString html; + QString text; + foreach( Diff aDiff, diffs ) + { + text = aDiff.text; + text.replace( "&", "&" ).replace( "<", "<" ).replace( ">", ">" ).replace( "\n", "¶
" ); + switch ( aDiff.operation ) + { + case INSERT: + html += QString( "" ) + text + QString( "" ); + break; + case DELETE: + html += QString( "" ) + text + QString( "" ); + break; + case EQUAL: + html += QString( "" ) + text + QString( "" ); + break; + } + } + return html; } - -QString diff_match_patch::diff_text1(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != INSERT) { - text += aDiff.text; +QString diff_match_patch::diff_text1( const QList< Diff > &diffs ) +{ + QString text; + foreach( Diff aDiff, diffs ) + { + if ( aDiff.operation != INSERT ) + { + text += aDiff.text; + } } - } - return text; + return text; } - -QString diff_match_patch::diff_text2(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != DELETE) { - text += aDiff.text; +QString diff_match_patch::diff_text2( const QList< Diff > &diffs ) +{ + QString text; + foreach( Diff aDiff, diffs ) + { + if ( aDiff.operation != DELETE ) + { + text += aDiff.text; + } } - } - return text; + return text; } - -int diff_match_patch::diff_levenshtein(const QList &diffs) { - int levenshtein = 0; - int insertions = 0; - int deletions = 0; - foreach(Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: - insertions += aDiff.text.length(); - break; - case DELETE: - deletions += aDiff.text.length(); - break; - case EQUAL: - // A deletion and an insertion is one substitution. 
- levenshtein += std::max(insertions, deletions); - insertions = 0; - deletions = 0; - break; - } - } - levenshtein += std::max(insertions, deletions); - return levenshtein; +int diff_match_patch::diff_levenshtein( const QList< Diff > &diffs ) +{ + int levenshtein = 0; + int insertions = 0; + int deletions = 0; + foreach( Diff aDiff, diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += std::max( insertions, deletions ); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max( insertions, deletions ); + return levenshtein; } - -QString diff_match_patch::diff_toDelta(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: { - QString encoded = QString(QUrl::toPercentEncoding(aDiff.text, - " !~*'();/?:@&=+$,#")); - text += QString("+") + encoded + QString("\t"); - break; - } - case DELETE: - text += QString("-") + QString::number(aDiff.text.length()) - + QString("\t"); - break; - case EQUAL: - text += QString("=") + QString::number(aDiff.text.length()) - + QString("\t"); - break; - } - } - if (!text.isEmpty()) { - // Strip off trailing tab character. 
- text = text.left(text.length() - 1); - } - return text; +QString diff_match_patch::diff_toDelta( const QList< Diff > &diffs ) +{ + QString text; + foreach( Diff aDiff, diffs ) + { + switch ( aDiff.operation ) + { + case INSERT: + { + QString encoded = QString( QUrl::toPercentEncoding( aDiff.text, " !~*'();/?:@&=+$,#" ) ); + text += QString( "+" ) + encoded + QString( "\t" ); + break; + } + case DELETE: + text += QString( "-" ) + QString::number( aDiff.text.length() ) + QString( "\t" ); + break; + case EQUAL: + text += QString( "=" ) + QString::number( aDiff.text.length() ) + QString( "\t" ); + break; + } + } + if ( !text.isEmpty() ) + { + // Strip off trailing tab character. + text = text.left( text.length() - 1 ); + } + return text; } - -QList diff_match_patch::diff_fromDelta(const QString &text1, - const QString &delta) { - QList diffs; - int pointer = 0; // Cursor in text1 - QStringList tokens = delta.split("\t"); - foreach(QString token, tokens) { - if (token.isEmpty()) { - // Blank tokens are ok (from a trailing \t). - continue; - } - // Each token begins with a one character parameter which specifies the - // operation of this token (delete, insert, equality). - QString param = safeMid(token, 1); - switch (token[0].toAscii()) { - case '+': - param = QUrl::fromPercentEncoding(qPrintable(param)); - diffs.append(Diff(INSERT, param)); - break; - case '-': - // Fall through. - case '=': { - int n; - n = param.toInt(); - if (n < 0) { - throw QString("Negative number in diff_fromDelta: %1").arg(param); +QList< Diff > diff_match_patch::diff_fromDelta( const QString &text1, const QString &delta ) +{ + QList< Diff > diffs; + int pointer = 0; // Cursor in text1 + QStringList tokens = delta.split( "\t" ); + foreach( QString token, tokens ) + { + if ( token.isEmpty() ) + { + // Blank tokens are ok (from a trailing \t). 
+ continue; } - QString text; - text = safeMid(text1, pointer, n); - pointer += n; - if (token[0] == QChar('=')) { - diffs.append(Diff(EQUAL, text)); - } else { - diffs.append(Diff(DELETE, text)); + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + QString param = safeMid( token, 1 ); + switch ( token[ 0 ].toAscii() ) + { + case '+': + param = QUrl::fromPercentEncoding( qPrintable( param ) ); + diffs.append( Diff( INSERT, param ) ); + break; + case '-': + // Fall through. + case '=': + { + int n; + n = param.toInt(); + if ( n < 0 ) + { + throw QString( "Negative number in diff_fromDelta: %1" ).arg( param ); + } + QString text; + text = safeMid( text1, pointer, n ); + pointer += n; + if ( token[ 0 ] == QChar( '=' ) ) + { + diffs.append( Diff( EQUAL, text ) ); + } + else + { + diffs.append( Diff( DELETE, text ) ); + } + break; + } + default: + throw QString( "Invalid diff operation in diff_fromDelta: %1" ).arg( token[ 0 ] ); } - break; - } - default: - throw QString("Invalid diff operation in diff_fromDelta: %1") - .arg(token[0]); - } - } - if (pointer != text1.length()) { - throw QString("Delta length (%1) smaller than source text length (%2)") - .arg(pointer).arg(text1.length()); - } - return diffs; + } + if ( pointer != text1.length() ) + { + throw QString( "Delta length (%1) smaller than source text length (%2)" ).arg( pointer ).arg( text1.length() ); + } + return diffs; } +// MATCH FUNCTIONS - // MATCH FUNCTIONS - - -int diff_match_patch::match_main(const QString &text, const QString &pattern, - int loc) { - // Check for null inputs. - if (text.isNull() || pattern.isNull()) { - throw "Null inputs. (match_main)"; - } +int diff_match_patch::match_main( const QString &text, const QString &pattern, int loc ) +{ + // Check for null inputs. + if ( text.isNull() || pattern.isNull() ) + { + throw "Null inputs. 
(match_main)"; + } - loc = std::max(0, std::min(loc, text.length())); - if (text == pattern) { - // Shortcut (potentially not guaranteed by the algorithm) - return 0; - } else if (text.isEmpty()) { - // Nothing to match. - return -1; - } else if (loc + pattern.length() <= text.length() - && safeMid(text, loc, pattern.length()) == pattern) { - // Perfect match at the perfect spot! (Includes case of null pattern) - return loc; - } else { - // Do a fuzzy compare. - return match_bitap(text, pattern, loc); - } + loc = std::max( 0, std::min( loc, text.length() ) ); + if ( text == pattern ) + { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } + else if ( text.isEmpty() ) + { + // Nothing to match. + return -1; + } + else if ( loc + pattern.length() <= text.length() && safeMid( text, loc, pattern.length() ) == pattern ) + { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } + else + { + // Do a fuzzy compare. + return match_bitap( text, pattern, loc ); + } } +int diff_match_patch::match_bitap( const QString &text, const QString &pattern, int loc ) +{ + if ( !( Match_MaxBits == 0 || pattern.length() <= Match_MaxBits ) ) + { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + QMap< QChar, int > s = match_alphabet( pattern ); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + int best_loc = text.indexOf( pattern, loc ); + if ( best_loc != -1 ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + // What about in the other direction? 
(speedup) + best_loc = text.lastIndexOf( pattern, loc + pattern.length() ); + if ( best_loc != -1 ) + { + score_threshold = std::min( match_bitapScore( 0, best_loc, loc, pattern ), score_threshold ); + } + } -int diff_match_patch::match_bitap(const QString &text, const QString &pattern, - int loc) { - if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { - throw "Pattern too long for this application."; - } - - // Initialise the alphabet. - QMap s = match_alphabet(pattern); - - // Highest score beyond which we give up. - double score_threshold = Match_Threshold; - // Is there a nearby exact match? (speedup) - int best_loc = text.indexOf(pattern, loc); - if (best_loc != -1) { - score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), - score_threshold); - // What about in the other direction? (speedup) - best_loc = text.lastIndexOf(pattern, loc + pattern.length()); - if (best_loc != -1) { - score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), - score_threshold); - } - } - - // Initialise the bit arrays. - int matchmask = 1 << (pattern.length() - 1); - best_loc = -1; - - int bin_min, bin_mid; - int bin_max = pattern.length() + text.length(); - int *rd; - int *last_rd = NULL; - for (int d = 0; d < pattern.length(); d++) { - // Scan for the best match; each iteration allows for one more error. - // Run a binary search to determine how far from 'loc' we can stray at - // this error level. - bin_min = 0; - bin_mid = bin_max; - while (bin_min < bin_mid) { - if (match_bitapScore(d, loc + bin_mid, loc, pattern) - <= score_threshold) { - bin_min = bin_mid; - } else { + // Initialise the bit arrays. + int matchmask = 1 << ( pattern.length() - 1 ); + best_loc = -1; + + int bin_min, bin_mid; + int bin_max = pattern.length() + text.length(); + int *rd; + int *last_rd = NULL; + for ( int d = 0; d < pattern.length(); d++ ) + { + // Scan for the best match; each iteration allows for one more error. 
+ // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while ( bin_min < bin_mid ) + { + if ( match_bitapScore( d, loc + bin_mid, loc, pattern ) <= score_threshold ) + { + bin_min = bin_mid; + } + else + { + bin_max = bin_mid; + } + bin_mid = ( bin_max - bin_min ) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. bin_max = bin_mid; - } - bin_mid = (bin_max - bin_min) / 2 + bin_min; - } - // Use the result from this iteration as the maximum for the next. - bin_max = bin_mid; - int start = std::max(1, loc - bin_mid + 1); - int finish = std::min(loc + bin_mid, text.length()) + pattern.length(); - - rd = new int[finish + 2]; - rd[finish + 1] = (1 << d) - 1; - for (int j = finish; j >= start; j--) { - int charMatch; - if (text.length() <= j - 1) { - // Out of range. - charMatch = 0; - } else { - charMatch = s.value(text[j - 1], 0); - } - if (d == 0) { - // First pass: exact match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; - } else { - // Subsequent passes: fuzzy match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch - | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) - | last_rd[j + 1]; - } - if ((rd[j] & matchmask) != 0) { - double score = match_bitapScore(d, j - 1, loc, pattern); - // This match will almost certainly be better than any existing - // match. But check anyway. - if (score <= score_threshold) { - // Told you so. - score_threshold = score; - best_loc = j - 1; - if (best_loc > loc) { - // When passing loc, don't exceed our current distance from loc. - start = std::max(1, 2 * loc - best_loc); - } else { - // Already passed loc, downhill from here on in. 
+ int start = std::max( 1, loc - bin_mid + 1 ); + int finish = std::min( loc + bin_mid, text.length() ) + pattern.length(); + + rd = new int[ finish + 2 ]; + rd[ finish + 1 ] = ( 1 << d ) - 1; + for ( int j = finish; j >= start; j-- ) + { + int charMatch; + if ( text.length() <= j - 1 ) + { + // Out of range. + charMatch = 0; + } + else + { + charMatch = s.value( text[ j - 1 ], 0 ); + } + if ( d == 0 ) + { + // First pass: exact match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch; + } + else + { + // Subsequent passes: fuzzy match. + rd[ j ] = ( ( rd[ j + 1 ] << 1 ) | 1 ) & charMatch | ( ( ( last_rd[ j + 1 ] | last_rd[ j ] ) << 1 ) | 1 ) | last_rd[ j + 1 ]; + } + if ( ( rd[ j ] & matchmask ) != 0 ) + { + double score = match_bitapScore( d, j - 1, loc, pattern ); + // This match will almost certainly be better than any existing + // match. But check anyway. + if ( score <= score_threshold ) + { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if ( best_loc > loc ) + { + // When passing loc, don't exceed our current distance from loc. + start = std::max( 1, 2 * loc - best_loc ); + } + else + { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if ( match_bitapScore( d + 1, loc, loc, pattern ) > score_threshold ) + { + // No hope for a (better) match at greater error levels. break; - } } - } - } - if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { - // No hope for a (better) match at greater error levels. - break; - } - delete [] last_rd; - last_rd = rd; - } - delete [] last_rd; - delete [] rd; - return best_loc; + delete[] last_rd; + last_rd = rd; + } + delete[] last_rd; + delete[] rd; + return best_loc; } - -double diff_match_patch::match_bitapScore(int e, int x, int loc, - const QString &pattern) { - const float accuracy = static_cast (e) / pattern.length(); - const int proximity = qAbs(loc - x); - if (Match_Distance == 0) { - // Dodge divide by zero error. - return proximity == 0 ? 
accuracy : 1.0; - } - return accuracy + (proximity / static_cast (Match_Distance)); +double diff_match_patch::match_bitapScore( int e, int x, int loc, const QString &pattern ) +{ + const float accuracy = static_cast< float >( e ) / pattern.length(); + const int proximity = qAbs( loc - x ); + if ( Match_Distance == 0 ) + { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + ( proximity / static_cast< float >( Match_Distance ) ); } - -QMap diff_match_patch::match_alphabet(const QString &pattern) { - QMap s; - int i; - for (i = 0; i < pattern.length(); i++) { - QChar c = pattern[i]; - s.insert(c, 0); - } - for (i = 0; i < pattern.length(); i++) { - QChar c = pattern[i]; - s.insert(c, s.value(c) | (1 << (pattern.length() - i - 1))); - } - return s; +QMap< QChar, int > diff_match_patch::match_alphabet( const QString &pattern ) +{ + QMap< QChar, int > s; + int i; + for ( i = 0; i < pattern.length(); i++ ) + { + QChar c = pattern[ i ]; + s.insert( c, 0 ); + } + for ( i = 0; i < pattern.length(); i++ ) + { + QChar c = pattern[ i ]; + s.insert( c, s.value( c ) | ( 1 << ( pattern.length() - i - 1 ) ) ); + } + return s; } - // PATCH FUNCTIONS +void diff_match_patch::patch_addContext( Patch &patch, const QString &text ) +{ + if ( text.isEmpty() ) + { + return; + } + QString pattern = safeMid( text, patch.start2, patch.length1 ); + int padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while ( text.indexOf( pattern ) != text.lastIndexOf( pattern ) && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin ) + { + padding += Patch_Margin; + pattern = safeMid( text, std::max( 0, patch.start2 - padding ), std::min( text.length(), patch.start2 + patch.length1 + padding ) - std::max( 0, patch.start2 - padding ) ); + } + // Add one chunk for good luck. 
+ padding += Patch_Margin; -void diff_match_patch::patch_addContext(Patch &patch, const QString &text) { - if (text.isEmpty()) { - return; - } - QString pattern = safeMid(text, patch.start2, patch.length1); - int padding = 0; + // Add the prefix. + QString prefix = safeMid( text, std::max( 0, patch.start2 - padding ), patch.start2 - std::max( 0, patch.start2 - padding ) ); + if ( !prefix.isEmpty() ) + { + patch.diffs.prepend( Diff( EQUAL, prefix ) ); + } + // Add the suffix. + QString suffix = safeMid( text, patch.start2 + patch.length1, std::min( text.length(), patch.start2 + patch.length1 + padding ) - ( patch.start2 + patch.length1 ) ); + if ( !suffix.isEmpty() ) + { + patch.diffs.append( Diff( EQUAL, suffix ) ); + } - // Look for the first and last matches of pattern in text. If two different - // matches are found, increase the pattern length. - while (text.indexOf(pattern) != text.lastIndexOf(pattern) - && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { - padding += Patch_Margin; - pattern = safeMid(text, std::max(0, patch.start2 - padding), - std::min(text.length(), patch.start2 + patch.length1 + padding) - - std::max(0, patch.start2 - padding)); - } - // Add one chunk for good luck. - padding += Patch_Margin; - - // Add the prefix. - QString prefix = safeMid(text, std::max(0, patch.start2 - padding), - patch.start2 - std::max(0, patch.start2 - padding)); - if (!prefix.isEmpty()) { - patch.diffs.prepend(Diff(EQUAL, prefix)); - } - // Add the suffix. - QString suffix = safeMid(text, patch.start2 + patch.length1, - std::min(text.length(), patch.start2 + patch.length1 + padding) - - (patch.start2 + patch.length1)); - if (!suffix.isEmpty()) { - patch.diffs.append(Diff(EQUAL, suffix)); - } - - // Roll back the start points. - patch.start1 -= prefix.length(); - patch.start2 -= prefix.length(); - // Extend the lengths. 
- patch.length1 += prefix.length() + suffix.length(); - patch.length2 += prefix.length() + suffix.length(); + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); } +QList< Patch > diff_match_patch::patch_make( const QString &text1, const QString &text2 ) +{ + // Check for null inputs. + if ( text1.isNull() || text2.isNull() ) + { + throw "Null inputs. (patch_make)"; + } -QList diff_match_patch::patch_make(const QString &text1, - const QString &text2) { - // Check for null inputs. - if (text1.isNull() || text2.isNull()) { - throw "Null inputs. (patch_make)"; - } - - // No diffs provided, compute our own. - QList diffs = diff_main(text1, text2, true); - if (diffs.size() > 2) { - diff_cleanupSemantic(diffs); - diff_cleanupEfficiency(diffs); - } + // No diffs provided, compute our own. + QList< Diff > diffs = diff_main( text1, text2, true ); + if ( diffs.size() > 2 ) + { + diff_cleanupSemantic( diffs ); + diff_cleanupEfficiency( diffs ); + } - return patch_make(text1, diffs); + return patch_make( text1, diffs ); } - -QList diff_match_patch::patch_make(const QList &diffs) { - // No origin string provided, compute our own. - const QString text1 = diff_text1(diffs); - return patch_make(text1, diffs); +QList< Patch > diff_match_patch::patch_make( const QList< Diff > &diffs ) +{ + // No origin string provided, compute our own. + const QString text1 = diff_text1( diffs ); + return patch_make( text1, diffs ); } +QList< Patch > diff_match_patch::patch_make( const QString &text1, const QString &text2, const QList< Diff > &diffs ) +{ + // text2 is entirely unused. + return patch_make( text1, diffs ); -QList diff_match_patch::patch_make(const QString &text1, - const QString &text2, - const QList &diffs) { - // text2 is entirely unused. 
- return patch_make(text1, diffs); - - Q_UNUSED(text2) + Q_UNUSED( text2 ) } +QList< Patch > diff_match_patch::patch_make( const QString &text1, const QList< Diff > &diffs ) +{ + // Check for null inputs. + if ( text1.isNull() ) + { + throw "Null inputs. (patch_make)"; + } -QList diff_match_patch::patch_make(const QString &text1, - const QList &diffs) { - // Check for null inputs. - if (text1.isNull()) { - throw "Null inputs. (patch_make)"; - } - - QList patches; - if (diffs.isEmpty()) { - return patches; // Get rid of the null case. - } - Patch patch; - int char_count1 = 0; // Number of characters into the text1 string. - int char_count2 = 0; // Number of characters into the text2 string. - // Start with text1 (prepatch_text) and apply the diffs until we arrive at - // text2 (postpatch_text). We recreate the patches one by one to determine - // context info. - QString prepatch_text = text1; - QString postpatch_text = text1; - foreach(Diff aDiff, diffs) { - if (patch.diffs.isEmpty() && aDiff.operation != EQUAL) { - // A new patch starts here. - patch.start1 = char_count1; - patch.start2 = char_count2; - } - - switch (aDiff.operation) { - case INSERT: - patch.diffs.append(aDiff); - patch.length2 += aDiff.text.length(); - postpatch_text = postpatch_text.left(char_count2) - + aDiff.text + safeMid(postpatch_text, char_count2); - break; - case DELETE: - patch.length1 += aDiff.text.length(); - patch.diffs.append(aDiff); - postpatch_text = postpatch_text.left(char_count2) - + safeMid(postpatch_text, char_count2 + aDiff.text.length()); - break; - case EQUAL: - if (aDiff.text.length() <= 2 * Patch_Margin - && !patch.diffs.isEmpty() && !(aDiff == diffs.back())) { - // Small equality inside a patch. - patch.diffs.append(aDiff); - patch.length1 += aDiff.text.length(); - patch.length2 += aDiff.text.length(); + QList< Patch > patches; + if ( diffs.isEmpty() ) + { + return patches; // Get rid of the null case. 
+ } + Patch patch; + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + QString prepatch_text = text1; + QString postpatch_text = text1; + foreach( Diff aDiff, diffs ) + { + if ( patch.diffs.isEmpty() && aDiff.operation != EQUAL ) + { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; } - if (aDiff.text.length() >= 2 * Patch_Margin) { - // Time for a new patch. - if (!patch.diffs.isEmpty()) { - patch_addContext(patch, prepatch_text); - patches.append(patch); - patch = Patch(); - // Unlike Unidiff, our patch lists have a rolling context. - // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff - // Update prepatch text & pos to reflect the application of the - // just completed patch. - prepatch_text = postpatch_text; - char_count1 = char_count2; - } + switch ( aDiff.operation ) + { + case INSERT: + patch.diffs.append( aDiff ); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.left( char_count2 ) + aDiff.text + safeMid( postpatch_text, char_count2 ); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.append( aDiff ); + postpatch_text = postpatch_text.left( char_count2 ) + safeMid( postpatch_text, char_count2 + aDiff.text.length() ); + break; + case EQUAL: + if ( aDiff.text.length() <= 2 * Patch_Margin && !patch.diffs.isEmpty() && !( aDiff == diffs.back() ) ) + { + // Small equality inside a patch. + patch.diffs.append( aDiff ); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if ( aDiff.text.length() >= 2 * Patch_Margin ) + { + // Time for a new patch. 
+ if ( !patch.diffs.isEmpty() ) + { + patch_addContext( patch, prepatch_text ); + patches.append( patch ); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; } - break; - } - // Update the current character count. - if (aDiff.operation != INSERT) { - char_count1 += aDiff.text.length(); + // Update the current character count. + if ( aDiff.operation != INSERT ) + { + char_count1 += aDiff.text.length(); + } + if ( aDiff.operation != DELETE ) + { + char_count2 += aDiff.text.length(); + } } - if (aDiff.operation != DELETE) { - char_count2 += aDiff.text.length(); + // Pick up the leftover patch if not empty. + if ( !patch.diffs.isEmpty() ) + { + patch_addContext( patch, prepatch_text ); + patches.append( patch ); } - } - // Pick up the leftover patch if not empty. 
- if (!patch.diffs.isEmpty()) { - patch_addContext(patch, prepatch_text); - patches.append(patch); - } - return patches; + return patches; } - -QList diff_match_patch::patch_deepCopy(QList &patches) { - QList patchesCopy; - foreach(Patch aPatch, patches) { - Patch patchCopy = Patch(); - foreach(Diff aDiff, aPatch.diffs) { - Diff diffCopy = Diff(aDiff.operation, aDiff.text); - patchCopy.diffs.append(diffCopy); - } - patchCopy.start1 = aPatch.start1; - patchCopy.start2 = aPatch.start2; - patchCopy.length1 = aPatch.length1; - patchCopy.length2 = aPatch.length2; - patchesCopy.append(patchCopy); - } - return patchesCopy; +QList< Patch > diff_match_patch::patch_deepCopy( QList< Patch > &patches ) +{ + QList< Patch > patchesCopy; + foreach( Patch aPatch, patches ) + { + Patch patchCopy = Patch(); + foreach( Diff aDiff, aPatch.diffs ) + { + Diff diffCopy = Diff( aDiff.operation, aDiff.text ); + patchCopy.diffs.append( diffCopy ); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.append( patchCopy ); + } + return patchesCopy; } +QPair< QString, QVector< bool > > diff_match_patch::patch_apply( QList< Patch > &patches, const QString &sourceText ) +{ + QString text = sourceText; // Copy to preserve original. + if ( patches.isEmpty() ) + { + return QPair< QString, QVector< bool > >( text, QVector< bool >( 0 ) ); + } -QPair > diff_match_patch::patch_apply( - QList &patches, const QString &sourceText) { - QString text = sourceText; // Copy to preserve original. - if (patches.isEmpty()) { - return QPair >(text, QVector(0)); - } - - // Deep copy the patches so that no changes are made to originals. 
- QList patchesCopy = patch_deepCopy(patches); - - QString nullPadding = patch_addPadding(patchesCopy); - text = nullPadding + text + nullPadding; - patch_splitMax(patchesCopy); - - int x = 0; - // delta keeps track of the offset between the expected and actual location - // of the previous patch. If there are patches expected at positions 10 and - // 20, but the first patch was found at 12, delta is 2 and the second patch - // has an effective expected position of 22. - int delta = 0; - QVector results(patchesCopy.size()); - foreach(Patch aPatch, patchesCopy) { - int expected_loc = aPatch.start2 + delta; - QString text1 = diff_text1(aPatch.diffs); - int start_loc; - int end_loc = -1; - if (text1.length() > Match_MaxBits) { - // patch_splitMax will only provide an oversized pattern in the case of - // a monster delete. - start_loc = match_main(text, text1.left(Match_MaxBits), expected_loc); - if (start_loc != -1) { - end_loc = match_main(text, text1.right(Match_MaxBits), - expected_loc + text1.length() - Match_MaxBits); - if (end_loc == -1 || start_loc >= end_loc) { - // Can't find valid trailing context. Drop this patch. - start_loc = -1; + // Deep copy the patches so that no changes are made to originals. + QList< Patch > patchesCopy = patch_deepCopy( patches ); + + QString nullPadding = patch_addPadding( patchesCopy ); + text = nullPadding + text + nullPadding; + patch_splitMax( patchesCopy ); + + int x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. 
+ int delta = 0; + QVector< bool > results( patchesCopy.size() ); + foreach( Patch aPatch, patchesCopy ) + { + int expected_loc = aPatch.start2 + delta; + QString text1 = diff_text1( aPatch.diffs ); + int start_loc; + int end_loc = -1; + if ( text1.length() > Match_MaxBits ) + { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = match_main( text, text1.left( Match_MaxBits ), expected_loc ); + if ( start_loc != -1 ) + { + end_loc = match_main( text, text1.right( Match_MaxBits ), expected_loc + text1.length() - Match_MaxBits ); + if ( end_loc == -1 || start_loc >= end_loc ) + { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } + else + { + start_loc = match_main( text, text1, expected_loc ); } - } - } else { - start_loc = match_main(text, text1, expected_loc); - } - if (start_loc == -1) { - // No match found. :( - results[x] = false; - // Subtract the delta for this failed patch from subsequent patches. - delta -= aPatch.length2 - aPatch.length1; - } else { - // Found a match. :) - results[x] = true; - delta = start_loc - expected_loc; - QString text2; - if (end_loc == -1) { - text2 = safeMid(text, start_loc, text1.length()); - } else { - text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); - } - if (text1 == text2) { - // Perfect match, just shove the replacement text in. - text = text.left(start_loc) + diff_text2(aPatch.diffs) - + safeMid(text, start_loc + text1.length()); - } else { - // Imperfect match. Run a diff to get a framework of equivalent - // indices. - QList diffs = diff_main(text1, text2, false); - if (text1.length() > Match_MaxBits - && diff_levenshtein(diffs) / static_cast (text1.length()) - > Patch_DeleteThreshold) { - // The end points match, but the content is unacceptably bad. 
- results[x] = false; - } else { - diff_cleanupSemanticLossless(diffs); - int index1 = 0; - foreach(Diff aDiff, aPatch.diffs) { - if (aDiff.operation != EQUAL) { - int index2 = diff_xIndex(diffs, index1); - if (aDiff.operation == INSERT) { - // Insertion - text = text.left(start_loc + index2) + aDiff.text - + safeMid(text, start_loc + index2); - } else if (aDiff.operation == DELETE) { - // Deletion - text = text.left(start_loc + index2) - + safeMid(text, start_loc + diff_xIndex(diffs, - index1 + aDiff.text.length())); - } + if ( start_loc == -1 ) + { + // No match found. :( + results[ x ] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } + else + { + // Found a match. :) + results[ x ] = true; + delta = start_loc - expected_loc; + QString text2; + if ( end_loc == -1 ) + { + text2 = safeMid( text, start_loc, text1.length() ); + } + else + { + text2 = safeMid( text, start_loc, end_loc + Match_MaxBits - start_loc ); } - if (aDiff.operation != DELETE) { - index1 += aDiff.text.length(); + if ( text1 == text2 ) + { + // Perfect match, just shove the replacement text in. + text = text.left( start_loc ) + diff_text2( aPatch.diffs ) + safeMid( text, start_loc + text1.length() ); + } + else + { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + QList< Diff > diffs = diff_main( text1, text2, false ); + if ( text1.length() > Match_MaxBits && diff_levenshtein( diffs ) / static_cast< float >( text1.length() ) > Patch_DeleteThreshold ) + { + // The end points match, but the content is unacceptably bad. 
+ results[ x ] = false; + } + else + { + diff_cleanupSemanticLossless( diffs ); + int index1 = 0; + foreach( Diff aDiff, aPatch.diffs ) + { + if ( aDiff.operation != EQUAL ) + { + int index2 = diff_xIndex( diffs, index1 ); + if ( aDiff.operation == INSERT ) + { + // Insertion + text = text.left( start_loc + index2 ) + aDiff.text + safeMid( text, start_loc + index2 ); + } + else if ( aDiff.operation == DELETE ) + { + // Deletion + text = text.left( start_loc + index2 ) + safeMid( text, start_loc + diff_xIndex( diffs, index1 + aDiff.text.length() ) ); + } + } + if ( aDiff.operation != DELETE ) + { + index1 += aDiff.text.length(); + } + } + } } - } } - } - } - x++; - } - // Strip the padding off. - text = safeMid(text, nullPadding.length(), text.length() - - 2 * nullPadding.length()); - return QPair >(text, results); + x++; + } + // Strip the padding off. + text = safeMid( text, nullPadding.length(), text.length() - 2 * nullPadding.length() ); + return QPair< QString, QVector< bool > >( text, results ); } +QString diff_match_patch::patch_addPadding( QList< Patch > &patches ) +{ + short paddingLength = Patch_Margin; + QString nullPadding = ""; + for ( short x = 1; x <= paddingLength; x++ ) + { + nullPadding += QChar( (ushort)x ); + } + + // Bump all the patches forward. + QMutableListIterator< Patch > pointer( patches ); + while ( pointer.hasNext() ) + { + Patch &aPatch = pointer.next(); + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch &firstPatch = patches.first(); + QList< Diff > &firstPatchDiffs = firstPatch.diffs; + if ( firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL ) + { + // Add nullPadding equality. + firstPatchDiffs.prepend( Diff( EQUAL, nullPadding ) ); + firstPatch.start1 -= paddingLength; // Should be 0. + firstPatch.start2 -= paddingLength; // Should be 0. 
+ firstPatch.length1 += paddingLength; + firstPatch.length2 += paddingLength; + } + else if ( paddingLength > firstPatchDiffs.first().text.length() ) + { + // Grow first equality. + Diff &firstDiff = firstPatchDiffs.first(); + int extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = safeMid( nullPadding, firstDiff.text.length(), paddingLength - firstDiff.text.length() ) + firstDiff.text; + firstPatch.start1 -= extraLength; + firstPatch.start2 -= extraLength; + firstPatch.length1 += extraLength; + firstPatch.length2 += extraLength; + } + + // Add some padding on end of last diff. + Patch &lastPatch = patches.first(); + QList< Diff > &lastPatchDiffs = lastPatch.diffs; + if ( lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL ) + { + // Add nullPadding equality. + lastPatchDiffs.append( Diff( EQUAL, nullPadding ) ); + lastPatch.length1 += paddingLength; + lastPatch.length2 += paddingLength; + } + else if ( paddingLength > lastPatchDiffs.last().text.length() ) + { + // Grow last equality. + Diff &lastDiff = lastPatchDiffs.last(); + int extraLength = paddingLength - lastDiff.text.length(); + lastDiff.text += nullPadding.left( extraLength ); + lastPatch.length1 += extraLength; + lastPatch.length2 += extraLength; + } -QString diff_match_patch::patch_addPadding(QList &patches) { - short paddingLength = Patch_Margin; - QString nullPadding = ""; - for (short x = 1; x <= paddingLength; x++) { - nullPadding += QChar((ushort)x); - } - - // Bump all the patches forward. - QMutableListIterator pointer(patches); - while (pointer.hasNext()) { - Patch &aPatch = pointer.next(); - aPatch.start1 += paddingLength; - aPatch.start2 += paddingLength; - } - - // Add some padding on start of first diff. - Patch &firstPatch = patches.first(); - QList &firstPatchDiffs = firstPatch.diffs; - if (firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL) { - // Add nullPadding equality. 
- firstPatchDiffs.prepend(Diff(EQUAL, nullPadding)); - firstPatch.start1 -= paddingLength; // Should be 0. - firstPatch.start2 -= paddingLength; // Should be 0. - firstPatch.length1 += paddingLength; - firstPatch.length2 += paddingLength; - } else if (paddingLength > firstPatchDiffs.first().text.length()) { - // Grow first equality. - Diff &firstDiff = firstPatchDiffs.first(); - int extraLength = paddingLength - firstDiff.text.length(); - firstDiff.text = safeMid(nullPadding, firstDiff.text.length(), - paddingLength - firstDiff.text.length()) + firstDiff.text; - firstPatch.start1 -= extraLength; - firstPatch.start2 -= extraLength; - firstPatch.length1 += extraLength; - firstPatch.length2 += extraLength; - } - - // Add some padding on end of last diff. - Patch &lastPatch = patches.first(); - QList &lastPatchDiffs = lastPatch.diffs; - if (lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL) { - // Add nullPadding equality. - lastPatchDiffs.append(Diff(EQUAL, nullPadding)); - lastPatch.length1 += paddingLength; - lastPatch.length2 += paddingLength; - } else if (paddingLength > lastPatchDiffs.last().text.length()) { - // Grow last equality. 
- Diff &lastDiff = lastPatchDiffs.last(); - int extraLength = paddingLength - lastDiff.text.length(); - lastDiff.text += nullPadding.left(extraLength); - lastPatch.length1 += extraLength; - lastPatch.length2 += extraLength; - } - - return nullPadding; + return nullPadding; } +void diff_match_patch::patch_splitMax( QList< Patch > &patches ) +{ + short patch_size = Match_MaxBits; + QString precontext, postcontext; + Patch patch; + int start1, start2; + bool empty; + Operation diff_type; + QString diff_text; + QMutableListIterator< Patch > pointer( patches ); + Patch bigpatch; + + if ( pointer.hasNext() ) + { + bigpatch = pointer.next(); + } -void diff_match_patch::patch_splitMax(QList &patches) { - short patch_size = Match_MaxBits; - QString precontext, postcontext; - Patch patch; - int start1, start2; - bool empty; - Operation diff_type; - QString diff_text; - QMutableListIterator pointer(patches); - Patch bigpatch; - - if (pointer.hasNext()) { - bigpatch = pointer.next(); - } - - while (!bigpatch.isNull()) { - if (bigpatch.length1 <= patch_size) { - bigpatch = pointer.hasNext() ? pointer.next() : Patch(); - continue; - } - // Remove the big old patch. - pointer.remove(); - start1 = bigpatch.start1; - start2 = bigpatch.start2; - precontext = ""; - while (!bigpatch.diffs.isEmpty()) { - // Create one of several smaller patches. - patch = Patch(); - empty = true; - patch.start1 = start1 - precontext.length(); - patch.start2 = start2 - precontext.length(); - if (!precontext.isEmpty()) { - patch.length1 = patch.length2 = precontext.length(); - patch.diffs.append(Diff(EQUAL, precontext)); - } - while (!bigpatch.diffs.isEmpty() - && patch.length1 < patch_size - Patch_Margin) { - diff_type = bigpatch.diffs.front().operation; - diff_text = bigpatch.diffs.front().text; - if (diff_type == INSERT) { - // Insertions are harmless. 
- patch.length2 += diff_text.length(); - start2 += diff_text.length(); - patch.diffs.append(bigpatch.diffs.front()); - bigpatch.diffs.removeFirst(); - empty = false; - } else if (diff_type == DELETE && patch.diffs.size() == 1 - && patch.diffs.front().operation == EQUAL - && diff_text.length() > 2 * patch_size) { - // This is a large deletion. Let it pass in one chunk. - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - empty = false; - patch.diffs.append(Diff(diff_type, diff_text)); - bigpatch.diffs.removeFirst(); - } else { - // Deletion or equality. Only take as much as we can stomach. - diff_text = diff_text.left(std::min(diff_text.length(), - patch_size - patch.length1 - Patch_Margin)); - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - if (diff_type == EQUAL) { - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - } else { - empty = false; - } - patch.diffs.append(Diff(diff_type, diff_text)); - if (diff_text == bigpatch.diffs.front().text) { - bigpatch.diffs.removeFirst(); - } else { - bigpatch.diffs.front().text = safeMid(bigpatch.diffs.front().text, - diff_text.length()); - } + while ( !bigpatch.isNull() ) + { + if ( bigpatch.length1 <= patch_size ) + { + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); + continue; } - } - // Compute the head context for the next patch. - precontext = diff_text2(patch.diffs); - precontext = safeMid(precontext, precontext.length() - Patch_Margin); - // Append the end context for this patch. 
- if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { - postcontext = diff_text1(bigpatch.diffs).left(Patch_Margin); - } else { - postcontext = diff_text1(bigpatch.diffs); - } - if (!postcontext.isEmpty()) { - patch.length1 += postcontext.length(); - patch.length2 += postcontext.length(); - if (!patch.diffs.isEmpty() - && patch.diffs.back().operation == EQUAL) { - patch.diffs.back().text += postcontext; - } else { - patch.diffs.append(Diff(EQUAL, postcontext)); + // Remove the big old patch. + pointer.remove(); + start1 = bigpatch.start1; + start2 = bigpatch.start2; + precontext = ""; + while ( !bigpatch.diffs.isEmpty() ) + { + // Create one of several smaller patches. + patch = Patch(); + empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if ( !precontext.isEmpty() ) + { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.append( Diff( EQUAL, precontext ) ); + } + while ( !bigpatch.diffs.isEmpty() && patch.length1 < patch_size - Patch_Margin ) + { + diff_type = bigpatch.diffs.front().operation; + diff_text = bigpatch.diffs.front().text; + if ( diff_type == INSERT ) + { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.append( bigpatch.diffs.front() ); + bigpatch.diffs.removeFirst(); + empty = false; + } + else if ( diff_type == DELETE && patch.diffs.size() == 1 && patch.diffs.front().operation == EQUAL && diff_text.length() > 2 * patch_size ) + { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.append( Diff( diff_type, diff_text ) ); + bigpatch.diffs.removeFirst(); + } + else + { + // Deletion or equality. Only take as much as we can stomach. 
+ diff_text = diff_text.left( std::min( diff_text.length(), patch_size - patch.length1 - Patch_Margin ) ); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if ( diff_type == EQUAL ) + { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } + else + { + empty = false; + } + patch.diffs.append( Diff( diff_type, diff_text ) ); + if ( diff_text == bigpatch.diffs.front().text ) + { + bigpatch.diffs.removeFirst(); + } + else + { + bigpatch.diffs.front().text = safeMid( bigpatch.diffs.front().text, diff_text.length() ); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2( patch.diffs ); + precontext = safeMid( precontext, precontext.length() - Patch_Margin ); + // Append the end context for this patch. + if ( diff_text1( bigpatch.diffs ).length() > Patch_Margin ) + { + postcontext = diff_text1( bigpatch.diffs ).left( Patch_Margin ); + } + else + { + postcontext = diff_text1( bigpatch.diffs ); + } + if ( !postcontext.isEmpty() ) + { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ( !patch.diffs.isEmpty() && patch.diffs.back().operation == EQUAL ) + { + patch.diffs.back().text += postcontext; + } + else + { + patch.diffs.append( Diff( EQUAL, postcontext ) ); + } + } + if ( !empty ) + { + pointer.insert( patch ); + } } - } - if (!empty) { - pointer.insert(patch); - } + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); } - bigpatch = pointer.hasNext() ? 
pointer.next() : Patch(); - } } - -QString diff_match_patch::patch_toText(const QList &patches) { - QString text; - foreach(Patch aPatch, patches) { - text.append(aPatch.toString()); - } - return text; +QString diff_match_patch::patch_toText( const QList< Patch > &patches ) +{ + QString text; + foreach( Patch aPatch, patches ) + { + text.append( aPatch.toString() ); + } + return text; } +QList< Patch > diff_match_patch::patch_fromText( const QString &textline ) +{ + QList< Patch > patches; + if ( textline.isEmpty() ) + { + return patches; + } + QStringList text = textline.split( "\n", QString::SkipEmptyParts ); + Patch patch; + QRegExp patchHeader( "^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$" ); + char sign; + QString line; + while ( !text.isEmpty() ) + { + if ( !patchHeader.exactMatch( text.front() ) ) + { + throw QString( "Invalid patch string: %1" ).arg( text.front() ); + } + + patch = Patch(); + patch.start1 = patchHeader.cap( 1 ).toInt(); + if ( patchHeader.cap( 2 ).isEmpty() ) + { + patch.start1--; + patch.length1 = 1; + } + else if ( patchHeader.cap( 2 ) == "0" ) + { + patch.length1 = 0; + } + else + { + patch.start1--; + patch.length1 = patchHeader.cap( 2 ).toInt(); + } -QList diff_match_patch::patch_fromText(const QString &textline) { - QList patches; - if (textline.isEmpty()) { - return patches; - } - QStringList text = textline.split("\n", QString::SkipEmptyParts); - Patch patch; - QRegExp patchHeader("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); - char sign; - QString line; - while (!text.isEmpty()) { - if (!patchHeader.exactMatch(text.front())) { - throw QString("Invalid patch string: %1").arg(text.front()); - } - - patch = Patch(); - patch.start1 = patchHeader.cap(1).toInt(); - if (patchHeader.cap(2).isEmpty()) { - patch.start1--; - patch.length1 = 1; - } else if (patchHeader.cap(2) == "0") { - patch.length1 = 0; - } else { - patch.start1--; - patch.length1 = patchHeader.cap(2).toInt(); - } - - patch.start2 = patchHeader.cap(3).toInt(); - if 
(patchHeader.cap(4).isEmpty()) { - patch.start2--; - patch.length2 = 1; - } else if (patchHeader.cap(4) == "0") { - patch.length2 = 0; - } else { - patch.start2--; - patch.length2 = patchHeader.cap(4).toInt(); - } - text.removeFirst(); - - while (!text.isEmpty()) { - if (text.front().isEmpty()) { + patch.start2 = patchHeader.cap( 3 ).toInt(); + if ( patchHeader.cap( 4 ).isEmpty() ) + { + patch.start2--; + patch.length2 = 1; + } + else if ( patchHeader.cap( 4 ) == "0" ) + { + patch.length2 = 0; + } + else + { + patch.start2--; + patch.length2 = patchHeader.cap( 4 ).toInt(); + } text.removeFirst(); - continue; - } - sign = text.front()[0].toAscii(); - line = safeMid(text.front(), 1); - line = line.replace("+", "%2B"); // decode would change all "+" to " " - line = QUrl::fromPercentEncoding(qPrintable(line)); - if (sign == '-') { - // Deletion. - patch.diffs.append(Diff(DELETE, line)); - } else if (sign == '+') { - // Insertion. - patch.diffs.append(Diff(INSERT, line)); - } else if (sign == ' ') { - // Minor equality. - patch.diffs.append(Diff(EQUAL, line)); - } else if (sign == '@') { - // Start of next patch. - break; - } else { - // WTF? - throw QString("Invalid patch mode '%1' in: %2").arg(sign).arg(line); - return QList(); - } - text.removeFirst(); - } - - patches.append(patch); - - } - return patches; + + while ( !text.isEmpty() ) + { + if ( text.front().isEmpty() ) + { + text.removeFirst(); + continue; + } + sign = text.front()[ 0 ].toAscii(); + line = safeMid( text.front(), 1 ); + line = line.replace( "+", "%2B" ); // decode would change all "+" to " " + line = QUrl::fromPercentEncoding( qPrintable( line ) ); + if ( sign == '-' ) + { + // Deletion. + patch.diffs.append( Diff( DELETE, line ) ); + } + else if ( sign == '+' ) + { + // Insertion. + patch.diffs.append( Diff( INSERT, line ) ); + } + else if ( sign == ' ' ) + { + // Minor equality. + patch.diffs.append( Diff( EQUAL, line ) ); + } + else if ( sign == '@' ) + { + // Start of next patch. 
+ break; + } + else + { + // WTF? + throw QString( "Invalid patch mode '%1' in: %2" ).arg( sign ).arg( line ); + return QList< Patch >(); + } + text.removeFirst(); + } + + patches.append( patch ); + } + return patches; } diff --git a/cpp/diff_match_patch.h b/cpp/diff_match_patch.h index 82d32832..ee9ec44a 100644 --- a/cpp/diff_match_patch.h +++ b/cpp/diff_match_patch.h @@ -56,111 +56,109 @@ */ - /**- * The data structure representing a diff is a Linked list of Diff objects: * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), * Diff(Operation.EQUAL, " world.")} * which means: delete "Hello", add "Goodbye" and keep " world." */ -enum Operation { - DELETE, INSERT, EQUAL +enum Operation +{ + DELETE, + INSERT, + EQUAL }; - /** * Class representing one diff operation. */ -class Diff { - public: - Operation operation; - // One of: INSERT, DELETE or EQUAL. - QString text; - // The text associated with this diff operation. - - /** +class Diff +{ +public: + Operation operation; + // One of: INSERT, DELETE or EQUAL. + QString text; + // The text associated with this diff operation. + + /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL. * @param text The text being applied. */ - Diff(Operation _operation, const QString &_text); - Diff(); - inline bool isNull() const; - QString toString() const; - bool operator==(const Diff &d) const; - bool operator!=(const Diff &d) const; + Diff( Operation _operation, const QString &_text ); + Diff(); + inline bool isNull() const; + QString toString() const; + bool operator==( const Diff &d ) const; + bool operator!=( const Diff &d ) const; - static QString strOperation(Operation op); + static QString strOperation( Operation op ); }; - /** * Class representing one patch operation. 
*/ -class Patch { - public: - QList diffs; - int start1; - int start2; - int length1; - int length2; - - /** +class Patch +{ +public: + QList< Diff > diffs; + int start1; + int start2; + int length1; + int length2; + + /** * Constructor. Initializes with an empty list of diffs. */ - Patch(); - bool isNull() const; - QString toString(); + Patch(); + bool isNull() const; + QString toString(); }; - /** * Class containing the diff, match and patch methods. * Also contains the behaviour settings. */ -class diff_match_patch { - - friend class diff_match_patch_test; - - public: - // Defaults. - // Set these on your diff_match_patch instance to override the defaults. - - // Number of seconds to map a diff before giving up (0 for infinity). - float Diff_Timeout; - // Cost of an empty edit operation in terms of edit characters. - short Diff_EditCost; - // At what point is no match declared (0.0 = perfection, 1.0 = very loose). - float Match_Threshold; - // How far to search for a match (0 = exact location, 1000+ = broad match). - // A match this many characters away from the expected location will add - // 1.0 to the score (0.0 is a perfect match). - int Match_Distance; - // When deleting a large block of text (over ~64 characters), how close does - // the contents have to match the expected contents. (0.0 = perfection, - // 1.0 = very loose). Note that Match_Threshold controls how closely the - // end points of a delete need to match. - float Patch_DeleteThreshold; - // Chunk size for context length. - short Patch_Margin; - - // The number of bits in an int. - short Match_MaxBits; - - private: - // Define some regex patterns for matching boundaries. - static QRegExp BLANKLINEEND; - static QRegExp BLANKLINESTART; - - - public: - - diff_match_patch(); - - // DIFF FUNCTIONS - - - /** +class diff_match_patch +{ + friend class diff_match_patch_test; + +public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. 
+ + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + int Match_Distance; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold; + // Chunk size for context length. + short Patch_Margin; + + // The number of bits in an int. + short Match_MaxBits; + +private: + // Define some regex patterns for matching boundaries. + static QRegExp BLANKLINEEND; + static QRegExp BLANKLINESTART; + +public: + diff_match_patch(); + + // DIFF FUNCTIONS + + /** * Find the differences between two texts. * Run a faster slightly less optimal diff. * This method allows the 'checklines' of diff_main() to be optional. @@ -169,9 +167,9 @@ class diff_match_patch { * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ - QList diff_main(const QString &text1, const QString &text2); + QList< Diff > diff_main( const QString &text1, const QString &text2 ); - /** + /** * Find the differences between two texts. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. @@ -180,9 +178,9 @@ class diff_match_patch { * If true, then run a faster slightly less optimal diff. * @return Linked List of Diff objects. 
*/ - QList diff_main(const QString &text1, const QString &text2, bool checklines); + QList< Diff > diff_main( const QString &text1, const QString &text2, bool checklines ); - /** + /** * Find the differences between two texts. Simplifies the problem by * stripping any common prefix or suffix off the texts before diffing. * @param text1 Old string to be diffed. @@ -194,10 +192,10 @@ class diff_match_patch { * internally for recursive calls. Users should set DiffTimeout instead. * @return Linked List of Diff objects. */ - private: - QList diff_main(const QString &text1, const QString &text2, bool checklines, clock_t deadline); +private: + QList< Diff > diff_main( const QString &text1, const QString &text2, bool checklines, clock_t deadline ); - /** + /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param text1 Old string to be diffed. @@ -208,10 +206,10 @@ class diff_match_patch { * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. */ - private: - QList diff_compute(QString text1, QString text2, bool checklines, clock_t deadline); +private: + QList< Diff > diff_compute( QString text1, QString text2, bool checklines, clock_t deadline ); - /** + /** * Do a quick line-level diff on both strings, then rediff the parts for * greater accuracy. * This speedup can produce non-minimal diffs. @@ -220,10 +218,10 @@ class diff_match_patch { * @param deadline Time when the diff should be complete by. * @return Linked List of Diff objects. */ - private: - QList diff_lineMode(QString text1, QString text2, clock_t deadline); +private: + QList< Diff > diff_lineMode( QString text1, QString text2, clock_t deadline ); - /** + /** * Find the 'middle snake' of a diff, split the problem in two * and return the recursively constructed diff. * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. 
@@ -231,10 +229,10 @@ class diff_match_patch { * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ - protected: - QList diff_bisect(const QString &text1, const QString &text2, clock_t deadline); +protected: + QList< Diff > diff_bisect( const QString &text1, const QString &text2, clock_t deadline ); - /** + /** * Given the location of the 'middle snake', split the diff in two parts * and recurse. * @param text1 Old string to be diffed. @@ -244,10 +242,10 @@ class diff_match_patch { * @param deadline Time at which to bail if not yet complete. * @return LinkedList of Diff objects. */ - private: - QList diff_bisectSplit(const QString &text1, const QString &text2, int x, int y, clock_t deadline); +private: + QList< Diff > diff_bisectSplit( const QString &text1, const QString &text2, int x, int y, clock_t deadline ); - /** + /** * Split two texts into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text1 First string. @@ -256,10 +254,10 @@ class diff_match_patch { * encoded text2 and the List of unique strings. The zeroth element * of the List of unique strings is intentionally blank. */ - protected: - QList diff_linesToChars(const QString &text1, const QString &text2); // return elems 0 and 1 are QString, elem 2 is QStringList +protected: + QList< QVariant > diff_linesToChars( const QString &text1, const QString &text2 ); // return elems 0 and 1 are QString, elem 2 is QStringList - /** + /** * Split a text into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text String to encode. @@ -267,48 +265,47 @@ class diff_match_patch { * @param lineHash Map of strings to indices. * @return Encoded string. 
*/ - private: - QString diff_linesToCharsMunge(const QString &text, QStringList &lineArray, - QMap &lineHash); +private: + QString diff_linesToCharsMunge( const QString &text, QStringList &lineArray, QMap< QString, int > &lineHash ); - /** + /** * Rehydrate the text in a diff from a string of line hashes to real lines of * text. * @param diffs LinkedList of Diff objects. * @param lineArray List of unique strings. */ - private: - void diff_charsToLines(QList &diffs, const QStringList &lineArray); +private: + void diff_charsToLines( QList< Diff > &diffs, const QStringList &lineArray ); - /** + /** * Determine the common prefix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the start of each string. */ - public: - int diff_commonPrefix(const QString &text1, const QString &text2); +public: + int diff_commonPrefix( const QString &text1, const QString &text2 ); - /** + /** * Determine the common suffix of two strings. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of each string. */ - public: - int diff_commonSuffix(const QString &text1, const QString &text2); +public: + int diff_commonSuffix( const QString &text1, const QString &text2 ); - /** + /** * Determine if the suffix of one string is the prefix of another. * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of the first * string and the start of the second string. */ - protected: - int diff_commonOverlap(const QString &text1, const QString &text2); +protected: + int diff_commonOverlap( const QString &text1, const QString &text2 ); - /** + /** * Do the two texts share a substring which is at least half the length of * the longer text? * This speedup can produce non-minimal diffs. @@ -318,10 +315,10 @@ class diff_match_patch { * suffix of text1, the prefix of text2, the suffix of text2 and the * common middle. 
Or null if there was no match. */ - protected: - QStringList diff_halfMatch(const QString &text1, const QString &text2); +protected: + QStringList diff_halfMatch( const QString &text1, const QString &text2 ); - /** + /** * Does a substring of shorttext exist within longtext such that the * substring is at least half the length of longtext? * @param longtext Longer string. @@ -331,26 +328,26 @@ class diff_match_patch { * suffix of longtext, the prefix of shorttext, the suffix of shorttext * and the common middle. Or null if there was no match. */ - private: - QStringList diff_halfMatchI(const QString &longtext, const QString &shorttext, int i); +private: + QStringList diff_halfMatchI( const QString &longtext, const QString &shorttext, int i ); - /** + /** * Reduce the number of edits by eliminating semantically trivial equalities. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupSemantic(QList &diffs); +public: + void diff_cleanupSemantic( QList< Diff > &diffs ); - /** + /** * Look for single edits surrounded on both sides by equalities * which can be shifted sideways to align the edit to a word boundary. * e.g: The cat came. -> The cat came. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupSemanticLossless(QList &diffs); +public: + void diff_cleanupSemanticLossless( QList< Diff > &diffs ); - /** + /** * Given two strings, compute a score representing whether the internal * boundary falls on logical boundaries. * Scores range from 6 (best) to 0 (worst). @@ -358,25 +355,25 @@ class diff_match_patch { * @param two Second string. * @return The score. */ - private: - int diff_cleanupSemanticScore(const QString &one, const QString &two); +private: + int diff_cleanupSemanticScore( const QString &one, const QString &two ); - /** + /** * Reduce the number of edits by eliminating operationally trivial equalities. * @param diffs LinkedList of Diff objects. 
*/ - public: - void diff_cleanupEfficiency(QList &diffs); +public: + void diff_cleanupEfficiency( QList< Diff > &diffs ); - /** + /** * Reorder and merge like edit sections. Merge equalities. * Any edit section can move as long as it doesn't cross an equality. * @param diffs LinkedList of Diff objects. */ - public: - void diff_cleanupMerge(QList &diffs); +public: + void diff_cleanupMerge( QList< Diff > &diffs ); - /** + /** * loc is a location in text1, compute and return the equivalent location in * text2. * e.g. "The cat" vs "The big cat", 1->1, 5->8 @@ -384,43 +381,43 @@ class diff_match_patch { * @param loc Location within text1. * @return Location within text2. */ - public: - int diff_xIndex(const QList &diffs, int loc); +public: + int diff_xIndex( const QList< Diff > &diffs, int loc ); - /** + /** * Convert a Diff list into a pretty HTML report. * @param diffs LinkedList of Diff objects. * @return HTML representation. */ - public: - QString diff_prettyHtml(const QList &diffs); +public: + QString diff_prettyHtml( const QList< Diff > &diffs ); - /** + /** * Compute and return the source text (all equalities and deletions). * @param diffs LinkedList of Diff objects. * @return Source text. */ - public: - QString diff_text1(const QList &diffs); +public: + QString diff_text1( const QList< Diff > &diffs ); - /** + /** * Compute and return the destination text (all equalities and insertions). * @param diffs LinkedList of Diff objects. * @return Destination text. */ - public: - QString diff_text2(const QList &diffs); +public: + QString diff_text2( const QList< Diff > &diffs ); - /** + /** * Compute the Levenshtein distance; the number of inserted, deleted or * substituted characters. * @param diffs LinkedList of Diff objects. * @return Number of changes. 
*/ - public: - int diff_levenshtein(const QList &diffs); +public: + int diff_levenshtein( const QList< Diff > &diffs ); - /** + /** * Crush the diff into an encoded string which describes the operations * required to transform text1 into text2. * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. @@ -428,10 +425,10 @@ class diff_match_patch { * @param diffs Array of diff tuples. * @return Delta text. */ - public: - QString diff_toDelta(const QList &diffs); +public: + QString diff_toDelta( const QList< Diff > &diffs ); - /** + /** * Given the original text1, and an encoded string which describes the * operations required to transform text1 into text2, compute the full diff. * @param text1 Source string for the diff. @@ -439,14 +436,12 @@ class diff_match_patch { * @return Array of diff tuples or null if invalid. * @throws QString If invalid input. */ - public: - QList diff_fromDelta(const QString &text1, const QString &delta); +public: + QList< Diff > diff_fromDelta( const QString &text1, const QString &delta ); + // MATCH FUNCTIONS - // MATCH FUNCTIONS - - - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc'. * Returns -1 if no match found. * @param text The text to search. @@ -454,10 +449,10 @@ class diff_match_patch { * @param loc The location to search around. * @return Best match index or -1. */ - public: - int match_main(const QString &text, const QString &pattern, int loc); +public: + int match_main( const QString &text, const QString &pattern, int loc ); - /** + /** * Locate the best instance of 'pattern' in 'text' near 'loc' using the * Bitap algorithm. Returns -1 if no match found. * @param text The text to search. @@ -465,10 +460,10 @@ class diff_match_patch { * @param loc The location to search around. * @return Best match index or -1. 
*/ - protected: - int match_bitap(const QString &text, const QString &pattern, int loc); +protected: + int match_bitap( const QString &text, const QString &pattern, int loc ); - /** + /** * Compute and return the score for a match with e errors and x location. * @param e Number of errors in match. * @param x Location of match. @@ -476,50 +471,48 @@ class diff_match_patch { * @param pattern Pattern being sought. * @return Overall score for match (0.0 = good, 1.0 = bad). */ - private: - double match_bitapScore(int e, int x, int loc, const QString &pattern); +private: + double match_bitapScore( int e, int x, int loc, const QString &pattern ); - /** + /** * Initialise the alphabet for the Bitap algorithm. * @param pattern The text to encode. * @return Hash of character locations. */ - protected: - QMap match_alphabet(const QString &pattern); - - - // PATCH FUNCTIONS +protected: + QMap< QChar, int > match_alphabet( const QString &pattern ); + // PATCH FUNCTIONS - /** + /** * Increase the context until it is unique, * but don't let the pattern expand beyond Match_MaxBits. * @param patch The patch to grow. * @param text Source text. */ - protected: - void patch_addContext(Patch &patch, const QString &text); +protected: + void patch_addContext( Patch &patch, const QString &text ); - /** + /** * Compute a list of patches to turn text1 into text2. * A set of diffs will be computed. * @param text1 Old text. * @param text2 New text. * @return LinkedList of Patch objects. */ - public: - QList patch_make(const QString &text1, const QString &text2); +public: + QList< Patch > patch_make( const QString &text1, const QString &text2 ); - /** + /** * Compute a list of patches to turn text1 into text2. * text1 will be derived from the provided diffs. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. 
*/ - public: - QList patch_make(const QList &diffs); +public: + QList< Patch > patch_make( const QList< Diff > &diffs ); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is ignored, diffs are the delta between text1 and text2. * @param text1 Old text. @@ -528,28 +521,28 @@ class diff_match_patch { * @return LinkedList of Patch objects. * @deprecated Prefer patch_make(const QString &text1, const QList &diffs). */ - public: - QList patch_make(const QString &text1, const QString &text2, const QList &diffs); +public: + QList< Patch > patch_make( const QString &text1, const QString &text2, const QList< Diff > &diffs ); - /** + /** * Compute a list of patches to turn text1 into text2. * text2 is not provided, diffs are the delta between text1 and text2. * @param text1 Old text. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. */ - public: - QList patch_make(const QString &text1, const QList &diffs); +public: + QList< Patch > patch_make( const QString &text1, const QList< Diff > &diffs ); - /** + /** * Given an array of patches, return another array that is identical. * @param patches Array of patch objects. * @return Array of patch objects. */ - public: - QList patch_deepCopy(QList &patches); +public: + QList< Patch > patch_deepCopy( QList< Patch > &patches ); - /** + /** * Merge a set of patches onto the text. Return a patched text, as well * as an array of true/false values indicating which patches were applied. * @param patches Array of patch objects. @@ -557,58 +550,56 @@ class diff_match_patch { * @return Two element Object array, containing the new text and an array of * boolean values. */ - public: - QPair > patch_apply(QList &patches, const QString &text); +public: + QPair< QString, QVector< bool > > patch_apply( QList< Patch > &patches, const QString &text ); - /** + /** * Add some padding on text start and end so that edges can match something. 
* Intended to be called only from within patch_apply. * @param patches Array of patch objects. * @return The padding string added to each side. */ - public: - QString patch_addPadding(QList &patches); +public: + QString patch_addPadding( QList< Patch > &patches ); - /** + /** * Look through the patches and break up any which are longer than the * maximum limit of the match algorithm. * Intended to be called only from within patch_apply. * @param patches LinkedList of Patch objects. */ - public: - void patch_splitMax(QList &patches); +public: + void patch_splitMax( QList< Patch > &patches ); - /** + /** * Take a list of patches and return a textual representation. * @param patches List of Patch objects. * @return Text representation of patches. */ - public: - QString patch_toText(const QList &patches); +public: + QString patch_toText( const QList< Patch > &patches ); - /** + /** * Parse a textual representation of patches and return a List of Patch * objects. * @param textline Text representation of patches. * @return List of Patch objects. * @throws QString If invalid input. */ - public: - QList patch_fromText(const QString &textline); +public: + QList< Patch > patch_fromText( const QString &textline ); - /** + /** * A safer version of QString.mid(pos). This one returns "" instead of * null when the postion equals the string length. * @param str String to take a substring from. * @param pos Position to start the substring from. * @return Substring. */ - private: - static inline QString safeMid(const QString &str, int pos) { - return (pos == str.length()) ? QString("") : str.mid(pos); - } +private: + static inline QString safeMid( const QString &str, int pos ) { return ( pos == str.length() ) ? QString( "" ) : str.mid( pos ); } - /** + /** * A safer version of QString.mid(pos, len). This one returns "" instead of * null when the postion equals the string length. * @param str String to take a substring from. 
@@ -616,10 +607,8 @@ class diff_match_patch { * @param len Length of substring. * @return Substring. */ - private: - static inline QString safeMid(const QString &str, int pos, int len) { - return (pos == str.length()) ? QString("") : str.mid(pos, len); - } +private: + static inline QString safeMid( const QString &str, int pos, int len ) { return ( pos == str.length() ) ? QString( "" ) : str.mid( pos, len ); } }; -#endif // DIFF_MATCH_PATCH_H +#endif // DIFF_MATCH_PATCH_H diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index f75b1cd0..0d0a39c3 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -21,419 +21,445 @@ #include "diff_match_patch.h" #include "diff_match_patch_test.h" -int main(int argc, char **argv) { - diff_match_patch_test dmp_test; - qDebug("Starting diff_match_patch unit tests."); - dmp_test.run_all_tests(); - qDebug("Done."); - return 0; - Q_UNUSED(argc) - Q_UNUSED(argv) +int main( int argc, char **argv ) +{ + diff_match_patch_test dmp_test; + qDebug( "Starting diff_match_patch unit tests." ); + dmp_test.run_all_tests(); + qDebug( "Done." 
); + return 0; + Q_UNUSED( argc ) + Q_UNUSED( argv ) } - -diff_match_patch_test::diff_match_patch_test() { +diff_match_patch_test::diff_match_patch_test() +{ } -void diff_match_patch_test::run_all_tests() { - QTime t; - t.start(); - try { - testDiffCommonPrefix(); - testDiffCommonSuffix(); - testDiffCommonOverlap(); - testDiffHalfmatch(); - testDiffLinesToChars(); - testDiffCharsToLines(); - testDiffCleanupMerge(); - testDiffCleanupSemanticLossless(); - testDiffCleanupSemantic(); - testDiffCleanupEfficiency(); - testDiffPrettyHtml(); - testDiffText(); - testDiffDelta(); - testDiffXIndex(); - testDiffLevenshtein(); - testDiffBisect(); - testDiffMain(); - - testMatchAlphabet(); - testMatchBitap(); - testMatchMain(); - - testPatchObj(); - testPatchFromText(); - testPatchToText(); - testPatchAddContext(); - testPatchMake(); - testPatchSplitMax(); - testPatchAddPadding(); - testPatchApply(); - qDebug("All tests passed."); - } catch (QString strCase) { - qDebug("Test failed: %s", qPrintable(strCase)); - } - qDebug("Total time: %d ms", t.elapsed()); +void diff_match_patch_test::run_all_tests() +{ + QTime t; + t.start(); + try + { + testDiffCommonPrefix(); + testDiffCommonSuffix(); + testDiffCommonOverlap(); + testDiffHalfmatch(); + testDiffLinesToChars(); + testDiffCharsToLines(); + testDiffCleanupMerge(); + testDiffCleanupSemanticLossless(); + testDiffCleanupSemantic(); + testDiffCleanupEfficiency(); + testDiffPrettyHtml(); + testDiffText(); + testDiffDelta(); + testDiffXIndex(); + testDiffLevenshtein(); + testDiffBisect(); + testDiffMain(); + + testMatchAlphabet(); + testMatchBitap(); + testMatchMain(); + + testPatchObj(); + testPatchFromText(); + testPatchToText(); + testPatchAddContext(); + testPatchMake(); + testPatchSplitMax(); + testPatchAddPadding(); + testPatchApply(); + qDebug( "All tests passed." 
); + } + catch ( QString strCase ) + { + qDebug( "Test failed: %s", qPrintable( strCase ) ); + } + qDebug( "Total time: %d ms", t.elapsed() ); } // DIFF TEST FUNCTIONS -void diff_match_patch_test::testDiffCommonPrefix() { - // Detect any common prefix. - assertEquals("diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix("abc", "xyz")); +void diff_match_patch_test::testDiffCommonPrefix() +{ + // Detect any common prefix. + assertEquals( "diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix( "abc", "xyz" ) ); - assertEquals("diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix("1234abcdef", "1234xyz")); + assertEquals( "diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix( "1234abcdef", "1234xyz" ) ); - assertEquals("diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix("1234", "1234xyz")); + assertEquals( "diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix( "1234", "1234xyz" ) ); } -void diff_match_patch_test::testDiffCommonSuffix() { - // Detect any common suffix. - assertEquals("diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix("abc", "xyz")); +void diff_match_patch_test::testDiffCommonSuffix() +{ + // Detect any common suffix. + assertEquals( "diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix( "abc", "xyz" ) ); - assertEquals("diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix("abcdef1234", "xyz1234")); + assertEquals( "diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix( "abcdef1234", "xyz1234" ) ); - assertEquals("diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix("1234", "xyz1234")); + assertEquals( "diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix( "1234", "xyz1234" ) ); } -void diff_match_patch_test::testDiffCommonOverlap() { - // Detect any suffix/prefix overlap. - assertEquals("diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap("", "abcd")); +void diff_match_patch_test::testDiffCommonOverlap() +{ + // Detect any suffix/prefix overlap. 
+ assertEquals( "diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap( "", "abcd" ) ); - assertEquals("diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap("abc", "abcd")); + assertEquals( "diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap( "abc", "abcd" ) ); - assertEquals("diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap("123456", "abcd")); + assertEquals( "diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap( "123456", "abcd" ) ); - assertEquals("diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap("123456xxx", "xxxabcd")); + assertEquals( "diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap( "123456xxx", "xxxabcd" ) ); - // Some overly clever languages (C#) may treat ligatures as equal to their - // component letters. E.g. U+FB01 == 'fi' - assertEquals("diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap("fi", QString::fromWCharArray((const wchar_t*) L"\ufb01i", 2))); + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals( "diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap( "fi", QString::fromWCharArray( (const wchar_t *)L"\ufb01i", 2 ) ) ); } -void diff_match_patch_test::testDiffHalfmatch() { - // Detect a halfmatch. - dmp.Diff_Timeout = 1; - assertEmpty("diff_halfMatch: No match #1.", dmp.diff_halfMatch("1234567890", "abcdef")); +void diff_match_patch_test::testDiffHalfmatch() +{ + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty( "diff_halfMatch: No match #1.", dmp.diff_halfMatch( "1234567890", "abcdef" ) ); - assertEmpty("diff_halfMatch: No match #2.", dmp.diff_halfMatch("12345", "23")); + assertEmpty( "diff_halfMatch: No match #2.", dmp.diff_halfMatch( "12345", "23" ) ); - assertEquals("diff_halfMatch: Single Match #1.", QString("12,90,a,z,345678").split(","), dmp.diff_halfMatch("1234567890", "a345678z")); + assertEquals( "diff_halfMatch: Single Match #1.", QString( "12,90,a,z,345678" ).split( "," ), dmp.diff_halfMatch( "1234567890", "a345678z" ) ); - assertEquals("diff_halfMatch: Single Match #2.", QString("a,z,12,90,345678").split(","), dmp.diff_halfMatch("a345678z", "1234567890")); + assertEquals( "diff_halfMatch: Single Match #2.", QString( "a,z,12,90,345678" ).split( "," ), dmp.diff_halfMatch( "a345678z", "1234567890" ) ); - assertEquals("diff_halfMatch: Single Match #3.", QString("abc,z,1234,0,56789").split(","), dmp.diff_halfMatch("abc56789z", "1234567890")); + assertEquals( "diff_halfMatch: Single Match #3.", QString( "abc,z,1234,0,56789" ).split( "," ), dmp.diff_halfMatch( "abc56789z", "1234567890" ) ); - assertEquals("diff_halfMatch: Single Match #4.", QString("a,xyz,1,7890,23456").split(","), dmp.diff_halfMatch("a23456xyz", "1234567890")); + assertEquals( "diff_halfMatch: Single Match #4.", QString( "a,xyz,1,7890,23456" ).split( "," ), dmp.diff_halfMatch( "a23456xyz", "1234567890" ) ); - assertEquals("diff_halfMatch: Multiple Matches #1.", QString("12123,123121,a,z,1234123451234").split(","), dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + assertEquals( "diff_halfMatch: Multiple Matches #1.", QString( "12123,123121,a,z,1234123451234" ).split( "," ), dmp.diff_halfMatch( "121231234123451234123121", "a1234123451234z" ) ); - assertEquals("diff_halfMatch: Multiple Matches #2.", QString(",-=-=-=-=-=,x,,x-=-=-=-=-=-=-=").split(","), dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + assertEquals( 
"diff_halfMatch: Multiple Matches #2.", QString( ",-=-=-=-=-=,x,,x-=-=-=-=-=-=-=" ).split( "," ), dmp.diff_halfMatch( "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=" ) ); - assertEquals("diff_halfMatch: Multiple Matches #3.", QString("-=-=-=-=-=,,,y,-=-=-=-=-=-=-=y").split(","), dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + assertEquals( "diff_halfMatch: Multiple Matches #3.", QString( "-=-=-=-=-=,,,y,-=-=-=-=-=-=-=y" ).split( "," ), dmp.diff_halfMatch( "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy" ) ); - // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy - assertEquals("diff_halfMatch: Non-optimal halfmatch.", QString("qHillo,w,x,Hulloy,HelloHe").split(","), dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + assertEquals( "diff_halfMatch: Non-optimal halfmatch.", QString( "qHillo,w,x,Hulloy,HelloHe" ).split( "," ), dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); - dmp.Diff_Timeout = 0; - assertEmpty("diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + dmp.Diff_Timeout = 0; + assertEmpty( "diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch( "qHilloHelloHew", "xHelloHeHulloy" ) ); } -void diff_match_patch_test::testDiffLinesToChars() { - // Convert lines down to characters. 
- QStringList tmpVector; - QList tmpVarList; - tmpVector.append(""); - tmpVector.append("alpha\n"); - tmpVector.append("beta\n"); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); //(("\u0001\u0002\u0001")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // (("\u0002\u0001\u0002")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.append(""); - tmpVector.append("alpha\r\n"); - tmpVector.append("beta\r\n"); - tmpVector.append("\r\n"); - tmpVarList << QVariant::fromValue(QString("")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)3) + QChar((ushort)3)); // (("\u0001\u0002\u0003\u0003")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.append(""); - tmpVector.append("a"); - tmpVector.append("b"); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1)); // (("\u0001")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2)); // (("\u0002")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("a", "b")); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - tmpVarList.clear(); - QString lines; - QString chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.append(QString::number(x) + "\n"); - lines += QString::number(x) + "\n"; - chars += QChar(static_cast(x)); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.prepend(""); - tmpVarList << QVariant::fromValue(chars); - tmpVarList << QVariant::fromValue(QString("")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, "")); +void diff_match_patch_test::testDiffLinesToChars() +{ + // Convert lines down to characters. + QStringList tmpVector; + QList< QVariant > tmpVarList; + tmpVector.append( "" ); + tmpVector.append( "alpha\n" ); + tmpVector.append( "beta\n" ); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)1 ) + QChar( (ushort)2 ) + QChar( (ushort)1 ) ); //(("\u0001\u0002\u0001")); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)2 ) + QChar( (ushort)1 ) + QChar( (ushort)2 ) ); // (("\u0002\u0001\u0002")); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.append( "" ); + tmpVector.append( "alpha\r\n" ); + tmpVector.append( "beta\r\n" ); + tmpVector.append( "\r\n" ); + tmpVarList << QVariant::fromValue( QString( "" ) ); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)1 ) + QChar( (ushort)2 ) + QChar( (ushort)3 ) + QChar( (ushort)3 ) ); // (("\u0001\u0002\u0003\u0003")); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "", "alpha\r\nbeta\r\n\r\n\r\n" ) ); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.append( "" 
); + tmpVector.append( "a" ); + tmpVector.append( "b" ); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)1 ) ); // (("\u0001")); + tmpVarList << QVariant::fromValue( QString() + QChar( (ushort)2 ) ); // (("\u0002")); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars:", tmpVarList, dmp.diff_linesToChars( "a", "b" ) ); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + QString lines; + QString chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.append( QString::number( x ) + "\n" ); + lines += QString::number( x ) + "\n"; + chars += QChar( static_cast< ushort >( x ) ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.prepend( "" ); + tmpVarList << QVariant::fromValue( chars ); + tmpVarList << QVariant::fromValue( QString( "" ) ); + tmpVarList << QVariant::fromValue( tmpVector ); + assertEquals( "diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars( lines, "" ) ); } -void diff_match_patch_test::testDiffCharsToLines() { - // First check that Diff equality works. - assertTrue("diff_charsToLines:", Diff(EQUAL, "a") == Diff(EQUAL, "a")); - - assertEquals("diff_charsToLines:", Diff(EQUAL, "a"), Diff(EQUAL, "a")); - - // Convert chars up to lines. 
- QList diffs; - diffs << Diff(EQUAL, QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); // ("\u0001\u0002\u0001"); - diffs << Diff(INSERT, QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // ("\u0002\u0001\u0002"); - QStringList tmpVector; - tmpVector.append(""); - tmpVector.append("alpha\n"); - tmpVector.append("beta\n"); - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines:", diffList(Diff(EQUAL, "alpha\nbeta\nalpha\n"), Diff(INSERT, "beta\nalpha\nbeta\n")), diffs); - - // More than 256 to reveal any 8-bit limitations. - int n = 300; - tmpVector.clear(); - QList tmpVarList; - QString lines; - QString chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.append(QString::number(x) + "\n"); - lines += QString::number(x) + "\n"; - chars += QChar(static_cast(x)); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.prepend(""); - diffs = diffList(Diff(DELETE, chars)); - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); +void diff_match_patch_test::testDiffCharsToLines() +{ + // First check that Diff equality works. + assertTrue( "diff_charsToLines:", Diff( EQUAL, "a" ) == Diff( EQUAL, "a" ) ); + + assertEquals( "diff_charsToLines:", Diff( EQUAL, "a" ), Diff( EQUAL, "a" ) ); + + // Convert chars up to lines. 
+ QList< Diff > diffs; + diffs << Diff( EQUAL, QString() + QChar( (ushort)1 ) + QChar( (ushort)2 ) + QChar( (ushort)1 ) ); // ("\u0001\u0002\u0001"); + diffs << Diff( INSERT, QString() + QChar( (ushort)2 ) + QChar( (ushort)1 ) + QChar( (ushort)2 ) ); // ("\u0002\u0001\u0002"); + QStringList tmpVector; + tmpVector.append( "" ); + tmpVector.append( "alpha\n" ); + tmpVector.append( "beta\n" ); + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines:", diffList( Diff( EQUAL, "alpha\nbeta\nalpha\n" ), Diff( INSERT, "beta\nalpha\nbeta\n" ) ), diffs ); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.clear(); + QList< QVariant > tmpVarList; + QString lines; + QString chars; + for ( int x = 1; x < n + 1; x++ ) + { + tmpVector.append( QString::number( x ) + "\n" ); + lines += QString::number( x ) + "\n"; + chars += QChar( static_cast< ushort >( x ) ); + } + assertEquals( "diff_linesToChars: More than 256 (setup).", n, tmpVector.size() ); + assertEquals( "diff_linesToChars: More than 256 (setup).", n, chars.length() ); + tmpVector.prepend( "" ); + diffs = diffList( Diff( DELETE, chars ) ); + dmp.diff_charsToLines( diffs, tmpVector ); + assertEquals( "diff_charsToLines: More than 256.", diffList( Diff( DELETE, lines ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupMerge() { - // Cleanup a messy diff. - QList diffs; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Null case.", diffList(), diffs); +void diff_match_patch_test::testDiffCleanupMerge() +{ + // Cleanup a messy diff. 
+ QList< Diff > diffs; + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Null case.", diffList(), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: No change case.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: No change case.", diffList( Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( INSERT, "c" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(EQUAL, "b"), Diff(EQUAL, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge equalities.", diffList(Diff(EQUAL, "abc")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( EQUAL, "b" ), Diff( EQUAL, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge equalities.", diffList( Diff( EQUAL, "abc" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(DELETE, "b"), Diff(DELETE, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge deletions.", diffList(Diff(DELETE, "abc")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( DELETE, "b" ), Diff( DELETE, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge deletions.", diffList( Diff( DELETE, "abc" ) ), diffs ); - diffs = diffList(Diff(INSERT, "a"), Diff(INSERT, "b"), Diff(INSERT, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge insertions.", diffList(Diff(INSERT, "abc")), diffs); + diffs = diffList( Diff( INSERT, "a" ), Diff( INSERT, "b" ), Diff( INSERT, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge insertions.", diffList( Diff( INSERT, "abc" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b"), Diff(DELETE, "c"), Diff(INSERT, "d"), Diff(EQUAL, "e"), Diff(EQUAL, 
"f")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge interweave.", diffList(Diff(DELETE, "ac"), Diff(INSERT, "bd"), Diff(EQUAL, "ef")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "b" ), Diff( DELETE, "c" ), Diff( INSERT, "d" ), Diff( EQUAL, "e" ), Diff( EQUAL, "f" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Merge interweave.", diffList( Diff( DELETE, "ac" ), Diff( INSERT, "bd" ), Diff( EQUAL, "ef" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Prefix and suffix detection.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "c")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection.", diffList( Diff( EQUAL, "a" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "c" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc"), Diff(EQUAL, "y")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList(Diff(EQUAL, "xa"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "cy")), diffs); + diffs = diffList( Diff( EQUAL, "x" ), Diff( DELETE, "a" ), Diff( INSERT, "abc" ), Diff( DELETE, "dc" ), Diff( EQUAL, "y" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList( Diff( EQUAL, "xa" ), Diff( DELETE, "d" ), Diff( INSERT, "b" ), Diff( EQUAL, "cy" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "ba"), Diff(EQUAL, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left.", diffList(Diff(INSERT, "ab"), Diff(EQUAL, "ac")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( 
INSERT, "ba" ), Diff( EQUAL, "c" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left.", diffList( Diff( INSERT, "ab" ), Diff( EQUAL, "ac" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "c"), Diff(INSERT, "ab"), Diff(EQUAL, "a")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right.", diffList(Diff(EQUAL, "ca"), Diff(INSERT, "ba")), diffs); + diffs = diffList( Diff( EQUAL, "c" ), Diff( INSERT, "ab" ), Diff( EQUAL, "a" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right.", diffList( Diff( EQUAL, "ca" ), Diff( INSERT, "ba" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(EQUAL, "c"), Diff(DELETE, "ac"), Diff(EQUAL, "x")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left recursive.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "acx")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "b" ), Diff( EQUAL, "c" ), Diff( DELETE, "ac" ), Diff( EQUAL, "x" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit left recursive.", diffList( Diff( DELETE, "abc" ), Diff( EQUAL, "acx" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "ca"), Diff(EQUAL, "c"), Diff(DELETE, "b"), Diff(EQUAL, "a")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right recursive.", diffList(Diff(EQUAL, "xca"), Diff(DELETE, "cba")), diffs); + diffs = diffList( Diff( EQUAL, "x" ), Diff( DELETE, "ca" ), Diff( EQUAL, "c" ), Diff( DELETE, "b" ), Diff( EQUAL, "a" ) ); + dmp.diff_cleanupMerge( diffs ); + assertEquals( "diff_cleanupMerge: Slide edit right recursive.", diffList( Diff( EQUAL, "xca" ), Diff( DELETE, "cba" ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupSemanticLossless() { - // Slide diffs to match logical boundaries. 
- QList diffs = diffList(); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); +void diff_match_patch_test::testDiffCleanupSemanticLossless() +{ + // Slide diffs to match logical boundaries. + QList< Diff > diffs = diffList(); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Null case.", diffList(), diffs ); - diffs = diffList(Diff(EQUAL, "AAA\r\n\r\nBBB"), Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), Diff(EQUAL, "\r\nEEE")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Blank lines.", diffList(Diff(EQUAL, "AAA\r\n\r\n"), Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), Diff(EQUAL, "BBB\r\nEEE")), diffs); + diffs = diffList( Diff( EQUAL, "AAA\r\n\r\nBBB" ), Diff( INSERT, "\r\nDDD\r\n\r\nBBB" ), Diff( EQUAL, "\r\nEEE" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Blank lines.", diffList( Diff( EQUAL, "AAA\r\n\r\n" ), Diff( INSERT, "BBB\r\nDDD\r\n\r\n" ), Diff( EQUAL, "BBB\r\nEEE" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "AAA\r\nBBB"), Diff(INSERT, " DDD\r\nBBB"), Diff(EQUAL, " EEE")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Line boundaries.", diffList(Diff(EQUAL, "AAA\r\n"), Diff(INSERT, "BBB DDD\r\n"), Diff(EQUAL, "BBB EEE")), diffs); + diffs = diffList( Diff( EQUAL, "AAA\r\nBBB" ), Diff( INSERT, " DDD\r\nBBB" ), Diff( EQUAL, " EEE" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemanticLossless: Line boundaries.", diffList( Diff( EQUAL, "AAA\r\n" ), Diff( INSERT, "BBB DDD\r\n" ), Diff( EQUAL, "BBB EEE" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The c"), Diff(INSERT, "ow and the c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(INSERT, "cow and the "), Diff(EQUAL, "cat.")), diffs); + diffs = diffList( 
Diff( EQUAL, "The c" ), Diff( INSERT, "ow and the c" ), Diff( EQUAL, "at." ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", diffList( Diff( EQUAL, "The " ), Diff( INSERT, "cow and the " ), Diff( EQUAL, "cat." ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The-c"), Diff(INSERT, "ow-and-the-c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Alphanumeric boundaries.", diffList(Diff(EQUAL, "The-"), Diff(INSERT, "cow-and-the-"), Diff(EQUAL, "cat.")), diffs); + diffs = diffList( Diff( EQUAL, "The-c" ), Diff( INSERT, "ow-and-the-c" ), Diff( EQUAL, "at." ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Alphanumeric boundaries.", diffList( Diff( EQUAL, "The-" ), Diff( INSERT, "cow-and-the-" ), Diff( EQUAL, "cat." ) ), diffs ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "a"), Diff(EQUAL, "ax")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the start.", diffList(Diff(DELETE, "a"), Diff(EQUAL, "aax")), diffs); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "a" ), Diff( EQUAL, "ax" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the start.", diffList( Diff( DELETE, "a" ), Diff( EQUAL, "aax" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "xa"), Diff(DELETE, "a"), Diff(EQUAL, "a")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the end.", diffList(Diff(EQUAL, "xaa"), Diff(DELETE, "a")), diffs); + diffs = diffList( Diff( EQUAL, "xa" ), Diff( DELETE, "a" ), Diff( EQUAL, "a" ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Hitting the end.", diffList( Diff( EQUAL, "xaa" ), Diff( DELETE, "a" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The xxx. The "), Diff(INSERT, "zzz. 
The "), Diff(EQUAL, "yyy.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Sentence boundaries.", diffList(Diff(EQUAL, "The xxx."), Diff(INSERT, " The zzz."), Diff(EQUAL, " The yyy.")), diffs); + diffs = diffList( Diff( EQUAL, "The xxx. The " ), Diff( INSERT, "zzz. The " ), Diff( EQUAL, "yyy." ) ); + dmp.diff_cleanupSemanticLossless( diffs ); + assertEquals( "diff_cleanupSemantic: Sentence boundaries.", diffList( Diff( EQUAL, "The xxx." ), Diff( INSERT, " The zzz." ), Diff( EQUAL, " The yyy." ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupSemantic() { - // Cleanup semantically trivial equalities. - QList diffs = diffList(); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); +void diff_match_patch_test::testDiffCleanupSemantic() +{ + // Cleanup semantically trivial equalities. + QList< Diff > diffs = diffList(); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Null case.", diffList(), diffs ); - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #1.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")), diffs); + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #1.", diffList( Diff( DELETE, "ab" ), Diff( INSERT, "cd" ), Diff( EQUAL, "12" ), Diff( DELETE, "e" ) ), diffs ); - diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #2.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")), diffs); + diffs = diffList( Diff( DELETE, "abc" ), Diff( INSERT, 
"ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No elimination #2.", diffList( Diff( DELETE, "abc" ), Diff( INSERT, "ABC" ), Diff( EQUAL, "1234" ), Diff( DELETE, "wxyz" ) ), diffs ); - diffs = diffList(Diff(DELETE, "a"), Diff(EQUAL, "b"), Diff(DELETE, "c")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Simple elimination.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "b")), diffs); + diffs = diffList( Diff( DELETE, "a" ), Diff( EQUAL, "b" ), Diff( DELETE, "c" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Simple elimination.", diffList( Diff( DELETE, "abc" ), Diff( INSERT, "b" ) ), diffs ); - diffs = diffList(Diff(DELETE, "ab"), Diff(EQUAL, "cd"), Diff(DELETE, "e"), Diff(EQUAL, "f"), Diff(INSERT, "g")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Backpass elimination.", diffList(Diff(DELETE, "abcdef"), Diff(INSERT, "cdfg")), diffs); + diffs = diffList( Diff( DELETE, "ab" ), Diff( EQUAL, "cd" ), Diff( DELETE, "e" ), Diff( EQUAL, "f" ), Diff( INSERT, "g" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Backpass elimination.", diffList( Diff( DELETE, "abcdef" ), Diff( INSERT, "cdfg" ) ), diffs ); - diffs = diffList(Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2"), Diff(EQUAL, "_"), Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Multiple elimination.", diffList(Diff(DELETE, "AB_AB"), Diff(INSERT, "1A2_1A2")), diffs); + diffs = diffList( Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ), Diff( EQUAL, "_" ), Diff( INSERT, "1" ), Diff( EQUAL, "A" ), Diff( DELETE, "B" ), Diff( INSERT, "2" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Multiple elimination.", diffList( Diff( 
DELETE, "AB_AB" ), Diff( INSERT, "1A2_1A2" ) ), diffs ); - diffs = diffList(Diff(EQUAL, "The c"), Diff(DELETE, "ow and the c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(DELETE, "cow and the "), Diff(EQUAL, "cat.")), diffs); + diffs = diffList( Diff( EQUAL, "The c" ), Diff( DELETE, "ow and the c" ), Diff( EQUAL, "at." ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Word boundaries.", diffList( Diff( EQUAL, "The " ), Diff( DELETE, "cow and the " ), Diff( EQUAL, "cat." ) ), diffs ); - diffs = diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No overlap elimination.", diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")), diffs); + diffs = diffList( Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: No overlap elimination.", diffList( Diff( DELETE, "abcxx" ), Diff( INSERT, "xxdef" ) ), diffs ); - diffs = diffList(Diff(DELETE, "abcxxx"), Diff(INSERT, "xxxdef")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Overlap elimination.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xxx"), Diff(INSERT, "def")), diffs); + diffs = diffList( Diff( DELETE, "abcxxx" ), Diff( INSERT, "xxxdef" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Overlap elimination.", diffList( Diff( DELETE, "abc" ), Diff( EQUAL, "xxx" ), Diff( INSERT, "def" ) ), diffs ); - diffs = diffList(Diff(DELETE, "xxxabc"), Diff(INSERT, "defxxx")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", diffList(Diff(INSERT, "def"), Diff(EQUAL, "xxx"), Diff(DELETE, "abc")), diffs); + diffs = diffList( Diff( DELETE, "xxxabc" ), Diff( INSERT, "defxxx" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( 
"diff_cleanupSemantic: Reverse overlap elimination.", diffList( Diff( INSERT, "def" ), Diff( EQUAL, "xxx" ), Diff( DELETE, "abc" ) ), diffs ); - diffs = diffList(Diff(DELETE, "abcd1212"), Diff(INSERT, "1212efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A3"), Diff(INSERT, "3BC")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Two overlap eliminations.", diffList(Diff(DELETE, "abcd"), Diff(EQUAL, "1212"), Diff(INSERT, "efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A"), Diff(EQUAL, "3"), Diff(INSERT, "BC")), diffs); + diffs = diffList( Diff( DELETE, "abcd1212" ), Diff( INSERT, "1212efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A3" ), Diff( INSERT, "3BC" ) ); + dmp.diff_cleanupSemantic( diffs ); + assertEquals( "diff_cleanupSemantic: Two overlap eliminations.", diffList( Diff( DELETE, "abcd" ), Diff( EQUAL, "1212" ), Diff( INSERT, "efghi" ), Diff( EQUAL, "----" ), Diff( DELETE, "A" ), Diff( EQUAL, "3" ), Diff( INSERT, "BC" ) ), diffs ); } -void diff_match_patch_test::testDiffCleanupEfficiency() { - // Cleanup operationally trivial equalities. 
- dmp.Diff_EditCost = 4; - QList diffs = diffList(); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Null case.", diffList(), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: No elimination.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Four-edit elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xyz34")), diffs); - - diffs = diffList(Diff(INSERT, "12"), Diff(EQUAL, "x"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Three-edit elimination.", diffList(Diff(DELETE, "xcd"), Diff(INSERT, "12x34")), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xy"), Diff(INSERT, "34"), Diff(EQUAL, "z"), Diff(DELETE, "cd"), Diff(INSERT, "56")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Backpass elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xy34z56")), diffs); - - dmp.Diff_EditCost = 5; - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: High cost elimination.", diffList(Diff(DELETE, "abwxyzcd"), Diff(INSERT, "12wxyz34")), diffs); - dmp.Diff_EditCost = 4; +void diff_match_patch_test::testDiffCleanupEfficiency() +{ + // Cleanup operationally trivial equalities. 
+ dmp.Diff_EditCost = 4; + QList< Diff > diffs = diffList(); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Null case.", diffList(), diffs ); + + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: No elimination.", diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ), diffs ); + + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Four-edit elimination.", diffList( Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xyz34" ) ), diffs ); + + diffs = diffList( Diff( INSERT, "12" ), Diff( EQUAL, "x" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Three-edit elimination.", diffList( Diff( DELETE, "xcd" ), Diff( INSERT, "12x34" ) ), diffs ); + + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "xy" ), Diff( INSERT, "34" ), Diff( EQUAL, "z" ), Diff( DELETE, "cd" ), Diff( INSERT, "56" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: Backpass elimination.", diffList( Diff( DELETE, "abxyzcd" ), Diff( INSERT, "12xy34z56" ) ), diffs ); + + dmp.Diff_EditCost = 5; + diffs = diffList( Diff( DELETE, "ab" ), Diff( INSERT, "12" ), Diff( EQUAL, "wxyz" ), Diff( DELETE, "cd" ), Diff( INSERT, "34" ) ); + dmp.diff_cleanupEfficiency( diffs ); + assertEquals( "diff_cleanupEfficiency: High cost elimination.", diffList( Diff( DELETE, "abwxyzcd" ), Diff( INSERT, "12wxyz34" ) ), diffs ); + dmp.Diff_EditCost = 4; } -void diff_match_patch_test::testDiffPrettyHtml() { - // Pretty print. 
- QList diffs = diffList(Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")); - assertEquals("diff_prettyHtml:", "
<B>b</B>c&d", dmp.diff_prettyHtml(diffs)); +void diff_match_patch_test::testDiffPrettyHtml() +{ + // Pretty print. + QList< Diff > diffs = diffList( Diff( EQUAL, "a\n" ), Diff( DELETE, "b" ), Diff( INSERT, "c&d" ) ); + assertEquals( "diff_prettyHtml:", "
<B>b</B>c&d", dmp.diff_prettyHtml( diffs ) ); } -void diff_match_patch_test::testDiffText() { - // Compute the source and destination texts. - QList diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy")); - assertEquals("diff_text1:", "jumps over the lazy", dmp.diff_text1(diffs)); - assertEquals("diff_text2:", "jumped over a lazy", dmp.diff_text2(diffs)); +void diff_match_patch_test::testDiffText() +{ + // Compute the source and destination texts. + QList< Diff > diffs = diffList( Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ) ); + assertEquals( "diff_text1:", "jumps over the lazy", dmp.diff_text1( diffs ) ); + assertEquals( "diff_text2:", "jumped over a lazy", dmp.diff_text2( diffs ) ); } -void diff_match_patch_test::testDiffDelta() { - // Convert a diff into delta string. - QList diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy"), Diff(INSERT, "old dog")); - QString text1 = dmp.diff_text1(diffs); - assertEquals("diff_text1: Base text.", "jumps over the lazy", text1); +void diff_match_patch_test::testDiffDelta() +{ + // Convert a diff into delta string. 
+ QList< Diff > diffs = diffList( Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, " lazy" ), Diff( INSERT, "old dog" ) ); + QString text1 = dmp.diff_text1( diffs ); + assertEquals( "diff_text1: Base text.", "jumps over the lazy", text1 ); - QString delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta); + QString delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta ); - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta(text1, delta)); + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta( text1, delta ) ); - // Generates error (19 < 20). - try { - dmp.diff_fromDelta(text1 + "x", delta); - assertFalse("diff_fromDelta: Too long.", true); - } catch (QString ex) { - // Exception expected. - } + // Generates error (19 < 20). + try + { + dmp.diff_fromDelta( text1 + "x", delta ); + assertFalse( "diff_fromDelta: Too long.", true ); + } + catch ( QString ex ) + { + // Exception expected. + } - // Generates error (19 > 18). - try { - dmp.diff_fromDelta(text1.mid(1), delta); - assertFalse("diff_fromDelta: Too short.", true); - } catch (QString ex) { - // Exception expected. - } + // Generates error (19 > 18). + try + { + dmp.diff_fromDelta( text1.mid( 1 ), delta ); + assertFalse( "diff_fromDelta: Too short.", true ); + } + catch ( QString ex ) + { + // Exception expected. + } - // Generates error (%c3%xy invalid Unicode). - /* This test does not work because QUrl::fromPercentEncoding("%xy") -> "?" + // Generates error (%c3%xy invalid Unicode). + /* This test does not work because QUrl::fromPercentEncoding("%xy") -> "?" 
try { dmp.diff_fromDelta("", "+%c3%xy"); assertFalse("diff_fromDelta: Invalid character.", true); @@ -442,746 +468,815 @@ void diff_match_patch_test::testDiffDelta() { } */ - // Test deltas with special characters. - diffs = diffList(Diff(EQUAL, QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %", 7)), Diff(DELETE, QString::fromWCharArray((const wchar_t*) L"\u0681 \001 \n ^", 7)), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0682 \002 \\ |", 7))); - text1 = dmp.diff_text1(diffs); - assertEquals("diff_text1: Unicode text.", QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %\u0681 \001 \n ^", 14), text1); + // Test deltas with special characters. + diffs = diffList( Diff( EQUAL, QString::fromWCharArray( (const wchar_t *)L"\u0680 \000 \t %", 7 ) ), Diff( DELETE, QString::fromWCharArray( (const wchar_t *)L"\u0681 \001 \n ^", 7 ) ), Diff( INSERT, QString::fromWCharArray( (const wchar_t *)L"\u0682 \002 \\ |", 7 ) ) ); + text1 = dmp.diff_text1( diffs ); + assertEquals( "diff_text1: Unicode text.", QString::fromWCharArray( (const wchar_t *)L"\u0680 \000 \t %\u0681 \001 \n ^", 14 ), text1 ); - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unicode.", "=7\t-7\t+%DA%82 %02 %5C %7C", delta); + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unicode.", "=7\t-7\t+%DA%82 %02 %5C %7C", delta ); - assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); + assertEquals( "diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta( text1, delta ) ); - // Verify pool of unchanged characters. - diffs = diffList(Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); - QString text2 = dmp.diff_text2(diffs); - assertEquals("diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); + // Verify pool of unchanged characters. + diffs = diffList( Diff( INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? 
: @ & = + $ , # " ) ); + QString text2 = dmp.diff_text2( diffs ); + assertEquals( "diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2 ); - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta); + delta = dmp.diff_toDelta( diffs ); + assertEquals( "diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta ); - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta("", delta)); + // Convert delta string into a diff. + assertEquals( "diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta( "", delta ) ); } -void diff_match_patch_test::testDiffXIndex() { - // Translate a location in text1 to text2. - QList diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex(diffs, 2)); +void diff_match_patch_test::testDiffXIndex() +{ + // Translate a location in text1 to text2. 
+ QList< Diff > diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) ); + assertEquals( "diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex( diffs, 2 ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex(diffs, 3)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "1234" ), Diff( EQUAL, "xyz" ) ); + assertEquals( "diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex( diffs, 3 ) ); } -void diff_match_patch_test::testDiffLevenshtein() { - QList diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein(diffs)); +void diff_match_patch_test::testDiffLevenshtein() +{ + QList< Diff > diffs = diffList( Diff( DELETE, "abc" ), Diff( INSERT, "1234" ), Diff( EQUAL, "xyz" ) ); + assertEquals( "diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein( diffs ) ); - diffs = diffList(Diff(EQUAL, "xyz"), Diff(DELETE, "abc"), Diff(INSERT, "1234")); - assertEquals("diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein(diffs)); + diffs = diffList( Diff( EQUAL, "xyz" ), Diff( DELETE, "abc" ), Diff( INSERT, "1234" ) ); + assertEquals( "diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein( diffs ) ); - diffs = diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xyz"), Diff(INSERT, "1234")); - assertEquals("diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein(diffs)); + diffs = diffList( Diff( DELETE, "abc" ), Diff( EQUAL, "xyz" ), Diff( INSERT, "1234" ) ); + assertEquals( "diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein( diffs ) ); } -void diff_match_patch_test::testDiffBisect() { - // Normal. - QString a = "cat"; - QString b = "map"; - // Since the resulting diff hasn't been normalized, it would be ok if - // the insertion and deletion pairs are swapped. 
- // If the order changes, tweak this test as required. - QList diffs = diffList(Diff(DELETE, "c"), Diff(INSERT, "m"), Diff(EQUAL, "a"), Diff(DELETE, "t"), Diff(INSERT, "p")); - assertEquals("diff_bisect: Normal.", diffs, dmp.diff_bisect(a, b, std::numeric_limits::max())); - - // Timeout. - diffs = diffList(Diff(DELETE, "cat"), Diff(INSERT, "map")); - assertEquals("diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); +void diff_match_patch_test::testDiffBisect() +{ + // Normal. + QString a = "cat"; + QString b = "map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + QList< Diff > diffs = diffList( Diff( DELETE, "c" ), Diff( INSERT, "m" ), Diff( EQUAL, "a" ), Diff( DELETE, "t" ), Diff( INSERT, "p" ) ); + assertEquals( "diff_bisect: Normal.", diffs, dmp.diff_bisect( a, b, std::numeric_limits< clock_t >::max() ) ); + + // Timeout. + diffs = diffList( Diff( DELETE, "cat" ), Diff( INSERT, "map" ) ); + assertEquals( "diff_bisect: Timeout.", diffs, dmp.diff_bisect( a, b, 0 ) ); } -void diff_match_patch_test::testDiffMain() { - // Perform a trivial diff. - QList diffs = diffList(); - assertEquals("diff_main: Null case.", diffs, dmp.diff_main("", "", false)); +void diff_match_patch_test::testDiffMain() +{ + // Perform a trivial diff. 
+ QList< Diff > diffs = diffList(); + assertEquals( "diff_main: Null case.", diffs, dmp.diff_main( "", "", false ) ); - diffs = diffList(Diff(EQUAL, "abc")); - assertEquals("diff_main: Equality.", diffs, dmp.diff_main("abc", "abc", false)); + diffs = diffList( Diff( EQUAL, "abc" ) ); + assertEquals( "diff_main: Equality.", diffs, dmp.diff_main( "abc", "abc", false ) ); - diffs = diffList(Diff(EQUAL, "ab"), Diff(INSERT, "123"), Diff(EQUAL, "c")); - assertEquals("diff_main: Simple insertion.", diffs, dmp.diff_main("abc", "ab123c", false)); + diffs = diffList( Diff( EQUAL, "ab" ), Diff( INSERT, "123" ), Diff( EQUAL, "c" ) ); + assertEquals( "diff_main: Simple insertion.", diffs, dmp.diff_main( "abc", "ab123c", false ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "bc")); - assertEquals("diff_main: Simple deletion.", diffs, dmp.diff_main("a123bc", "abc", false)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "bc" ) ); + assertEquals( "diff_main: Simple deletion.", diffs, dmp.diff_main( "a123bc", "abc", false ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "123"), Diff(EQUAL, "b"), Diff(INSERT, "456"), Diff(EQUAL, "c")); - assertEquals("diff_main: Two insertions.", diffs, dmp.diff_main("abc", "a123b456c", false)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( INSERT, "123" ), Diff( EQUAL, "b" ), Diff( INSERT, "456" ), Diff( EQUAL, "c" ) ); + assertEquals( "diff_main: Two insertions.", diffs, dmp.diff_main( "abc", "a123b456c", false ) ); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "b"), Diff(DELETE, "456"), Diff(EQUAL, "c")); - assertEquals("diff_main: Two deletions.", diffs, dmp.diff_main("a123b456c", "abc", false)); + diffs = diffList( Diff( EQUAL, "a" ), Diff( DELETE, "123" ), Diff( EQUAL, "b" ), Diff( DELETE, "456" ), Diff( EQUAL, "c" ) ); + assertEquals( "diff_main: Two deletions.", diffs, dmp.diff_main( "a123b456c", "abc", false ) ); - // Perform a real diff. 
- // Switch off the timeout. - dmp.Diff_Timeout = 0; - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b")); - assertEquals("diff_main: Simple case #1.", diffs, dmp.diff_main("a", "b", false)); + // Perform a real diff. + // Switch off the timeout. + dmp.Diff_Timeout = 0; + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, "b" ) ); + assertEquals( "diff_main: Simple case #1.", diffs, dmp.diff_main( "a", "b", false ) ); - diffs = diffList(Diff(DELETE, "Apple"), Diff(INSERT, "Banana"), Diff(EQUAL, "s are a"), Diff(INSERT, "lso"), Diff(EQUAL, " fruit.")); - assertEquals("diff_main: Simple case #2.", diffs, dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); + diffs = diffList( Diff( DELETE, "Apple" ), Diff( INSERT, "Banana" ), Diff( EQUAL, "s are a" ), Diff( INSERT, "lso" ), Diff( EQUAL, " fruit." ) ); + assertEquals( "diff_main: Simple case #2.", diffs, dmp.diff_main( "Apples are a fruit.", "Bananas are also fruit.", false ) ); - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0680", 1)), Diff(EQUAL, "x"), Diff(DELETE, "\t"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\000", 1))); - assertEquals("diff_main: Simple case #3.", diffs, dmp.diff_main("ax\t", QString::fromWCharArray((const wchar_t*) L"\u0680x\000", 3), false)); + diffs = diffList( Diff( DELETE, "a" ), Diff( INSERT, QString::fromWCharArray( (const wchar_t *)L"\u0680", 1 ) ), Diff( EQUAL, "x" ), Diff( DELETE, "\t" ), Diff( INSERT, QString::fromWCharArray( (const wchar_t *)L"\000", 1 ) ) ); + assertEquals( "diff_main: Simple case #3.", diffs, dmp.diff_main( "ax\t", QString::fromWCharArray( (const wchar_t *)L"\u0680x\000", 3 ), false ) ); - diffs = diffList(Diff(DELETE, "1"), Diff(EQUAL, "a"), Diff(DELETE, "y"), Diff(EQUAL, "b"), Diff(DELETE, "2"), Diff(INSERT, "xab")); - assertEquals("diff_main: Overlap #1.", diffs, dmp.diff_main("1ayb2", "abxab", false)); + diffs = diffList( Diff( DELETE, "1" ), Diff( EQUAL, "a" ), Diff( 
DELETE, "y" ), Diff( EQUAL, "b" ), Diff( DELETE, "2" ), Diff( INSERT, "xab" ) ); + assertEquals( "diff_main: Overlap #1.", diffs, dmp.diff_main( "1ayb2", "abxab", false ) ); - diffs = diffList(Diff(INSERT, "xaxcx"), Diff(EQUAL, "abc"), Diff(DELETE, "y")); - assertEquals("diff_main: Overlap #2.", diffs, dmp.diff_main("abcy", "xaxcxabc", false)); + diffs = diffList( Diff( INSERT, "xaxcx" ), Diff( EQUAL, "abc" ), Diff( DELETE, "y" ) ); + assertEquals( "diff_main: Overlap #2.", diffs, dmp.diff_main( "abcy", "xaxcxabc", false ) ); - diffs = diffList(Diff(DELETE, "ABCD"), Diff(EQUAL, "a"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "bcd"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "efghijklmnopqrs"), Diff(DELETE, "EFGHIJKLMNOefg")); - assertEquals("diff_main: Overlap #3.", diffs, dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false)); + diffs = diffList( Diff( DELETE, "ABCD" ), Diff( EQUAL, "a" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "bcd" ), Diff( DELETE, "=" ), Diff( INSERT, "-" ), Diff( EQUAL, "efghijklmnopqrs" ), Diff( DELETE, "EFGHIJKLMNOefg" ) ); + assertEquals( "diff_main: Overlap #3.", diffs, dmp.diff_main( "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false ) ); - diffs = diffList(Diff(INSERT, " "), Diff(EQUAL, "a"), Diff(INSERT, "nd"), Diff(EQUAL, " [[Pennsylvania]]"), Diff(DELETE, " and [[New")); - assertEquals("diff_main: Large equality.", diffs, dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false)); + diffs = diffList( Diff( INSERT, " " ), Diff( EQUAL, "a" ), Diff( INSERT, "nd" ), Diff( EQUAL, " [[Pennsylvania]]" ), Diff( DELETE, " and [[New" ) ); + assertEquals( "diff_main: Large equality.", diffs, dmp.diff_main( "a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false ) ); - dmp.Diff_Timeout = 0.1f; // 100ms - // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
- QString a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; - QString b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; - // Increase the text lengths by 1024 times to ensure a timeout. - for (int x = 0; x < 10; x++) { - a = a + a; - b = b + b; - } - clock_t startTime = clock(); - dmp.diff_main(a, b); - clock_t endTime = clock(); - // Test that we took at least the timeout period. - assertTrue("diff_main: Timeout min.", dmp.Diff_Timeout * CLOCKS_PER_SEC <= endTime - startTime); - // Test that we didn't take forever (be forgiving). - // Theoretically this test could fail very occasionally if the - // OS task swaps or locks up for a second at the wrong moment. - // Java seems to overrun by ~80% (compared with 10% for other languages). - // Therefore use an upper limit of 0.5s instead of 0.2s. - assertTrue("diff_main: Timeout max.", dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 > endTime - startTime); - dmp.Diff_Timeout = 0; - - // Test the linemode speedup. - // Must be long to pass the 100 char cutoff. 
- a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; - assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); - - a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; - b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; - assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); - - a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; - QStringList texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); - QStringList texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); - assertEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); - - // Test null inputs. - try { - dmp.diff_main(NULL, NULL); - assertFalse("diff_main: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
+ QString a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + QString b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for ( int x = 0; x < 10; x++ ) + { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main( a, b ); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue( "diff_main: Timeout min.", dmp.Diff_Timeout * CLOCKS_PER_SEC <= endTime - startTime ); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue( "diff_main: Timeout max.", dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 > endTime - startTime ); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + assertEquals( "diff_main: Simple line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals( "diff_main: Single line-mode.", dmp.diff_main( a, b, true ), dmp.diff_main( a, b, false ) ); + + a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + QStringList texts_linemode = diff_rebuildtexts( dmp.diff_main( a, b, true ) ); + QStringList texts_textmode = diff_rebuildtexts( dmp.diff_main( a, b, false ) ); + assertEquals( "diff_main: Overlap line-mode.", texts_textmode, texts_linemode ); + + // Test null inputs. + try + { + dmp.diff_main( NULL, NULL ); + assertFalse( "diff_main: Null inputs.", true ); + } + catch ( const char *ex ) + { + // Exception expected. + } } - // MATCH TEST FUNCTIONS - -void diff_match_patch_test::testMatchAlphabet() { - // Initialise the bitmasks for Bitap. 
- QMap bitmask; - bitmask.insert('a', 4); - bitmask.insert('b', 2); - bitmask.insert('c', 1); - assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); - - bitmask = QMap(); - bitmask.insert('a', 37); - bitmask.insert('b', 18); - bitmask.insert('c', 8); - assertEquals("match_alphabet: Duplicates.", bitmask, dmp.match_alphabet("abcaba")); +void diff_match_patch_test::testMatchAlphabet() +{ + // Initialise the bitmasks for Bitap. + QMap< QChar, int > bitmask; + bitmask.insert( 'a', 4 ); + bitmask.insert( 'b', 2 ); + bitmask.insert( 'c', 1 ); + assertEquals( "match_alphabet: Unique.", bitmask, dmp.match_alphabet( "abc" ) ); + + bitmask = QMap< QChar, int >(); + bitmask.insert( 'a', 37 ); + bitmask.insert( 'b', 18 ); + bitmask.insert( 'c', 8 ); + assertEquals( "match_alphabet: Duplicates.", bitmask, dmp.match_alphabet( "abcaba" ) ); } -void diff_match_patch_test::testMatchBitap() { - // Bitap algorithm. - dmp.Match_Distance = 100; - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Exact match #1.", 5, dmp.match_bitap("abcdefghijk", "fgh", 5)); +void diff_match_patch_test::testMatchBitap() +{ + // Bitap algorithm. 
+ dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Exact match #1.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 5 ) ); - assertEquals("match_bitap: Exact match #2.", 5, dmp.match_bitap("abcdefghijk", "fgh", 0)); + assertEquals( "match_bitap: Exact match #2.", 5, dmp.match_bitap( "abcdefghijk", "fgh", 0 ) ); - assertEquals("match_bitap: Fuzzy match #1.", 4, dmp.match_bitap("abcdefghijk", "efxhi", 0)); + assertEquals( "match_bitap: Fuzzy match #1.", 4, dmp.match_bitap( "abcdefghijk", "efxhi", 0 ) ); - assertEquals("match_bitap: Fuzzy match #2.", 2, dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); + assertEquals( "match_bitap: Fuzzy match #2.", 2, dmp.match_bitap( "abcdefghijk", "cdefxyhijk", 5 ) ); - assertEquals("match_bitap: Fuzzy match #3.", -1, dmp.match_bitap("abcdefghijk", "bxy", 1)); + assertEquals( "match_bitap: Fuzzy match #3.", -1, dmp.match_bitap( "abcdefghijk", "bxy", 1 ) ); - assertEquals("match_bitap: Overflow.", 2, dmp.match_bitap("123456789xx0", "3456789x0", 2)); + assertEquals( "match_bitap: Overflow.", 2, dmp.match_bitap( "123456789xx0", "3456789x0", 2 ) ); - assertEquals("match_bitap: Before start match.", 0, dmp.match_bitap("abcdef", "xxabc", 4)); + assertEquals( "match_bitap: Before start match.", 0, dmp.match_bitap( "abcdef", "xxabc", 4 ) ); - assertEquals("match_bitap: Beyond end match.", 3, dmp.match_bitap("abcdef", "defyy", 4)); + assertEquals( "match_bitap: Beyond end match.", 3, dmp.match_bitap( "abcdef", "defyy", 4 ) ); - assertEquals("match_bitap: Oversized pattern.", 0, dmp.match_bitap("abcdef", "xabcdefy", 0)); + assertEquals( "match_bitap: Oversized pattern.", 0, dmp.match_bitap( "abcdef", "xabcdefy", 0 ) ); - dmp.Match_Threshold = 0.4f; - assertEquals("match_bitap: Threshold #1.", 4, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + dmp.Match_Threshold = 0.4f; + assertEquals( "match_bitap: Threshold #1.", 4, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); - dmp.Match_Threshold = 0.3f; - 
assertEquals("match_bitap: Threshold #2.", -1, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + dmp.Match_Threshold = 0.3f; + assertEquals( "match_bitap: Threshold #2.", -1, dmp.match_bitap( "abcdefghijk", "efxyhi", 1 ) ); - dmp.Match_Threshold = 0.0f; - assertEquals("match_bitap: Threshold #3.", 1, dmp.match_bitap("abcdefghijk", "bcdef", 1)); + dmp.Match_Threshold = 0.0f; + assertEquals( "match_bitap: Threshold #3.", 1, dmp.match_bitap( "abcdefghijk", "bcdef", 1 ) ); - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Multiple select #1.", 0, dmp.match_bitap("abcdexyzabcde", "abccde", 3)); + dmp.Match_Threshold = 0.5f; + assertEquals( "match_bitap: Multiple select #1.", 0, dmp.match_bitap( "abcdexyzabcde", "abccde", 3 ) ); - assertEquals("match_bitap: Multiple select #2.", 8, dmp.match_bitap("abcdexyzabcde", "abccde", 5)); + assertEquals( "match_bitap: Multiple select #2.", 8, dmp.match_bitap( "abcdexyzabcde", "abccde", 5 ) ); - dmp.Match_Distance = 10; // Strict location. - assertEquals("match_bitap: Distance test #1.", -1, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + dmp.Match_Distance = 10; // Strict location. + assertEquals( "match_bitap: Distance test #1.", -1, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); - assertEquals("match_bitap: Distance test #2.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); + assertEquals( "match_bitap: Distance test #2.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1 ) ); - dmp.Match_Distance = 1000; // Loose location. - assertEquals("match_bitap: Distance test #3.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + dmp.Match_Distance = 1000; // Loose location. + assertEquals( "match_bitap: Distance test #3.", 0, dmp.match_bitap( "abcdefghijklmnopqrstuvwxyz", "abcdefg", 24 ) ); } -void diff_match_patch_test::testMatchMain() { - // Full match. 
- assertEquals("match_main: Equality.", 0, dmp.match_main("abcdef", "abcdef", 1000)); +void diff_match_patch_test::testMatchMain() +{ + // Full match. + assertEquals( "match_main: Equality.", 0, dmp.match_main( "abcdef", "abcdef", 1000 ) ); - assertEquals("match_main: Null text.", -1, dmp.match_main("", "abcdef", 1)); + assertEquals( "match_main: Null text.", -1, dmp.match_main( "", "abcdef", 1 ) ); - assertEquals("match_main: Null pattern.", 3, dmp.match_main("abcdef", "", 3)); + assertEquals( "match_main: Null pattern.", 3, dmp.match_main( "abcdef", "", 3 ) ); - assertEquals("match_main: Exact match.", 3, dmp.match_main("abcdef", "de", 3)); + assertEquals( "match_main: Exact match.", 3, dmp.match_main( "abcdef", "de", 3 ) ); - dmp.Match_Threshold = 0.7f; - assertEquals("match_main: Complex match.", 4, dmp.match_main("I am the very model of a modern major general.", " that berry ", 5)); - dmp.Match_Threshold = 0.5f; + dmp.Match_Threshold = 0.7f; + assertEquals( "match_main: Complex match.", 4, dmp.match_main( "I am the very model of a modern major general.", " that berry ", 5 ) ); + dmp.Match_Threshold = 0.5f; - // Test null inputs. - try { - dmp.match_main(NULL, NULL, 0); - assertFalse("match_main: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + // Test null inputs. + try + { + dmp.match_main( NULL, NULL, 0 ); + assertFalse( "match_main: Null inputs.", true ); + } + catch ( const char *ex ) + { + // Exception expected. + } } - // PATCH TEST FUNCTIONS - -void diff_match_patch_test::testPatchObj() { - // Patch Object. 
- Patch p; - p.start1 = 20; - p.start2 = 21; - p.length1 = 18; - p.length2 = 17; - p.diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, "\nlaz")); - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("Patch: toString.", strp, p.toString()); +void diff_match_patch_test::testPatchObj() +{ + // Patch Object. + Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = diffList( Diff( EQUAL, "jump" ), Diff( DELETE, "s" ), Diff( INSERT, "ed" ), Diff( EQUAL, " over " ), Diff( DELETE, "the" ), Diff( INSERT, "a" ), Diff( EQUAL, "\nlaz" ) ); + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "Patch: toString.", strp, p.toString() ); } -void diff_match_patch_test::testPatchFromText() { - assertTrue("patch_fromText: #0.", dmp.patch_fromText("").isEmpty()); +void diff_match_patch_test::testPatchFromText() +{ + assertTrue( "patch_fromText: #0.", dmp.patch_fromText( "" ).isEmpty() ); - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("patch_fromText: #1.", strp, dmp.patch_fromText(strp).value(0).toString()); + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals( "patch_fromText: #1.", strp, dmp.patch_fromText( strp ).value( 0 ).toString() ); - assertEquals("patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n").value(0).toString()); + assertEquals( "patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText( "@@ -1 +1 @@\n-a\n+b\n" ).value( 0 ).toString() ); - assertEquals("patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n").value(0).toString()); + assertEquals( "patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText( "@@ -1,3 +0,0 @@\n-abc\n" ).value( 0 
).toString() ); - assertEquals("patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n").value(0).toString()); + assertEquals( "patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText( "@@ -0,0 +1,3 @@\n+abc\n" ).value( 0 ).toString() ); - // Generates error. - try { - dmp.patch_fromText("Bad\nPatch\n"); - assertFalse("patch_fromText: #5.", true); - } catch (QString ex) { - // Exception expected. - } + // Generates error. + try + { + dmp.patch_fromText( "Bad\nPatch\n" ); + assertFalse( "patch_fromText: #5.", true ); + } + catch ( QString ex ) + { + // Exception expected. + } } -void diff_match_patch_test::testPatchToText() { - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - QList patches; - patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Single", strp, dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchToText() +{ + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + QList< Patch > patches; + patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Single", strp, dmp.patch_toText( patches ) ); - strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; - patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Dual", strp, dmp.patch_toText(patches)); + strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = dmp.patch_fromText( strp ); + assertEquals( "patch_toText: Dual", strp, dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchAddContext() { - dmp.Patch_Margin = 4; - Patch p; - p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); - - p = dmp.patch_fromText("@@ -21,4 +21,10 
@@\n-jump\n+somersault\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps. The quick brown fox crashes."); - assertEquals("patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString()); +void diff_match_patch_test::testPatchAddContext() +{ + dmp.Patch_Margin = 4; + Patch p; + p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -21,4 +21,10 @@\n-jump\n+somersault\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps." ); + assertEquals( "patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString() ); + + p = dmp.patch_fromText( "@@ -3 +3,2 @@\n-e\n+at\n" ).value( 0 ); + dmp.patch_addContext( p, "The quick brown fox jumps. The quick brown fox crashes." ); + assertEquals( "patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString() ); } -void diff_match_patch_test::testPatchMake() { - QList patches; - patches = dmp.patch_make("", ""); - assertEquals("patch_make: Null case", "", dmp.patch_toText(patches)); - - QString text1 = "The quick brown fox jumps over the lazy dog."; - QString text2 = "That quick brown fox jumped over a lazy dog."; - QString expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; - // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. - patches = dmp.patch_make(text2, text1); - assertEquals("patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchMake() +{ + QList< Patch > patches; + patches = dmp.patch_make( "", "" ); + assertEquals( "patch_make: Null case", "", dmp.patch_toText( patches ) ); - expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText(patches)); + QString text1 = "The quick brown fox jumps over the lazy dog."; + QString text2 = "That quick brown fox jumped over a lazy dog."; + QString expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. 
+ patches = dmp.patch_make( text2, text1 ); + assertEquals( "patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText( patches ) ); - QList diffs = dmp.diff_main(text1, text2, false); - patches = dmp.patch_make(diffs); - assertEquals("patch_make: Diff input", expectedPatch, dmp.patch_toText(patches)); + expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText( patches ) ); - patches = dmp.patch_make(text1, diffs); - assertEquals("patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText(patches)); + QList< Diff > diffs = dmp.diff_main( text1, text2, false ); + patches = dmp.patch_make( diffs ); + assertEquals( "patch_make: Diff input", expectedPatch, dmp.patch_toText( patches ) ); - patches = dmp.patch_make(text1, text2, diffs); - assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText(patches)); + patches = dmp.patch_make( text1, diffs ); + assertEquals( "patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText( patches ) ); - patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); - assertEquals("patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText(patches)); + patches = dmp.patch_make( text1, text2, diffs ); + assertEquals( "patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText( patches ) ); - diffs = diffList(Diff(DELETE, "`1234567890-=[]\\;',./"), Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")); - assertEquals("patch_fromText: Character decoding.", diffs, dmp.patch_fromText("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n").value(0).diffs); + patches = dmp.patch_make( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?" 
); + assertEquals( "patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText( patches ) ); - text1 = ""; - for (int x = 0; x < 100; x++) { - text1 += "abcdef"; - } - text2 = text1 + "123"; - expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText(patches)); + diffs = diffList( Diff( DELETE, "`1234567890-=[]\\;',./" ), Diff( INSERT, "~!@#$%^&*()_+{}|:\"<>?" ) ); + assertEquals( "patch_fromText: Character decoding.", diffs, dmp.patch_fromText( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" ).value( 0 ).diffs ); - // Test null inputs. - try { - dmp.patch_make(NULL, NULL); - assertFalse("patch_make: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + text1 = ""; + for ( int x = 0; x < 100; x++ ) + { + text1 += "abcdef"; + } + text2 = text1 + "123"; + expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make( text1, text2 ); + assertEquals( "patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText( patches ) ); + + // Test null inputs. + try + { + dmp.patch_make( NULL, NULL ); + assertFalse( "patch_make: Null inputs.", true ); + } + catch ( const char *ex ) + { + // Exception expected. + } } -void diff_match_patch_test::testPatchSplitMax() { - // Assumes that Match_MaxBits is 32. 
- QList patches; - patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz"); - QString oldToText = dmp.patch_toText(patches); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); - - patches = dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchSplitMax() +{ + // Assumes that Match_MaxBits is 32. 
+ QList< Patch > patches; + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz" ); + QString oldToText = dmp.patch_toText( patches ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #2.", oldToText, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1" ); + dmp.patch_splitMax( patches ); + assertEquals( "patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchAddPadding() { - QList patches; - patches = dmp.patch_make("", "test"); - assertEquals("patch_addPadding: Both edges full.", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText(patches)); - - patches 
= dmp.patch_make("XY", "XtestY"); - assertEquals("patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); - assertEquals("patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); +void diff_match_patch_test::testPatchAddPadding() +{ + QList< Patch > patches; + patches = dmp.patch_make( "", "test" ); + assertEquals( "patch_addPadding: Both edges full.", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "XY", "XtestY" ); + assertEquals( "patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "XXXXYYYY", "XXXXtestYYYY" ); + assertEquals( "patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); + dmp.patch_addPadding( patches ); + assertEquals( "patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText( patches ) ); } -void diff_match_patch_test::testPatchApply() { - dmp.Match_Distance = 1000; - dmp.Match_Threshold = 0.5f; - dmp.Patch_DeleteThreshold = 0.5f; - QList patches; - patches = dmp.patch_make("", ""); - QPair > results = 
dmp.patch_apply(patches, "Hello world."); - QVector boolArray = results.second; - - QString resultStr = QString("%1\t%2").arg(results.first).arg(boolArray.count()); - assertEquals("patch_apply: Null case.", "Hello world.\t0", resultStr); - - patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog."); - results = dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr); - - results = dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr); - - results = dmp.patch_apply(patches, "I am the very model of a modern major general."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr); - - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); - assertEquals("patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr); - - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr); - - dmp.Patch_DeleteThreshold = 0.6f; - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr); - dmp.Patch_DeleteThreshold = 0.5f; - - dmp.Match_Threshold = 0.0f; - dmp.Match_Distance = 0; - patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"); - results = dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); - assertEquals("patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr); - dmp.Match_Threshold = 0.5f; - dmp.Match_Distance = 1000; - - patches = dmp.patch_make("", "test"); - QString patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, ""); - assertEquals("patch_apply: No side effects.", patchStr, dmp.patch_toText(patches)); - - patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); - patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText(patches)); - - patches = dmp.patch_make("", "test"); - results = dmp.patch_apply(patches, ""); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Edge exact match.", "test\ttrue", resultStr); - - patches = dmp.patch_make("XY", "XtestY"); - results = dmp.patch_apply(patches, "XY"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr); - - patches = dmp.patch_make("y", "y123"); - results = dmp.patch_apply(patches, "x"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Edge partial match.", "x123\ttrue", resultStr); +void diff_match_patch_test::testPatchApply() +{ + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + QList< Patch > patches; + patches = dmp.patch_make( "", "" ); + QPair< QString, QVector< bool > > results = dmp.patch_apply( patches, "Hello world." 
); + QVector< bool > boolArray = results.second; + + QString resultStr = QString( "%1\t%2" ).arg( results.first ).arg( boolArray.count() ); + assertEquals( "patch_apply: Null case.", "Hello world.\t0", resultStr ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog." ); + results = dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr ); + + results = dmp.patch_apply( patches, "The quick red rabbit jumps over the tired tiger." ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr ); + + results = dmp.patch_apply( patches, "I am the very model of a modern major general." ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr ); + + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? 
"true" : "false" ); + assertEquals( "patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr ); + + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr ); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy" ); + results = dmp.patch_apply( patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? "true" : "false" ); + assertEquals( "patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr ); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make( "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890" ); + results = dmp.patch_apply( patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ) + "\t" + ( boolArray[ 1 ] ? 
"true" : "false" ); + assertEquals( "patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr ); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make( "", "test" ); + QString patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "" ); + assertEquals( "patch_apply: No side effects.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "The quick brown fox jumps over the lazy dog.", "Woof" ); + patchStr = dmp.patch_toText( patches ); + dmp.patch_apply( patches, "The quick brown fox jumps over the lazy dog." ); + assertEquals( "patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText( patches ) ); + + patches = dmp.patch_make( "", "test" ); + results = dmp.patch_apply( patches, "" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ); + assertEquals( "patch_apply: Edge exact match.", "test\ttrue", resultStr ); + + patches = dmp.patch_make( "XY", "XtestY" ); + results = dmp.patch_apply( patches, "XY" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? "true" : "false" ); + assertEquals( "patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr ); + + patches = dmp.patch_make( "y", "y123" ); + results = dmp.patch_apply( patches, "x" ); + boolArray = results.second; + resultStr = results.first + "\t" + ( boolArray[ 0 ] ? 
"true" : "false" ); + assertEquals( "patch_apply: Edge partial match.", "x123\ttrue", resultStr ); } - -void diff_match_patch_test::assertEquals(const QString &strCase, int n1, int n2) { - if (n1 != n2) { - qDebug("%s FAIL\nExpected: %d\nActual: %d", qPrintable(strCase), n1, n2); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, int n1, int n2 ) +{ + if ( n1 != n2 ) + { + qDebug( "%s FAIL\nExpected: %d\nActual: %d", qPrintable( strCase ), n1, n2 ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QString &s1, const QString &s2) { - if (s1 != s2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(s1), qPrintable(s2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const QString &s1, const QString &s2 ) +{ + if ( s1 != s2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( s1 ), qPrintable( s2 ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const Diff &d1, const Diff &d2) { - if (d1 != d2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - qPrintable(d1.toString()), qPrintable(d2.toString())); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const Diff &d1, const Diff &d2 ) +{ + if ( d1 != d2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( d1.toString() ), qPrintable( d2.toString() ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { - bool fail = false; - if (list1.count() == list2.count()) { - int i = 0; - 
foreach(Diff d1, list1) { - Diff d2 = list2.value(i); - if (d1 != d2) { +void diff_match_patch_test::assertEquals( const QString &strCase, const QList< Diff > &list1, const QList< Diff > &list2 ) +{ + bool fail = false; + if ( list1.count() == list2.count() ) + { + int i = 0; + foreach( Diff d1, list1 ) + { + Diff d2 = list2.value( i ); + if ( d1 != d2 ) + { + fail = true; + break; + } + i++; + } + } + else + { fail = true; - break; - } - i++; } - } else { - fail = true; - } - if (fail) { - // Build human readable description of both lists. - QString listString1 = "("; - bool first = true; - foreach(Diff d1, list1) { - if (!first) { - listString1 += ", "; - } - listString1 += d1.toString(); - first = false; - } - listString1 += ")"; - QString listString2 = "("; - first = true; - foreach(Diff d2, list2) { - if (!first) { - listString2 += ", "; - } - listString2 += d2.toString(); - first = false; + if ( fail ) + { + // Build human readable description of both lists. + QString listString1 = "("; + bool first = true; + foreach( Diff d1, list1 ) + { + if ( !first ) + { + listString1 += ", "; + } + listString1 += d1.toString(); + first = false; + } + listString1 += ")"; + QString listString2 = "("; + first = true; + foreach( Diff d2, list2 ) + { + if ( !first ) + { + listString2 += ", "; + } + listString2 += d2.toString(); + first = false; + } + listString2 += ")"; + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( listString1 ), qPrintable( listString2 ) ); + throw strCase; } - listString2 += ")"; - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { - bool fail = false; - if (list1.count() == list2.count()) { - int i = 0; - foreach(QVariant q1, list1) { - 
QVariant q2 = list2.value(i); - if (q1 != q2) { +void diff_match_patch_test::assertEquals( const QString &strCase, const QList< QVariant > &list1, const QList< QVariant > &list2 ) +{ + bool fail = false; + if ( list1.count() == list2.count() ) + { + int i = 0; + foreach( QVariant q1, list1 ) + { + QVariant q2 = list2.value( i ); + if ( q1 != q2 ) + { + fail = true; + break; + } + i++; + } + } + else + { fail = true; - break; - } - i++; } - } else { - fail = true; - } - if (fail) { - // Build human readable description of both lists. - QString listString1 = "("; - bool first = true; - foreach(QVariant q1, list1) { - if (!first) { - listString1 += ", "; - } - listString1 += q1.toString(); - first = false; - } - listString1 += ")"; - QString listString2 = "("; - first = true; - foreach(QVariant q2, list2) { - if (!first) { - listString2 += ", "; - } - listString2 += q2.toString(); - first = false; + if ( fail ) + { + // Build human readable description of both lists. + QString listString1 = "("; + bool first = true; + foreach( QVariant q1, list1 ) + { + if ( !first ) + { + listString1 += ", "; + } + listString1 += q1.toString(); + first = false; + } + listString1 += ")"; + QString listString2 = "("; + first = true; + foreach( QVariant q2, list2 ) + { + if ( !first ) + { + listString2 += ", "; + } + listString2 += q2.toString(); + first = false; + } + listString2 += ")"; + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( listString1 ), qPrintable( listString2 ) ); + throw strCase; } - listString2 += ")"; - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2) { - if (var1 != var2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - 
qPrintable(var1.toString()), qPrintable(var2.toString())); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const QVariant &var1, const QVariant &var2 ) +{ + if ( var1 != var2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( var1.toString() ), qPrintable( var2.toString() ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QMap &m1, const QMap &m2) { - QMapIterator i1(m1), i2(m2); - - while (i1.hasNext() && i2.hasNext()) { - i1.next(); - i2.next(); - if (i1.key() != i2.key() || i1.value() != i2.value()) { - qDebug("%s FAIL\nExpected: (%c, %d)\nActual: (%c, %d)", qPrintable(strCase), - i1.key().toAscii(), i1.value(), i2.key().toAscii(), i2.value()); - throw strCase; +void diff_match_patch_test::assertEquals( const QString &strCase, const QMap< QChar, int > &m1, const QMap< QChar, int > &m2 ) +{ + QMapIterator< QChar, int > i1( m1 ), i2( m2 ); + + while ( i1.hasNext() && i2.hasNext() ) + { + i1.next(); + i2.next(); + if ( i1.key() != i2.key() || i1.value() != i2.value() ) + { + qDebug( "%s FAIL\nExpected: (%c, %d)\nActual: (%c, %d)", qPrintable( strCase ), i1.key().toAscii(), i1.value(), i2.key().toAscii(), i2.value() ); + throw strCase; + } } - } - if (i1.hasNext()) { - i1.next(); - qDebug("%s FAIL\nExpected: (%c, %d)\nActual: none", - qPrintable(strCase), i1.key().toAscii(), i1.value()); - throw strCase; - } - if (i2.hasNext()) { - i2.next(); - qDebug("%s FAIL\nExpected: none\nActual: (%c, %d)", - qPrintable(strCase), i2.key().toAscii(), i2.value()); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + if ( i1.hasNext() ) + { + i1.next(); + qDebug( "%s FAIL\nExpected: (%c, %d)\nActual: none", qPrintable( strCase ), i1.key().toAscii(), i1.value() ); + throw strCase; + } + if ( i2.hasNext() ) + { + i2.next(); + qDebug( "%s FAIL\nExpected: none\nActual: 
(%c, %d)", qPrintable( strCase ), i2.key().toAscii(), i2.value() ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2) { - if (list1 != list2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - qPrintable(list1.join(",")), qPrintable(list2.join(","))); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals( const QString &strCase, const QStringList &list1, const QStringList &list2 ) +{ + if ( list1 != list2 ) + { + qDebug( "%s FAIL\nExpected: %s\nActual: %s", qPrintable( strCase ), qPrintable( list1.join( "," ) ), qPrintable( list2.join( "," ) ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertTrue(const QString &strCase, bool value) { - if (!value) { - qDebug("%s FAIL\nExpected: true\nActual: false", qPrintable(strCase)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertTrue( const QString &strCase, bool value ) +{ + if ( !value ) + { + qDebug( "%s FAIL\nExpected: true\nActual: false", qPrintable( strCase ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } -void diff_match_patch_test::assertFalse(const QString &strCase, bool value) { - if (value) { - qDebug("%s FAIL\nExpected: false\nActual: true", qPrintable(strCase)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertFalse( const QString &strCase, bool value ) +{ + if ( value ) + { + qDebug( "%s FAIL\nExpected: false\nActual: true", qPrintable( strCase ) ); + throw strCase; + } + qDebug( "%s OK", qPrintable( strCase ) ); } - // Construct the two texts which made up the diff originally. 
-QStringList diff_match_patch_test::diff_rebuildtexts(QList diffs) { - QStringList text; - text << QString("") << QString(""); - foreach (Diff myDiff, diffs) { - if (myDiff.operation != INSERT) { - text[0] += myDiff.text; +QStringList diff_match_patch_test::diff_rebuildtexts( QList< Diff > diffs ) +{ + QStringList text; + text << QString( "" ) << QString( "" ); + foreach( Diff myDiff, diffs ) + { + if ( myDiff.operation != INSERT ) + { + text[ 0 ] += myDiff.text; + } + if ( myDiff.operation != DELETE ) + { + text[ 1 ] += myDiff.text; + } } - if (myDiff.operation != DELETE) { - text[1] += myDiff.text; - } - } - return text; + return text; } -void diff_match_patch_test::assertEmpty(const QString &strCase, const QStringList &list) { - if (!list.isEmpty()) { - throw strCase; - } +void diff_match_patch_test::assertEmpty( const QString &strCase, const QStringList &list ) +{ + if ( !list.isEmpty() ) + { + throw strCase; + } } - // Private function for quickly building lists of diffs. -QList diff_match_patch_test::diffList(Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, - Diff d6, Diff d7, Diff d8, Diff d9, Diff d10) { - // Diff(INSERT, NULL) is invalid and thus is used as the default argument. - QList listRet; - if (d1.operation == INSERT && d1.text == NULL) { - return listRet; - } - listRet << d1; +QList< Diff > diff_match_patch_test::diffList( Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, Diff d6, Diff d7, Diff d8, Diff d9, Diff d10 ) +{ + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. 
+ QList< Diff > listRet; + if ( d1.operation == INSERT && d1.text == NULL ) + { + return listRet; + } + listRet << d1; - if (d2.operation == INSERT && d2.text == NULL) { - return listRet; - } - listRet << d2; + if ( d2.operation == INSERT && d2.text == NULL ) + { + return listRet; + } + listRet << d2; - if (d3.operation == INSERT && d3.text == NULL) { - return listRet; - } - listRet << d3; + if ( d3.operation == INSERT && d3.text == NULL ) + { + return listRet; + } + listRet << d3; - if (d4.operation == INSERT && d4.text == NULL) { - return listRet; - } - listRet << d4; + if ( d4.operation == INSERT && d4.text == NULL ) + { + return listRet; + } + listRet << d4; - if (d5.operation == INSERT && d5.text == NULL) { - return listRet; - } - listRet << d5; + if ( d5.operation == INSERT && d5.text == NULL ) + { + return listRet; + } + listRet << d5; - if (d6.operation == INSERT && d6.text == NULL) { - return listRet; - } - listRet << d6; + if ( d6.operation == INSERT && d6.text == NULL ) + { + return listRet; + } + listRet << d6; - if (d7.operation == INSERT && d7.text == NULL) { - return listRet; - } - listRet << d7; + if ( d7.operation == INSERT && d7.text == NULL ) + { + return listRet; + } + listRet << d7; - if (d8.operation == INSERT && d8.text == NULL) { - return listRet; - } - listRet << d8; + if ( d8.operation == INSERT && d8.text == NULL ) + { + return listRet; + } + listRet << d8; - if (d9.operation == INSERT && d9.text == NULL) { - return listRet; - } - listRet << d9; + if ( d9.operation == INSERT && d9.text == NULL ) + { + return listRet; + } + listRet << d9; - if (d10.operation == INSERT && d10.text == NULL) { - return listRet; - } - listRet << d10; + if ( d10.operation == INSERT && d10.text == NULL ) + { + return listRet; + } + listRet << d10; - return listRet; + return listRet; } - /* Compile instructions for MinGW and QT4 on Windows: qmake -project diff --git a/cpp/diff_match_patch_test.h b/cpp/diff_match_patch_test.h index 97922229..e3aa6d1e 100644 --- 
a/cpp/diff_match_patch_test.h +++ b/cpp/diff_match_patch_test.h @@ -19,71 +19,68 @@ #ifndef DIFF_MATCH_PATCH_TEST_H #define DIFF_MATCH_PATCH_TEST_H -class diff_match_patch_test { - public: - diff_match_patch_test(); - void run_all_tests(); +class diff_match_patch_test +{ +public: + diff_match_patch_test(); + void run_all_tests(); - // DIFF TEST FUNCTIONS - void testDiffCommonPrefix(); - void testDiffCommonSuffix(); - void testDiffCommonOverlap(); - void testDiffHalfmatch(); - void testDiffLinesToChars(); - void testDiffCharsToLines(); - void testDiffCleanupMerge(); - void testDiffCleanupSemanticLossless(); - void testDiffCleanupSemantic(); - void testDiffCleanupEfficiency(); - void testDiffPrettyHtml(); - void testDiffText(); - void testDiffDelta(); - void testDiffXIndex(); - void testDiffLevenshtein(); - void testDiffBisect(); - void testDiffMain(); + // DIFF TEST FUNCTIONS + void testDiffCommonPrefix(); + void testDiffCommonSuffix(); + void testDiffCommonOverlap(); + void testDiffHalfmatch(); + void testDiffLinesToChars(); + void testDiffCharsToLines(); + void testDiffCleanupMerge(); + void testDiffCleanupSemanticLossless(); + void testDiffCleanupSemantic(); + void testDiffCleanupEfficiency(); + void testDiffPrettyHtml(); + void testDiffText(); + void testDiffDelta(); + void testDiffXIndex(); + void testDiffLevenshtein(); + void testDiffBisect(); + void testDiffMain(); - // MATCH TEST FUNCTIONS - void testMatchAlphabet(); - void testMatchBitap(); - void testMatchMain(); + // MATCH TEST FUNCTIONS + void testMatchAlphabet(); + void testMatchBitap(); + void testMatchMain(); - // PATCH TEST FUNCTIONS - void testPatchObj(); - void testPatchFromText(); - void testPatchToText(); - void testPatchAddContext(); - void testPatchMake(); - void testPatchSplitMax(); - void testPatchAddPadding(); - void testPatchApply(); + // PATCH TEST FUNCTIONS + void testPatchObj(); + void testPatchFromText(); + void testPatchToText(); + void testPatchAddContext(); + void testPatchMake(); + 
void testPatchSplitMax(); + void testPatchAddPadding(); + void testPatchApply(); - private: - diff_match_patch dmp; +private: + diff_match_patch dmp; - // Define equality. - void assertEquals(const QString &strCase, int n1, int n2); - void assertEquals(const QString &strCase, const QString &s1, const QString &s2); - void assertEquals(const QString &strCase, const Diff &d1, const Diff &d2); - void assertEquals(const QString &strCase, const QList &list1, const QList &list2); - void assertEquals(const QString &strCase, const QList &list1, const QList &list2); - void assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2); - void assertEquals(const QString &strCase, const QMap &m1, const QMap &m2); - void assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2); - void assertTrue(const QString &strCase, bool value); - void assertFalse(const QString &strCase, bool value); - void assertEmpty(const QString &strCase, const QStringList &list); + // Define equality. 
+ void assertEquals( const QString &strCase, int n1, int n2 ); + void assertEquals( const QString &strCase, const QString &s1, const QString &s2 ); + void assertEquals( const QString &strCase, const Diff &d1, const Diff &d2 ); + void assertEquals( const QString &strCase, const QList< Diff > &list1, const QList< Diff > &list2 ); + void assertEquals( const QString &strCase, const QList< QVariant > &list1, const QList< QVariant > &list2 ); + void assertEquals( const QString &strCase, const QVariant &var1, const QVariant &var2 ); + void assertEquals( const QString &strCase, const QMap< QChar, int > &m1, const QMap< QChar, int > &m2 ); + void assertEquals( const QString &strCase, const QStringList &list1, const QStringList &list2 ); + void assertTrue( const QString &strCase, bool value ); + void assertFalse( const QString &strCase, bool value ); + void assertEmpty( const QString &strCase, const QStringList &list ); - // Construct the two texts which made up the diff originally. - QStringList diff_rebuildtexts(QList diffs); - // Private function for quickly building lists of diffs. - QList diffList( - // Diff(INSERT, NULL) is invalid and thus is used as the default argument. - Diff d1 = Diff(INSERT, NULL), Diff d2 = Diff(INSERT, NULL), - Diff d3 = Diff(INSERT, NULL), Diff d4 = Diff(INSERT, NULL), - Diff d5 = Diff(INSERT, NULL), Diff d6 = Diff(INSERT, NULL), - Diff d7 = Diff(INSERT, NULL), Diff d8 = Diff(INSERT, NULL), - Diff d9 = Diff(INSERT, NULL), Diff d10 = Diff(INSERT, NULL)); + // Construct the two texts which made up the diff originally. + QStringList diff_rebuildtexts( QList< Diff > diffs ); + // Private function for quickly building lists of diffs. + QList< Diff > diffList( + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. 
+ Diff d1 = Diff( INSERT, NULL ), Diff d2 = Diff( INSERT, NULL ), Diff d3 = Diff( INSERT, NULL ), Diff d4 = Diff( INSERT, NULL ), Diff d5 = Diff( INSERT, NULL ), Diff d6 = Diff( INSERT, NULL ), Diff d7 = Diff( INSERT, NULL ), Diff d8 = Diff( INSERT, NULL ), Diff d9 = Diff( INSERT, NULL ), Diff d10 = Diff( INSERT, NULL ) ); }; -#endif // DIFF_MATCH_PATCH_TEST_H +#endif // DIFF_MATCH_PATCH_TEST_H diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp index 0ae6453d..853fa4c4 100644 --- a/cpp17/diff_match_patch_test_assertEquals.cpp +++ b/cpp17/diff_match_patch_test_assertEquals.cpp @@ -147,7 +147,7 @@ namespace NDiffMatchPatch { text[ 0 ] += myDiff.text(); } - + if ( !myDiff.isDelete() ) { text[ 1 ] += myDiff.text(); diff --git a/cpp17/diff_match_patch_utils.h b/cpp17/diff_match_patch_utils.h index 326abf5f..332eb1f1 100644 --- a/cpp17/diff_match_patch_utils.h +++ b/cpp17/diff_match_patch_utils.h @@ -338,14 +338,13 @@ namespace NDiffMatchPatch return to_wstring( string, false ); else if constexpr ( is_charstar< STRING_TYPE >::value ) return to_wstring( std::string( string ? 
string : "" ), false ); - else if constexpr( std::is_same_v< char, STRING_TYPE > ) + else if constexpr ( std::is_same_v< char, STRING_TYPE > ) return to_wstring( string, false ); - else if constexpr( std::is_same_v< wchar_t, STRING_TYPE > ) + else if constexpr ( std::is_same_v< wchar_t, STRING_TYPE > ) return to_wstring( string, false ); } } } - #endif diff --git a/objectivec/DiffMatchPatchCFUtilities.h b/objectivec/DiffMatchPatchCFUtilities.h index a9c93a20..690b9fa9 100755 --- a/objectivec/DiffMatchPatchCFUtilities.h +++ b/objectivec/DiffMatchPatchCFUtilities.h @@ -22,27 +22,29 @@ #ifndef _DIFFMATCHPATCHCFUTILITIES_H #define _DIFFMATCHPATCHCFUTILITIES_H -CFStringRef diff_CFStringCreateFromUnichar(UniChar ch); -CFStringRef diff_CFStringCreateJavaSubstring(CFStringRef s, CFIndex begin, CFIndex end); +CFStringRef diff_CFStringCreateFromUnichar( UniChar ch ); +CFStringRef diff_CFStringCreateJavaSubstring( CFStringRef s, CFIndex begin, CFIndex end ); -CFIndex diff_commonPrefix(CFStringRef text1, CFStringRef text2); -CFIndex diff_commonSuffix(CFStringRef text1, CFStringRef text2); -CFIndex diff_commonOverlap(CFStringRef text1, CFStringRef text2); -CFArrayRef diff_halfMatchCreate(CFStringRef text1, CFStringRef text2, const float diffTimeout); -CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CFIndex i); +CFIndex diff_commonPrefix( CFStringRef text1, CFStringRef text2 ); +CFIndex diff_commonSuffix( CFStringRef text1, CFStringRef text2 ); +CFIndex diff_commonOverlap( CFStringRef text1, CFStringRef text2 ); +CFArrayRef diff_halfMatchCreate( CFStringRef text1, CFStringRef text2, const float diffTimeout ); +CFArrayRef diff_halfMatchICreate( CFStringRef longtext, CFStringRef shorttext, CFIndex i ); -CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash, CFIndex maxLines); +CFStringRef diff_linesToCharsMungeCFStringCreate( CFStringRef text, CFMutableArrayRef lineArray, 
CFMutableDictionaryRef lineHash, CFIndex maxLines ); -CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two); +CFIndex diff_cleanupSemanticScore( CFStringRef one, CFStringRef two ); -CF_INLINE void diff_CFStringPrepareUniCharBuffer(CFStringRef string, const UniChar **string_chars, UniChar **string_buffer, CFRange string_range) { - *string_chars = CFStringGetCharactersPtr(string); - if (*string_chars == NULL) { - // Fallback in case CFStringGetCharactersPtr() didn’t work. - *string_buffer = malloc(string_range.length * sizeof(UniChar)); - CFStringGetCharacters(string, string_range, *string_buffer); - *string_chars = *string_buffer; - } +CF_INLINE void diff_CFStringPrepareUniCharBuffer( CFStringRef string, const UniChar **string_chars, UniChar **string_buffer, CFRange string_range ) +{ + *string_chars = CFStringGetCharactersPtr( string ); + if ( *string_chars == NULL ) + { + // Fallback in case CFStringGetCharactersPtr() didn’t work. + *string_buffer = malloc( string_range.length * sizeof( UniChar ) ); + CFStringGetCharacters( string, string_range, *string_buffer ); + *string_chars = *string_buffer; + } } -#endif //ifndef _DIFFMATCHPATCHCFUTILITIES_H +#endif //ifndef _DIFFMATCHPATCHCFUTILITIES_H diff --git a/objectivec/MinMaxMacros.h b/objectivec/MinMaxMacros.h index 2765e0fa..28059ae3 100755 --- a/objectivec/MinMaxMacros.h +++ b/objectivec/MinMaxMacros.h @@ -19,22 +19,28 @@ * ObjC port: jan@geheimwerk.de (Jan Weiß) */ -#if !defined(MIN) - #define MIN(A,B) \ - ({__typeof__(A) a = (A); \ - __typeof__(B) b = (B); \ - (a < b) ? a : b; }) +#if !defined( MIN ) + #define MIN( A, B ) \ + ( { \ + __typeof__( A ) a = ( A ); \ + __typeof__( B ) b = ( B ); \ + ( a < b ) ? a : b; \ + } ) #endif -#if !defined(MAX) - #define MAX(A,B) \ - ({__typeof__(A) a = (A); \ - __typeof__(B) b = (B); \ - (a > b) ? a : b; }) +#if !defined( MAX ) + #define MAX( A, B ) \ + ( { \ + __typeof__( A ) a = ( A ); \ + __typeof__( B ) b = ( B ); \ + ( a > b ) ? 
a : b; \ + } ) #endif -#if !defined(ABS) - #define ABS(A) \ - ({__typeof__(A) a = (A); \ - (a > 0) ? a : -a; }) +#if !defined( ABS ) + #define ABS( A ) \ + ( { \ + __typeof__( A ) a = ( A ); \ + ( a > 0 ) ? a : -a; \ + } ) #endif