Bug 1101337 - Make the ReplaceSubstring() XPCOM string API linear; r=froydnj

ReplaceSubstring() is an O(n*m) algorithm (n being the length of the
string and m being the number of occurrences of aTarget) because we have
to move the remainder of the string, search it again and potentially
memmove most of it again as we find more matches.  This patch rewrites
that function to make it O(n+m).

Note that we currently don't build TestStrings.cpp, so the test case in
this patch is not run automatically, but the test case has been verified
to pass separately by moving the test function into Gecko and calling it
during startup and stepping through it in the debugger.

--HG--
extra : rebase_source : b020e17c1973330b0dbbd6bf956c073cfdcb775e
This commit is contained in:
Ehsan Akhgari 2014-12-12 18:57:09 -05:00
parent 014909a0f9
commit 1576e9507a
4 changed files with 242 additions and 7 deletions

View File

@ -106,7 +106,7 @@ static nsStringStats gStringStats;
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
inline void void
ReleaseData(void* aData, uint32_t aFlags) ReleaseData(void* aData, uint32_t aFlags)
{ {
if (aFlags & nsSubstring::F_SHARED) { if (aFlags & nsSubstring::F_SHARED) {

View File

@ -386,6 +386,11 @@ public:
#ifdef CharT_is_PRUnichar #ifdef CharT_is_PRUnichar
void ReplaceChar(const char16_t* aSet, char16_t aNewChar); void ReplaceChar(const char16_t* aSet, char16_t aNewChar);
#endif #endif
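/**
* Replace all occurrences of aTarget with aNewValue.
* Once the matches have been located, the string is rewritten in a single
* pass: each non-matching segment is moved at most once and aNewValue is
* copied once per match. The rewrite therefore costs O(n + k*m), n being the
* length of the string, k the number of matches and m the length of aNewValue.
*/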
void ReplaceSubstring(const self_type& aTarget, const self_type& aNewValue); void ReplaceSubstring(const self_type& aTarget, const self_type& aNewValue);
void ReplaceSubstring(const char_type* aTarget, const char_type* aNewValue); void ReplaceSubstring(const char_type* aTarget, const char_type* aNewValue);
@ -458,6 +463,14 @@ protected:
: substring_type(aData, aLength, aFlags) : substring_type(aData, aLength, aFlags)
{ {
} }
/**
 * A run of characters inside a string buffer, described by the offset of its
 * first character and its length. ReplaceSubstring() records one Segment for
 * every part of the string that does not match the target.
 */
struct Segment
{
  uint32_t mBegin;   // Offset of the first character of the run.
  uint32_t mLength;  // Number of characters in the run (may be 0).

  Segment(uint32_t aBegin, uint32_t aLength)
    : mBegin(aBegin), mLength(aLength)
  {
  }
};
}; };

View File

@ -4,7 +4,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsTArray.h"
/** /**
* nsTString::Find * nsTString::Find
@ -451,6 +451,8 @@ nsTString_CharT::ReplaceChar( const char* aSet, char_type aNewChar )
} }
} }
void ReleaseData(void* aData, uint32_t aFlags);
void void
nsTString_CharT::ReplaceSubstring( const char_type* aTarget, const char_type* aNewValue ) nsTString_CharT::ReplaceSubstring( const char_type* aTarget, const char_type* aNewValue )
{ {
@ -464,18 +466,87 @@ nsTString_CharT::ReplaceSubstring( const self_type& aTarget, const self_type& aN
if (aTarget.Length() == 0) if (aTarget.Length() == 0)
return; return;
// Remember all of the non-matching parts.
nsAutoTArray<Segment, 16> nonMatching;
uint32_t i = 0; uint32_t i = 0;
while (i < mLength) uint32_t newLength = 0;
while (true)
{ {
int32_t r = FindSubstring(mData + i, mLength - i, static_cast<const char_type*>(aTarget.Data()), aTarget.Length(), false); int32_t r = FindSubstring(mData + i, mLength - i, static_cast<const char_type*>(aTarget.Data()), aTarget.Length(), false);
if (r == kNotFound) int32_t until = (r == kNotFound) ? mLength - i : r;
nonMatching.AppendElement(Segment(i, until));
newLength += until;
if (r == kNotFound) {
break; break;
}
Replace(i + r, aTarget.Length(), aNewValue); newLength += aNewValue.Length();
i += r + aNewValue.Length(); i += r + aTarget.Length();
if (i >= mLength) {
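// The last match ends exactly at the end of the string.  Add an empty
// segment there so that the loops below, which write one replacement in
// front of every segment after the first, also handle this final match.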
nonMatching.AppendElement(Segment(mLength, 0));
break;
}
} }
}
// If there's only one non-matching segment, then the target string was not
// found, and there's nothing to do.
if (nonMatching.Length() == 1) {
MOZ_ASSERT(nonMatching[0].mBegin == 0 && nonMatching[0].mLength == mLength,
"We should have the correct non-matching segment.");
return;
}
// Make sure that we can mutate our buffer.  Ask for enough room for whichever
// of the old and the new contents is longer, because the segments are
// shuffled around in place below.
char_type* oldData;
uint32_t oldFlags;
if (!MutatePrep(XPCOM_MAX(mLength, newLength), &oldData, &oldFlags))
  return;
if (oldData) {
  // Copy all of the old data to the new buffer.  Only mLength characters of
  // oldData are valid: copying XPCOM_MAX(mLength, newLength) characters would
  // read past the end of the old buffer whenever the string grows.
  // NOTE(review): presumably MutatePrep hands back the previous buffer in
  // oldData only when it had to switch to a new one -- confirm.
  char_traits::copy(mData, oldData, mLength);
  ::ReleaseData(oldData, oldFlags);
}
if (aTarget.Length() >= aNewValue.Length()) {
// In the shrinking case, start filling the buffer from the beginning.
const uint32_t delta = (aTarget.Length() - aNewValue.Length());
for (i = 1; i < nonMatching.Length(); ++i) {
// When we move the i'th non-matching segment into position, we need to
// account for the characters deleted by the previous |i| replacements by
// subtracting |i * delta|.
const char_type* sourceSegmentPtr = mData + nonMatching[i].mBegin;
char_type* destinationSegmentPtr = mData + nonMatching[i].mBegin - i * delta;
// Write the i'th replacement immediately before the new i'th non-matching
// segment.
char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
aNewValue.Data(), aNewValue.Length());
char_traits::move(destinationSegmentPtr, sourceSegmentPtr,
nonMatching[i].mLength);
}
} else {
// In the growing case, start filling the buffer from the end.
const uint32_t delta = (aNewValue.Length() - aTarget.Length());
for (i = nonMatching.Length() - 1; i > 0; --i) {
// When we move the i'th non-matching segment into position, we need to
// account for the characters added by the previous |i| replacements by
// adding |i * delta|.
const char_type* sourceSegmentPtr = mData + nonMatching[i].mBegin;
char_type* destinationSegmentPtr = mData + nonMatching[i].mBegin + i * delta;
char_traits::move(destinationSegmentPtr, sourceSegmentPtr,
nonMatching[i].mLength);
// Write the i'th replacement immediately before the new i'th non-matching
// segment.
char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
aNewValue.Data(), aNewValue.Length());
}
}
// Adjust the length and make sure the string is null terminated.
mLength = newLength;
mData[mLength] = char_type(0);
}
/** /**
* nsTString::Trim * nsTString::Trim

View File

@ -451,6 +451,156 @@ bool test_replace_substr_2()
return true; return true;
} }
/**
 * Runs ReplaceSubstring() over a fixed list of inputs where the replacement
 * is shorter than, as long as, or longer than the target, where the target
 * sits at the start, in the middle or at the end of the string, and where
 * the target does not occur at all.
 *
 * The same nsCString is reused for every case, in table order.  On the first
 * mismatch the actual string is printed and the function returns false; it
 * returns true when every case produces the expected string.
 */
bool test_replace_substr_3()
{
  static const struct
  {
    const char* mInput;     // Initial contents of the string.
    const char* mTarget;    // Substring to look for.
    const char* mNewValue;  // Text substituted for every match.
    const char* mExpected;  // Contents expected afterwards.
  } kCases[] = {
    // Matches in the middle of the string.
    { "abcabcabc",    "ca",       "X",              "abXbXbc" },
    { "abcabcabc",    "ca",       "XYZ",            "abXYZbXYZbc" },
    { "abcabcabc",    "ca",       "XY",             "abXYbXYbc" },
    { "abcabcabc",    "ca",       "XYZ!",           "abXYZ!bXYZ!bc" },
    // Last match ends exactly at the end of the string.
    { "abcdabcdabcd", "bcd",      "X",              "aXaXaX" },
    { "abcdabcdabcd", "bcd",      "XYZ!",           "aXYZ!aXYZ!aXYZ!" },
    { "abcdabcdabcd", "bcd",      "XY",             "aXYaXYaXY" },
    { "abcdabcdabcd", "bcd",      "XYZABC",         "aXYZABCaXYZABCaXYZABC" },
    { "abcdabcdabcd", "bcd",      "XYZ",            "aXYZaXYZaXYZ" },
    // Same inputs as an earlier row; kept so the sequence of calls is unchanged.
    { "abcdabcdabcd", "bcd",      "XYZ!",           "aXYZ!aXYZ!aXYZ!" },
    // First match starts at the beginning of the string.
    { "abcdabcdabcd", "ab",       "X",              "XcdXcdXcd" },
    { "abcdabcdabcd", "ab",       "XYZABC",         "XYZABCcdXYZABCcdXYZABCcd" },
    { "abcdabcdabcd", "ab",       "XY",             "XYcdXYcdXYcd" },
    { "abcdabcdabcd", "ab",       "XYZ!",           "XYZ!cdXYZ!cdXYZ!cd" },
    // Target absent: the string must be left untouched.
    { "abcdabcdabcd", "notfound", "X",              "abcdabcdabcd" },
    { "abcdabcdabcd", "notfound", "longlongstring", "abcdabcdabcd" },
  };

  nsCString s;
  for (size_t idx = 0; idx < sizeof(kCases) / sizeof(kCases[0]); ++idx)
  {
    s.Assign(kCases[idx].mInput);
    s.ReplaceSubstring(kCases[idx].mTarget, kCases[idx].mNewValue);
    if (strcmp(s.get(), kCases[idx].mExpected) != 0)
    {
      printf("[s=%s]\n", s.get());
      return false;
    }
  }

  return true;
}
bool test_strip_ws() bool test_strip_ws()
{ {
const char text[] = " a $ "; const char text[] = " a $ ";
@ -1201,6 +1351,7 @@ tests[] =
{ "test_trim", test_trim }, { "test_trim", test_trim },
{ "test_replace_substr", test_replace_substr }, { "test_replace_substr", test_replace_substr },
{ "test_replace_substr_2", test_replace_substr_2 }, { "test_replace_substr_2", test_replace_substr_2 },
{ "test_replace_substr_3", test_replace_substr_3 },
{ "test_strip_ws", test_strip_ws }, { "test_strip_ws", test_strip_ws },
{ "test_equals_ic", test_equals_ic }, { "test_equals_ic", test_equals_ic },
{ "test_fixed_string", test_fixed_string }, { "test_fixed_string", test_fixed_string },