gecko/content/base/src/nsTextFragment.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Communicator client code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

/*
 * A class which represents a fragment of text (eg inside a text
 * node); if only codepoints below 256 are used, the text is stored as
 * a char*; otherwise the text is stored as a PRUnichar*
 */

#include "nsTextFragment.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"
#include "nsMemory.h"
#include "nsBidiUtils.h"
#include "nsUnicharUtils.h"
#include "nsUTF8Utils.h"

#define TEXTFRAG_WHITE_AFTER_NEWLINE 50
#define TEXTFRAG_MAX_NEWLINES 7

// Static buffer used for common fragments
static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char sSingleCharSharedString[256];

// static
nsresult
nsTextFragment::Init()
{
  // Create whitespace strings
  PRUint32 i;
  for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
    sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
    sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
    NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i],
                   NS_ERROR_OUT_OF_MEMORY);
    sSpaceSharedString[i][0] = ' ';
    sTabSharedString[i][0] = ' ';
    PRUint32 j;
    for (j = 1; j < 1 + i; ++j) {
      sSpaceSharedString[i][j] = '\n';
      sTabSharedString[i][j] = '\n';
    }
    for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
      sSpaceSharedString[i][j] = ' ';
      sTabSharedString[i][j] = '\t';
    }
  }

  // Create single-char strings
  for (i = 0; i < 256; ++i) {
    sSingleCharSharedString[i] = i;
  }

  return NS_OK;
}

// static
void
nsTextFragment::Shutdown()
{
  PRUint32  i;
  for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
    delete [] sSpaceSharedString[i];
    delete [] sTabSharedString[i];
    sSpaceSharedString[i] = nsnull;
    sTabSharedString[i] = nsnull;
  }
}

nsTextFragment::~nsTextFragment()
{
  ReleaseText();
  MOZ_COUNT_DTOR(nsTextFragment);
}

void
nsTextFragment::ReleaseText()
{
  if (mState.mLength && m1b && mState.mInHeap) {
    nsMemory::Free(m2b); // m1b == m2b as far as nsMemory is concerned
  }

  m1b = nsnull;

  // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
  mAllBits = 0;
}

nsTextFragment&
nsTextFragment::operator=(const nsTextFragment& aOther)
{
  ReleaseText();

  if (aOther.mState.mLength) {
    if (!aOther.mState.mInHeap) {
      m1b = aOther.m1b; // This will work even if aOther is using m2b
    }
    else {
      m2b = static_cast<PRUnichar*>
                       (nsMemory::Clone(aOther.m2b, aOther.mState.mLength *
                                    (aOther.mState.mIs2b ? sizeof(PRUnichar) : sizeof(char))));
    }

    if (m1b) {
      mAllBits = aOther.mAllBits;
    }
  }

  return *this;
}

void
nsTextFragment::SetTo(const PRUnichar* aBuffer, PRInt32 aLength)
{
  ReleaseText();

  if (aLength == 0) {
    return;
  }

  PRUnichar firstChar = *aBuffer;
  if (aLength == 1 && firstChar < 256) {
    m1b = sSingleCharSharedString + firstChar;
    mState.mInHeap = PR_FALSE;
    mState.mIs2b = PR_FALSE;
    mState.mLength = 1;

    return;
  }

  const PRUnichar *ucp = aBuffer;
  const PRUnichar *uend = aBuffer + aLength;

  // Check if we can use a shared string
  if (firstChar == ' ' || firstChar == '\n' || firstChar == '\t') {
    if (firstChar == ' ') {
      ++ucp;
    }

    const PRUnichar* start = ucp;
    while (ucp < uend && *ucp == '\n') {
      ++ucp;
    }
    const PRUnichar* endNewLine = ucp;

    PRUnichar space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
    while (ucp < uend && *ucp == space) {
      ++ucp;
    }

    if (ucp == uend &&
        endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
        ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
      char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
      m1b = strings[endNewLine - start];

      // If we didn't find a space in the beginning, skip it now.
      if (firstChar != ' ') {
        ++m1b;
      }

      mState.mInHeap = PR_FALSE;
      mState.mIs2b = PR_FALSE;
      mState.mLength = aLength;

      return;
    }
  }

  // We don't attempt to detect if large text nodes can be stored compactly,
  // because that wastes too much time.
  const PRInt32 LARGE_STRING_THRESHOLD = 10240; // 10KB
  PRBool need2 = aLength >= LARGE_STRING_THRESHOLD;
  if (!need2) {
    // See if we need to store the data in ucs2 or not
    while (ucp < uend) {
      PRUnichar ch = *ucp++;
      if (ch >= 256) {
        need2 = PR_TRUE;
        break;
      }
    }
  }

  if (need2) {
    // Use ucs2 storage because we have to
    m2b = (PRUnichar *)nsMemory::Clone(aBuffer,
                                       aLength * sizeof(PRUnichar));
    if (!m2b) {
      return;
    }
  } else {
    // Use 1 byte storage because we can
    char* buff = (char *)nsMemory::Alloc(aLength * sizeof(char));
    if (!buff) {
      return;
    }

    // Copy data
    // Use the same copying code we use elsewhere; it's likely to be
    // carefully tuned.
    LossyConvertEncoding<PRUnichar, char> converter(buff);
    copy_string(aBuffer, aBuffer+aLength, converter);
    m1b = buff;
  }

  // Setup our fields
  mState.mInHeap = PR_TRUE;
  mState.mIs2b = need2;
  mState.mLength = aLength;
}

void
nsTextFragment::CopyTo(PRUnichar *aDest, PRInt32 aOffset, PRInt32 aCount)
{
  NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
  NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");

  if (aOffset < 0) {
    aOffset = 0;
  }

  if (PRUint32(aOffset + aCount) > GetLength()) {
    aCount = mState.mLength - aOffset;
  }

  if (aCount != 0) {
    if (mState.mIs2b) {
      memcpy(aDest, m2b + aOffset, sizeof(PRUnichar) * aCount);
    } else {
      const char *cp = m1b + aOffset;
      const char *end = cp + aCount;
      while (cp < end) {
        *aDest++ = (unsigned char)(*cp++);
      }
    }
  }
}

void
nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength)
{
  // This is a common case because some callsites create a textnode
  // with a value by creating the node and then calling AppendData.
  if (mState.mLength == 0) {
    SetTo(aBuffer, aLength);

    return;
  }

  // Should we optimize for aData.Length() == 0?

  if (mState.mIs2b) {
    // Already a 2-byte string so the result will be too
    PRUnichar* buff = (PRUnichar*)nsMemory::Realloc(m2b, (mState.mLength + aLength) * sizeof(PRUnichar));
    if (!buff) {
      return;
    }

    memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(PRUnichar));
    mState.mLength += aLength;
    m2b = buff;

    return;
  }

  // Current string is a 1-byte string, check if the new data fits in one byte too.

  const PRUnichar* ucp = aBuffer;
  const PRUnichar* uend = ucp + aLength;
  PRBool need2 = PR_FALSE;
  while (ucp < uend) {
    PRUnichar ch = *ucp++;
    if (ch >= 256) {
      need2 = PR_TRUE;
      break;
    }
  }

  if (need2) {
    // The old data was 1-byte, but the new is not so we have to expand it
    // all to 2-byte
    PRUnichar* buff = (PRUnichar*)nsMemory::Alloc((mState.mLength + aLength) *
                                                  sizeof(PRUnichar));
    if (!buff) {
      return;
    }

    // Copy data
    for (PRUint32 i = 0; i < mState.mLength; ++i) {
      buff[i] = (unsigned char)m1b[i];
    }

    memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(PRUnichar));

    mState.mLength += aLength;
    mState.mIs2b = PR_TRUE;

    if (mState.mInHeap) {
      nsMemory::Free(m2b);
    }
    m2b = buff;

    mState.mInHeap = PR_TRUE;

    return;
  }

  // The new and the old data is all 1-byte
  char* buff;
  if (mState.mInHeap) {
    buff = (char*)nsMemory::Realloc(const_cast<char*>(m1b),
                                    (mState.mLength + aLength) * sizeof(char));
    if (!buff) {
      return;
    }
  }
  else {
    buff = (char*)nsMemory::Alloc((mState.mLength + aLength) * sizeof(char));
    if (!buff) {
      return;
    }

    memcpy(buff, m1b, mState.mLength);
    mState.mInHeap = PR_TRUE;
  }

  for (PRUint32 i = 0; i < aLength; ++i) {
    buff[mState.mLength + i] = (char)aBuffer[i];
  }

  m1b = buff;
  mState.mLength += aLength;

}

// To save time we only do this when we really want to know, not during
// every allocation
void
nsTextFragment::UpdateBidiFlag(const PRUnichar* aBuffer, PRUint32 aLength)
{
  if (mState.mIs2b && !mState.mIsBidi) {
    const PRUnichar* cp = aBuffer;
    const PRUnichar* end = cp + aLength;
    while (cp < end) {
      PRUnichar ch1 = *cp++;
      PRUint32 utf32Char = ch1;
      if (NS_IS_HIGH_SURROGATE(ch1) &&
          cp < end &&
          NS_IS_LOW_SURROGATE(*cp)) {
        PRUnichar ch2 = *cp++;
        utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
      }
      if (UTF32_CHAR_IS_BIDI(utf32Char) || IS_BIDI_CONTROL_CHAR(utf32Char)) {
        mState.mIsBidi = PR_TRUE;
        break;
      }
    }
  }
}