• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

kstringhandler.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002    Copyright (C) 1999 Ian Zepp (icszepp@islc.net)
00003    Copyright (C) 2006 by Dominic Battre <dominic@battre.de>
00004    Copyright (C) 2006 by Martin Pool <mbp@canonical.com>
00005 
00006    This library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Library General Public
00008    License as published by the Free Software Foundation; either
00009    version 2 of the License, or (at your option) any later version.
00010 
00011    This library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Library General Public License for more details.
00015 
00016    You should have received a copy of the GNU Library General Public License
00017    along with this library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019    Boston, MA 02110-1301, USA.
00020 */
00021 
00022 #include "kstringhandler.h"
00023 
00024 #include <stdlib.h>     // random()
00025 
00026 #include <kglobal.h>
00027 
00028 #include <QtCore/QRegExp>            // for the word ranges
00029 #include <QtCore/QCharRef>
00030 #include <QtCore/QMutableStringListIterator>
00031 
00032 
00033 
00034 //
00035 // Capitalization routines
00036 //
00037 QString KStringHandler::capwords( const QString &text )
00038 {
00039     if ( text.isEmpty() ) {
00040         return text;
00041     }
00042 
00043     const QString strippedText = text.trimmed();
00044     const QStringList words = capwords( strippedText.split(' '));
00045 
00046 
00047     QString result = text;
00048     result.replace( strippedText, words.join( " " ) );
00049     return result;
00050 }
00051 
00052 QStringList KStringHandler::capwords( const QStringList &list )
00053 {
00054     QStringList tmp = list;
00055     for ( QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it ) {
00056         *it = ( *it )[ 0 ].toUpper() + ( *it ).mid( 1 );
00057     }
00058     return tmp;
00059 }
00060 
00061 
00062 QString KStringHandler::lsqueeze( const QString & str, int maxlen )
00063 {
00064   if (str.length() > maxlen) {
00065     int part = maxlen-3;
00066     return QString("..." + str.right(part));
00067   }
00068   else return str;
00069 }
00070 
00071 QString KStringHandler::csqueeze( const QString & str, int maxlen )
00072 {
00073   if (str.length() > maxlen && maxlen > 3) {
00074     int part = (maxlen-3)/2;
00075     return QString(str.left(part) + "..." + str.right(part));
00076   }
00077   else return str;
00078 }
00079 
00080 QString KStringHandler::rsqueeze( const QString & str, int maxlen )
00081 {
00082   if (str.length() > maxlen) {
00083     int part = maxlen-3;
00084     return QString(str.left(part) + "...");
00085   }
00086   else return str;
00087 }
00088 
00089 QStringList KStringHandler::perlSplit(const QString & sep, const QString & s, int max)
00090 {
00091   bool ignoreMax = 0 == max;
00092 
00093   QStringList l;
00094 
00095   int searchStart = 0;
00096 
00097   int tokenStart = s.indexOf(sep, searchStart);
00098 
00099   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00100   {
00101     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00102       l << s.mid(searchStart, tokenStart - searchStart);
00103 
00104     searchStart = tokenStart + sep.length();
00105     tokenStart = s.indexOf(sep, searchStart);
00106   }
00107 
00108   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00109     l << s.mid(searchStart, s.length() - searchStart);
00110 
00111   return l;
00112 }
00113 
00114 QStringList KStringHandler::perlSplit(const QChar & sep, const QString & s, int max)
00115 {
00116   bool ignoreMax = 0 == max;
00117 
00118   QStringList l;
00119 
00120   int searchStart = 0;
00121 
00122   int tokenStart = s.indexOf(sep, searchStart);
00123 
00124   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00125   {
00126     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00127       l << s.mid(searchStart, tokenStart - searchStart);
00128 
00129     searchStart = tokenStart + 1;
00130     tokenStart = s.indexOf(sep, searchStart);
00131   }
00132 
00133   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00134     l << s.mid(searchStart, s.length() - searchStart);
00135 
00136   return l;
00137 }
00138 
00139 QStringList KStringHandler::perlSplit(const QRegExp & sep, const QString & s, int max)
00140 {
00141   bool ignoreMax = 0 == max;
00142 
00143   QStringList l;
00144 
00145   int searchStart = 0;
00146   int tokenStart = sep.indexIn(s, searchStart);
00147   int len = sep.matchedLength();
00148 
00149   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00150   {
00151     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00152       l << s.mid(searchStart, tokenStart - searchStart);
00153 
00154     searchStart = tokenStart + len;
00155     tokenStart = sep.indexIn(s, searchStart);
00156     len = sep.matchedLength();
00157   }
00158 
00159   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00160     l << s.mid(searchStart, s.length() - searchStart);
00161 
00162   return l;
00163 }
00164 
00165 QString KStringHandler::tagUrls( const QString& text )
00166 {
00167     /*static*/ QRegExp urlEx("(www\\.(?!\\.)|(fish|(f|ht)tp(|s))://)[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$]+[\\d\\w/]");
00168 
00169     QString richText( text );
00170     int urlPos = 0, urlLen;
00171     while ((urlPos = urlEx.indexIn(richText, urlPos)) >= 0)
00172     {
00173         urlLen = urlEx.matchedLength();
00174         QString href = richText.mid( urlPos, urlLen );
00175         // Qt doesn't support (?<=pattern) so we do it here
00176         if((urlPos > 0) && richText[urlPos-1].isLetterOrNumber()){
00177             urlPos++;
00178             continue;
00179         }
00180         // Don't use QString::arg since %01, %20, etc could be in the string
00181         QString anchor = "<a href=\"" + href + "\">" + href + "</a>";
00182         richText.replace( urlPos, urlLen, anchor );
00183 
00184 
00185         urlPos += anchor.length();
00186     }
00187     return richText;
00188 }
00189 
00190 QString KStringHandler::obscure( const QString &str )
00191 {
00192   QString result;
00193   const QChar *unicode = str.unicode();
00194   for ( int i = 0; i < str.length(); ++i )
00195     // yes, no typo. can't encode ' ' or '!' because
00196     // they're the unicode BOM. stupid scrambling. stupid.
00197     result += ( unicode[ i ].unicode() <= 0x21 ) ? unicode[ i ] :
00198         QChar( 0x1001F - unicode[ i ].unicode() );
00199 
00200   return result;
00201 }
00202 
00203 
00204 bool KStringHandler::isUtf8( const char *buf )
00205 {
00206   int i, n;
00207   register unsigned char c;
00208   bool gotone = false;
00209 
00210   if (!buf)
00211     return true; // whatever, just don't crash
00212 
00213 #define F 0   /* character never appears in text */
00214 #define T 1   /* character appears in plain ASCII text */
00215 #define I 2   /* character appears in ISO-8859 text */
00216 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
00217 
00218   static const unsigned char text_chars[256] = {
00219         /*                  BEL BS HT LF    FF CR    */
00220         F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
00221         /*                              ESC          */
00222         F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
00223         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
00224         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
00225         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
00226         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
00227         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
00228         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
00229         /*            NEL                            */
00230         X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
00231         X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
00232         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
00233         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
00234         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
00235         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
00236         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
00237         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
00238   };
00239 
00240   /* *ulen = 0; */
00241   for (i = 0; (c = buf[i]); ++i) {
00242     if ((c & 0x80) == 0) {        /* 0xxxxxxx is plain ASCII */
00243       /*
00244        * Even if the whole file is valid UTF-8 sequences,
00245        * still reject it if it uses weird control characters.
00246        */
00247 
00248       if (text_chars[c] != T)
00249         return false;
00250 
00251     } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
00252       return false;
00253     } else {                           /* 11xxxxxx begins UTF-8 */
00254       int following;
00255 
00256     if ((c & 0x20) == 0) {             /* 110xxxxx */
00257       following = 1;
00258     } else if ((c & 0x10) == 0) {      /* 1110xxxx */
00259       following = 2;
00260     } else if ((c & 0x08) == 0) {      /* 11110xxx */
00261       following = 3;
00262     } else if ((c & 0x04) == 0) {      /* 111110xx */
00263       following = 4;
00264     } else if ((c & 0x02) == 0) {      /* 1111110x */
00265       following = 5;
00266     } else
00267       return false;
00268 
00269       for (n = 0; n < following; ++n) {
00270         i++;
00271         if (!(c = buf[i]))
00272           goto done;
00273 
00274         if ((c & 0x80) == 0 || (c & 0x40))
00275           return false;
00276       }
00277       gotone = true;
00278     }
00279   }
00280 done:
00281   return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
00282 }
00283 
00284 #undef F
00285 #undef T
00286 #undef I
00287 #undef X
00288 
00289 QString KStringHandler::from8Bit( const char *str )
00290 {
00291   if (!str)
00292     return QString();
00293   if (!*str) {
00294     static const QString &emptyString = KGlobal::staticQString("");
00295     return emptyString;
00296   }
00297   return KStringHandler::isUtf8( str ) ?
00298              QString::fromUtf8( str ) :
00299              QString::fromLocal8Bit( str );
00300 }
00301 
00302 int KStringHandler::naturalCompare(const QString &_a, const QString &_b, Qt::CaseSensitivity caseSensitivity)
00303 {
00304     // This method chops the input a and b into pieces of
00305     // digits and non-digits (a1.05 becomes a | 1 | . | 05)
00306     // and compares these pieces of a and b to each other
00307     // (first with first, second with second, ...).
00308     //
00309     // This is based on the natural sort order code code by Martin Pool
00310     // http://sourcefrog.net/projects/natsort/
00311     // Martin Pool agreed to license this under LGPL or GPL.
00312 
00313     // FIXME: Using toLower() to implement case insensitive comparison is
00314     // sub-optimal, but is needed because we compare strings with
00315     // localeAwareCompare(), which does not know about case sensitivity.
00316     // A task has been filled for this in Qt Task Tracker with ID 205990.
00317     // http://trolltech.com/developer/task-tracker/index_html?method=entry&id=205990
00318     QString a;
00319     QString b;
00320     if (caseSensitivity == Qt::CaseSensitive) {
00321         a = _a;
00322         b = _b;
00323     } else {
00324         a = _a.toLower();
00325         b = _b.toLower();
00326     }
00327 
00328     const QChar* currA = a.unicode(); // iterator over a
00329     const QChar* currB = b.unicode(); // iterator over b
00330 
00331     if (currA == currB) {
00332         return 0;
00333     }
00334 
00335     const QChar* begSeqA = currA; // beginning of a new character sequence of a
00336     const QChar* begSeqB = currB;
00337 
00338     while (!currA->isNull() && !currB->isNull()) {
00339         if (currA->unicode() == QChar::ObjectReplacementCharacter) {
00340             return 1;
00341         }
00342 
00343         if (currB->unicode() == QChar::ObjectReplacementCharacter) {
00344             return -1;
00345         }
00346 
00347         if (currA->unicode() == QChar::ReplacementCharacter) {
00348             return 1;
00349         }
00350 
00351         if (currB->unicode() == QChar::ReplacementCharacter) {
00352             return -1;
00353         }
00354 
00355         // find sequence of characters ending at the first non-character
00356         while (!currA->isNull() && !currA->isDigit()) {
00357             ++currA;
00358         }
00359 
00360         while (!currB->isNull() && !currB->isDigit()) {
00361             ++currB;
00362         }
00363 
00364         // compare these sequences
00365         const QStringRef& subA(a.midRef(begSeqA - a.unicode(), currA - begSeqA));
00366         const QStringRef& subB(b.midRef(begSeqB - b.unicode(), currB - begSeqB));
00367         const int cmp = QStringRef::localeAwareCompare(subA, subB);
00368         if (cmp != 0) {
00369             return cmp < 0 ? -1 : 1;
00370         }
00371 
00372         if (currA->isNull() || currB->isNull()) {
00373             break;
00374         }
00375 
00376         // now some digits follow...
00377         if ((*currA == '0') || (*currB == '0')) {
00378             // one digit-sequence starts with 0 -> assume we are in a fraction part
00379             // do left aligned comparison (numbers are considered left aligned)
00380             while (1) {
00381                 if (!currA->isDigit() && !currB->isDigit()) {
00382                     break;
00383                 } else if (!currA->isDigit()) {
00384                     return -1;
00385                 } else if (!currB->isDigit()) {
00386                     return + 1;
00387                 } else if (*currA < *currB) {
00388                     return -1;
00389                 } else if (*currA > *currB) {
00390                     return + 1;
00391                 }
00392                 ++currA;
00393                 ++currB;
00394             }
00395         } else {
00396             // No digit-sequence starts with 0 -> assume we are looking at some integer
00397             // do right aligned comparison.
00398             //
00399             // The longest run of digits wins. That aside, the greatest
00400             // value wins, but we can't know that it will until we've scanned
00401             // both numbers to know that they have the same magnitude.
00402 
00403             int weight = 0;
00404             while (1) {
00405                 if (!currA->isDigit() && !currB->isDigit()) {
00406                     if (weight != 0) {
00407                         return weight;
00408                     }
00409                     break;
00410                 } else if (!currA->isDigit()) {
00411                     return -1;
00412                 } else if (!currB->isDigit()) {
00413                     return + 1;
00414                 } else if ((*currA < *currB) && (weight == 0)) {
00415                     weight = -1;
00416                 } else if ((*currA > *currB) && (weight == 0)) {
00417                     weight = + 1;
00418                 }
00419                 ++currA;
00420                 ++currB;
00421             }
00422         }
00423 
00424         begSeqA = currA;
00425         begSeqB = currB;
00426     }
00427 
00428     if (currA->isNull() && currB->isNull()) {
00429         return 0;
00430     }
00431 
00432     return currA->isNull() ? -1 : + 1;
00433 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal