Ai
1 Star 0 Fork 0

c0ding/pystring

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
pywstring.cpp 40.41 KB
一键复制 编辑 原始数据 按行查看 历史
c0ding 提交于 2024-07-25 10:02 +08:00 . pystring
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2008-2010, Sony Pictures Imageworks Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the organization Sony Pictures Imageworks nor the
// names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
///////////////////////////////////////////////////////////////////////////////
#include "pywstring.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <iostream>
#include <sstream>
// Collapse the various compiler/platform Windows indicators into a single
// WINDOWS macro. The os_pathw dispatch functions further down in this file
// test WINDOWS to choose between their *_nt and *_posix implementations.
#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) || defined(_MSC_VER)
#ifndef WINDOWS
#define WINDOWS
#endif
#endif
// This definition comes from configure.in in the python src.
// Strictly speaking this limits us to str sizes of 2**31.
// Should we wish to handle this limit, we could use an architecture
// specific #defines and read from ssize_t (unistd.h) if the header exists.
// But in the meantime, the use of int assures maximum arch compatibility.
// This must also equal the size used in the end = MAX_32BIT_INT default arg.
//
// The type MUST be signed: ADJUST_INDICES and the tail-match helper treat
// negative values as offsets from the end of the string. With an unsigned
// type every `< 0` test below is dead code and a negative index is silently
// reinterpreted as a huge positive one.
typedef int Py_ssize_t;
/* helper macro to fixup start/end slice values:
 *   - end is clamped to len; a negative end is taken relative to len and
 *     then clamped to 0;
 *   - a negative start is taken relative to len and then clamped to 0.
 * start is NOT clamped from above. The arguments are evaluated more than
 * once, so pass side-effect-free expressions only. */
#define ADJUST_INDICES(start, end, len) \
    if (end > len)                      \
        end = len;                      \
    else if (end < 0) {                 \
        end += len;                     \
        if (end < 0)                    \
            end = 0;                    \
    }                                   \
    if (start < 0) {                    \
        start += len;                   \
        if (start < 0)                  \
            start = 0;                  \
    }
namespace {

//////////////////////////////////////////////////////////////////////////////////////////////
/// Character-class predicates.
///
/// Each helper returns 1 when `c` is greater than zero and the corresponding
/// isw*() classification function reports a match, and 0 otherwise.
///
static inline int py_isspace(wchar_t c)
{
    if (!(c > 0)) return 0;
    return ::iswspace(c) ? 1 : 0;
}

static inline int py_islower(wchar_t c)
{
    if (!(c > 0)) return 0;
    return ::iswlower(c) ? 1 : 0;
}

static inline int py_isupper(wchar_t c)
{
    if (!(c > 0)) return 0;
    return ::iswupper(c) ? 1 : 0;
}

static inline int py_isdigit(wchar_t c)
{
    if (!(c > 0)) return 0;
    return ::iswdigit(c) ? 1 : 0;
}

static inline int py_isalnum(wchar_t c)
{
    if (!(c > 0)) return 0;
    return ::iswalnum(c) ? 1 : 0;
}

static inline int py_isalpha(wchar_t c)
{
    if (!(c > 0)) return 0;
    return ::iswalpha(c) ? 1 : 0;
}

//////////////////////////////////////////////////////////////////////////////////////////////
/// Reverses the order of the elements of `result` in place.
///
static void reverse_strings(std::vector< std::wstring > & result)
{
    std::reverse(result.begin(), result.end());
}

//////////////////////////////////////////////////////////////////////////////////////////////
/// Splits `str` on runs of whitespace, scanning left to right, and appends
/// the pieces to `result`.
///
/// At most `maxsplit` whitespace-delimited tokens are emitted; once the budget
/// is used up, everything from the start of the next token to the end of the
/// string (trailing whitespace included) is appended as one final piece.
///
static void split_whitespace(const std::wstring & str, std::vector< std::wstring > & result, int maxsplit)
{
    const std::wstring::size_type total = str.size();
    std::wstring::size_type pos = 0;         // scan cursor
    std::wstring::size_type token_start = 0; // start of the pending piece

    while (pos < total)
    {
        while (pos < total && py_isspace(str[pos])) ++pos;
        token_start = pos;
        while (pos < total && !py_isspace(str[pos])) ++pos;

        // Only whitespace was left: pos has reached the end, the loop stops.
        if (token_start == pos) continue;

        if (maxsplit-- <= 0) break;
        result.push_back(str.substr(token_start, pos - token_start));

        while (pos < total && py_isspace(str[pos])) ++pos;
        token_start = pos;
    }

    if (token_start < total)
    {
        result.push_back(str.substr(token_start));
    }
}

//////////////////////////////////////////////////////////////////////////////////////////////
/// Splits `str` on runs of whitespace, scanning right to left, and appends
/// the pieces to `result` in left-to-right order.
///
/// At most `maxsplit` tokens are taken from the right; once the budget is used
/// up, everything from the start of the string to the end of the next token
/// (leading whitespace included) becomes the first piece.
///
static void rsplit_whitespace(const std::wstring & str, std::vector< std::wstring > & result, int maxsplit)
{
    std::wstring::size_type pos = str.size(); // scan cursor (one past the char examined)
    std::wstring::size_type token_end = pos;  // end of the pending piece

    while (pos > 0)
    {
        while (pos > 0 && py_isspace(str[pos - 1])) --pos;
        token_end = pos;
        while (pos > 0 && !py_isspace(str[pos - 1])) --pos;

        // Only whitespace was left: pos has reached 0, the loop stops.
        if (token_end == pos) continue;

        if (maxsplit-- <= 0) break;
        result.push_back(str.substr(pos, token_end - pos));

        while (pos > 0 && py_isspace(str[pos - 1])) --pos;
        token_end = pos;
    }

    if (token_end > 0)
    {
        result.push_back(str.substr(0, token_end));
    }

    // Pieces were collected right-to-left; restore reading order.
    reverse_strings(result);
}

} //anonymous namespace
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Splits `str` on every occurrence of `sep`, left to right, storing the
/// pieces in `result` (which is cleared first).
///
/// An empty `sep` delegates to whitespace splitting. A negative `maxsplit`
/// is replaced by MAX_32BIT_INT; otherwise at most `maxsplit` separators are
/// consumed and the remainder of the string forms the last piece.
void pywstring::split(const std::wstring & str, std::vector< std::wstring > & result, const std::wstring & sep, int maxsplit)
{
    result.clear();
    if (maxsplit < 0) maxsplit = MAX_32BIT_INT;

    if (sep.empty())
    {
        split_whitespace(str, result, maxsplit);
        return;
    }

    const std::wstring::size_type str_len = str.size();
    const std::wstring::size_type sep_len = sep.size();
    std::wstring::size_type piece_start = 0;
    std::wstring::size_type scan = 0;

    while (scan + sep_len <= str_len)
    {
        // Cheap first-character test before the full comparison.
        const bool at_separator =
            str[scan] == sep[0] && str.compare(scan, sep_len, sep) == 0;
        if (!at_separator)
        {
            ++scan;
            continue;
        }
        if (maxsplit-- <= 0) break;
        result.push_back(str.substr(piece_start, scan - piece_start));
        scan += sep_len;
        piece_start = scan;
    }

    // Whatever follows the last consumed separator (possibly empty).
    result.push_back(str.substr(piece_start));
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Splits `str` on `sep` scanning from the right, storing the pieces in
/// `result` in left-to-right order.
///
/// A negative `maxsplit` is forwarded to split(), and an empty `sep`
/// delegates to right-to-left whitespace splitting. Otherwise at most
/// `maxsplit` separators are consumed, starting from the end of the string,
/// and the untouched head of the string forms the first piece.
void pywstring::rsplit(const std::wstring & str, std::vector< std::wstring > & result, const std::wstring & sep, int maxsplit)
{
    if (maxsplit < 0)
    {
        split(str, result, sep, maxsplit);
        return;
    }

    result.clear();

    if (sep.empty())
    {
        rsplit_whitespace(str, result, maxsplit);
        return;
    }

    const std::wstring::size_type sep_len = sep.size();
    std::wstring::size_type scan = str.size(); // one past the last char of a candidate separator
    std::wstring::size_type piece_end = scan;  // end of the pending piece

    while (scan >= sep_len)
    {
        // Cheap last-character test before the full comparison.
        const bool at_separator =
            str[scan - 1] == sep[sep_len - 1] &&
            str.compare(scan - sep_len, sep_len, sep) == 0;
        if (!at_separator)
        {
            --scan;
            continue;
        }
        if (maxsplit-- <= 0) break;
        result.push_back(str.substr(scan, piece_end - scan));
        scan -= sep_len;
        piece_end = scan;
    }

    result.push_back(str.substr(0, piece_end));

    // Pieces were collected right-to-left; restore reading order.
    reverse_strings(result);
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Selects which side(s) of the string do_strip() trims.
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
//////////////////////////////////////////////////////////////////////////////////////////////
/// Reports whether `c` must be stripped: whitespace when `chars` is empty,
/// otherwise any character contained in `chars`.
///
static bool do_strip_matches(wchar_t c, const std::wstring & chars)
{
    if (chars.empty())
        return py_isspace(c) != 0;
    return chars.find(c) != std::wstring::npos;
}
//////////////////////////////////////////////////////////////////////////////////////////////
/// Returns `str` with strippable characters removed from the left
/// (LEFTSTRIP), the right (RIGHTSTRIP) or both ends (BOTHSTRIP).
///
/// An empty `chars` strips whitespace; otherwise the characters listed in
/// `chars` are stripped. The input is returned unchanged when it is empty or
/// nothing needs to be removed.
///
/// The right-hand scan looks at str[last - 1] and stops at `first`, so the
/// index never drops below zero. A decrement-then-test loop would wrap
/// around with an unsigned index type and read outside the string whenever
/// the whole string is strippable and the left side is not being trimmed.
///
static std::wstring do_strip(const std::wstring & str, int striptype, const std::wstring & chars)
{
    const std::wstring::size_type len = str.size();
    if (len == 0)
        return str;

    // First character to keep.
    std::wstring::size_type first = 0;
    if (striptype != RIGHTSTRIP)
    {
        while (first < len && do_strip_matches(str[first], chars))
        {
            ++first;
        }
    }

    // One past the last character to keep.
    std::wstring::size_type last = len;
    if (striptype != LEFTSTRIP)
    {
        while (last > first && do_strip_matches(str[last - 1], chars))
        {
            --last;
        }
    }

    if (first == 0 && last == len)
    {
        return str;
    }
    return str.substr(first, last - first);
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Splits `str` around the first occurrence of `sep` as located by find().
///
/// `result` is resized to three elements: the text before the separator,
/// the separator itself, and the text after it. When find() reports a
/// negative index, the elements are `str`, "" and "".
void pywstring::partition(const std::wstring & str, const std::wstring & sep, std::vector< std::wstring > & result)
{
    result.resize(3);

    const int where = find(str, sep);
    if (where >= 0)
    {
        result[0] = str.substr(0, where);
        result[1] = sep;
        result[2] = str.substr(where + sep.size());
        return;
    }

    result[0] = str;
    result[1].clear();
    result[2].clear();
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Splits `str` around the last occurrence of `sep` as located by rfind().
///
/// `result` is resized to three elements: the text before the separator,
/// the separator itself, and the text after it. When rfind() reports a
/// negative index, the elements are "", "" and `str`.
void pywstring::rpartition(const std::wstring & str, const std::wstring & sep, std::vector< std::wstring > & result)
{
    result.resize(3);

    const int where = rfind(str, sep);
    if (where >= 0)
    {
        result[0] = str.substr(0, where);
        result[1] = sep;
        result[2] = str.substr(where + sep.size());
        return;
    }

    result[0].clear();
    result[1].clear();
    result[2] = str;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns `str` trimmed on both ends; see do_strip() for how `chars`
/// selects the characters to remove.
std::wstring pywstring::strip(const std::wstring & str, const std::wstring & chars)
{
    const int side = BOTHSTRIP;
    return do_strip(str, side, chars);
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns `str` trimmed on the left only; see do_strip() for how `chars`
/// selects the characters to remove.
std::wstring pywstring::lstrip(const std::wstring & str, const std::wstring & chars)
{
    const int side = LEFTSTRIP;
    return do_strip(str, side, chars);
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns `str` trimmed on the right only; see do_strip() for how `chars`
/// selects the characters to remove.
std::wstring pywstring::rstrip(const std::wstring & str, const std::wstring & chars)
{
    const int side = RIGHTSTRIP;
    return do_strip(str, side, chars);
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Concatenates the elements of `seq`, inserting `str` between consecutive
/// elements. An empty sequence yields an empty string; a single element is
/// returned as is.
std::wstring pywstring::join(const std::wstring & str, const std::vector< std::wstring > & seq)
{
    if (seq.empty()) return std::wstring();

    std::vector< std::wstring >::const_iterator item = seq.begin();
    std::wstring joined(*item);
    for (++item; item != seq.end(); ++item)
    {
        joined += str;
        joined += *item;
    }
    return joined;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
namespace
{
/* Matches the end (direction >= 0) or start (direction < 0) of self
* against substr, using the start and end arguments. Returns
* -1 on error, 0 if not found and 1 if found.
*/
static int _string_tailmatch(const std::wstring & self, const std::wstring & substr,
Py_ssize_t start, Py_ssize_t end,
int direction)
{
Py_ssize_t len = (Py_ssize_t)self.size();
Py_ssize_t slen = (Py_ssize_t)substr.size();
const wchar_t* sub = substr.c_str();
const wchar_t* str = self.c_str();
ADJUST_INDICES(start, end, len);
if (direction < 0) {
// startswith
if (start + slen > len)
return 0;
}
else {
// endswith
if (end - start < slen || start > len)
return 0;
if (end - slen > start)
start = end - slen;
}
if (end - start >= slen)
return (!std::wmemcmp(str + start, sub, slen));
return 0;
}
}
bool pywstring::endswith(const std::wstring & str, const std::wstring & suffix, int start, int end)
{
int result = _string_tailmatch(str, suffix,
(Py_ssize_t)start, (Py_ssize_t)end, +1);
//if (result == -1) // TODO: Error condition
return static_cast<bool>(result);
}
bool pywstring::startswith(const std::wstring & str, const std::wstring & prefix, int start, int end)
{
int result = _string_tailmatch(str, prefix,
(Py_ssize_t)start, (Py_ssize_t)end, -1);
//if (result == -1) // TODO: Error condition
return static_cast<bool>(result);
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// True when `str` is non-empty and every character satisfies py_isalnum().
bool pywstring::isalnum(const std::wstring & str)
{
    if (str.empty()) return false;
    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (!py_isalnum(*it)) return false;
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// True when `str` is non-empty and every character satisfies py_isalpha().
bool pywstring::isalpha(const std::wstring & str)
{
    if (str.empty()) return false;
    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (!py_isalpha(*it)) return false;
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// True when `str` is non-empty and every character satisfies py_isdigit().
bool pywstring::isdigit(const std::wstring & str)
{
    if (str.empty()) return false;
    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (!py_isdigit(*it)) return false;
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// True when `str` is non-empty and every character satisfies py_islower().
/// Note that any non-lowercase character (digits, spaces, punctuation
/// included) makes the result false.
bool pywstring::islower(const std::wstring & str)
{
    if (str.empty()) return false;
    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (!py_islower(*it)) return false;
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// True when `str` is non-empty and every character satisfies py_isspace().
bool pywstring::isspace(const std::wstring & str)
{
    if (str.empty()) return false;
    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (!py_isspace(*it)) return false;
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// True when `str` is title-cased: it contains at least one cased
/// character, every uppercase character directly follows an uncased one
/// (or starts the string), and every lowercase character directly follows
/// a cased one.
bool pywstring::istitle(const std::wstring & str)
{
    if (str.empty()) return false;

    bool saw_cased = false;      // at least one upper/lower character seen
    bool prev_was_cased = false; // previous character was upper or lower

    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const wchar_t ch = *it;
        const bool is_upper = py_isupper(ch) != 0;
        const bool is_lower = !is_upper && py_islower(ch) != 0;

        if (!is_upper && !is_lower)
        {
            prev_was_cased = false;
            continue;
        }

        // An uppercase letter may only start a word; a lowercase letter may
        // only continue one.
        if (is_upper && prev_was_cased) return false;
        if (is_lower && !prev_was_cased) return false;

        prev_was_cased = true;
        saw_cased = true;
    }
    return saw_cased;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// True when `str` is non-empty and every character satisfies py_isupper().
/// Note that any non-uppercase character (digits, spaces, punctuation
/// included) makes the result false.
bool pywstring::isupper(const std::wstring & str)
{
    if (str.empty()) return false;
    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (!py_isupper(*it)) return false;
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns a copy of `str` whose first character is upper-cased (when it is
/// lowercase) and whose remaining uppercase characters are lower-cased.
std::wstring pywstring::capitalize(const std::wstring & str)
{
    std::wstring out(str);
    if (out.empty()) return out;

    std::wstring::iterator it = out.begin();
    if (py_islower(*it)) *it = (wchar_t) ::towupper(*it);

    for (++it; it != out.end(); ++it)
    {
        if (py_isupper(*it)) *it = (wchar_t) ::towlower(*it);
    }
    return out;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns a copy of `str` with every uppercase character lower-cased.
std::wstring pywstring::lower(const std::wstring & str)
{
    std::wstring out(str);
    for (std::wstring::iterator it = out.begin(); it != out.end(); ++it)
    {
        if (py_isupper(*it)) *it = (wchar_t) ::towlower(*it);
    }
    return out;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns a copy of `str` with every lowercase character upper-cased.
std::wstring pywstring::upper(const std::wstring & str)
{
    std::wstring out(str);
    for (std::wstring::iterator it = out.begin(); it != out.end(); ++it)
    {
        if (py_islower(*it)) *it = (wchar_t) ::towupper(*it);
    }
    return out;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns a copy of `str` with lowercase characters upper-cased and
/// uppercase characters lower-cased; other characters are left alone.
std::wstring pywstring::swapcase(const std::wstring & str)
{
    std::wstring out(str);
    for (std::wstring::iterator it = out.begin(); it != out.end(); ++it)
    {
        const wchar_t ch = *it;
        if (py_islower(ch))
        {
            *it = (wchar_t) ::towupper(ch);
        }
        else if (py_isupper(ch))
        {
            *it = (wchar_t) ::towlower(ch);
        }
    }
    return out;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns a title-cased copy of `str`: a cased character that follows an
/// uncased one (or starts the string) is upper-cased, and a cased character
/// that follows another cased one is lower-cased.
std::wstring pywstring::title(const std::wstring & str)
{
    std::wstring out(str);
    bool inside_word = false; // previous character was upper or lower

    for (std::wstring::iterator it = out.begin(); it != out.end(); ++it)
    {
        const wchar_t ch = *it;
        const bool is_lower = py_islower(ch) != 0;
        const bool is_upper = !is_lower && py_isupper(ch) != 0;

        if (is_lower && !inside_word)
        {
            *it = (wchar_t) ::towupper(ch);
        }
        else if (is_upper && inside_word)
        {
            *it = (wchar_t) ::towlower(ch);
        }
        inside_word = is_lower || is_upper;
    }
    return out;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Maps every character of `str` through the 256-entry `table` and drops
/// the characters listed in `deletechars`.
///
/// @param str          Input text.
/// @param table        Translation table; must hold exactly 256 entries,
///                     otherwise `str` is returned unchanged.
/// @param deletechars  Characters to remove from the output (any code point).
/// @return The translated string.
///
/// The table only covers code points 0..255. A wide string can hold
/// characters outside that range, and indexing the table (or a fixed-size
/// lookup array) with them reads and writes out of bounds. Such characters
/// are therefore copied through unchanged, unless they appear in
/// `deletechars`.
std::wstring pywstring::translate(const std::wstring & str, const std::wstring & table, const std::wstring & deletechars)
{
    const unsigned long kTableSize = 256;
    if (table.size() != kTableSize)
    {
        // TODO : raise exception instead
        return str;
    }

    // Deletion flags for table-range characters; anything outside the table
    // range is looked up in deletechars directly.
    bool drop_in_table[256] = { false };
    bool has_wide_deletes = false;
    for (std::wstring::const_iterator it = deletechars.begin(); it != deletechars.end(); ++it)
    {
        // The cast turns a negative wchar_t (signed on some platforms) into
        // a large value, which correctly lands in the "outside" case.
        const unsigned long code = static_cast<unsigned long>(*it);
        if (code < kTableSize)
            drop_in_table[code] = true;
        else
            has_wide_deletes = true;
    }

    std::wstring out;
    out.reserve(str.size());
    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const wchar_t ch = *it;
        const unsigned long code = static_cast<unsigned long>(ch);
        if (code < kTableSize)
        {
            if (!drop_in_table[code])
                out += table[code];
        }
        else if (!has_wide_deletes || deletechars.find(ch) == std::wstring::npos)
        {
            out += ch;
        }
    }
    return out;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Left-pads `str` with `fillChar` up to `width` characters. When the
/// original string starts with '+' or '-', the sign is moved in front of the
/// padding. Strings already `width` long or longer are returned unchanged.
std::wstring pywstring::zfill(const std::wstring & str, int width, wchar_t fillChar/* = '0'*/)
{
    const int len = (int)str.size();
    if (len >= width)
    {
        return str;
    }

    const int pad = width - len;
    std::wstring padded(pad, fillChar);
    padded += str;

    // padded[pad] is the first character of the original text.
    const wchar_t lead = padded[pad];
    if (lead == '+' || lead == '-')
    {
        padded[0] = lead;
        padded[pad] = fillChar;
    }
    return padded;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns `str` padded on the right with spaces up to `width` characters;
/// strings already that long are returned unchanged.
std::wstring pywstring::ljust(const std::wstring & str, int width)
{
    const std::wstring::size_type len = str.size();
    if (((int)len) >= width) return str;

    std::wstring padded(str);
    padded.append(width - len, L' ');
    return padded;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns `str` padded on the left with spaces up to `width` characters;
/// strings already that long are returned unchanged.
std::wstring pywstring::rjust(const std::wstring & str, int width)
{
    const std::wstring::size_type len = str.size();
    if (((int)len) >= width) return str;

    std::wstring padded(width - len, L' ');
    padded += str;
    return padded;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns `str` centred in a field of `width` characters, padded with
/// spaces. When the padding is odd, the extra space goes to the left only
/// if `width` is odd as well. Strings already `width` long are returned
/// unchanged.
std::wstring pywstring::center(const std::wstring & str, int width)
{
    const int len = (int)str.size();
    if (len >= width) return str;

    const int marg = width - len;
    const int left = marg / 2 + (marg & width & 1);

    std::wstring centred(left, L' ');
    centred += str;
    centred.append(marg - left, L' ');
    return centred;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns the [start, end) portion of `str` after the indices have been
/// normalised by ADJUST_INDICES (negative values count from the end).
/// An empty window yields an empty string.
std::wstring pywstring::slice(const std::wstring & str, int start, int end)
{
    ADJUST_INDICES(start, end, (int)str.size());

    const bool empty_window = start >= end;
    if (empty_window || str.size() <= start)
    {
        return L"";
    }
    return str.substr(start, end - start);
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Pads `str` with `fillChar` up to `maxLen` characters.
///
/// `align` 0 pads on the right (left alignment); `align` 1 splits the
/// padding around the text, the right side receiving the extra character
/// when the padding is odd. Any other `align` value returns `str` unchanged,
/// as does a string that already fills `maxLen`.
/// NOTE(review): no right-alignment mode is implemented here - confirm
/// against the header whether one is expected.
std::wstring pywstring::alignment(const std::wstring& str, int maxLen, int align /*= 0*/, wchar_t fillChar /*= ' ' */)
{
    const int fillLen = maxLen - str.length();
    if (fillLen < 1)
        return str;

    switch (align)
    {
    case 0: // left alignment
    {
        std::wstring padded(str);
        padded.append(fillLen, fillChar);
        return padded;
    }
    case 1: // center alignment
    {
        const int left = fillLen / 2;
        const int right = fillLen - left;
        std::wstring padded;
        if (left > 0)
            padded.append(left, fillChar);
        padded += str;
        if (right > 0)
            padded.append(right, fillChar);
        return padded;
    }
    default:
        return str;
    }
}
/// True when length() reports zero for `str`.
/// NOTE(review): length() is defined outside this view; presumably it
/// measures the C-string length - confirm.
bool pywstring::iscempty(const std::wstring& str)
{
    const bool nothing_there = (length(str) == 0);
    return nothing_there;
}
/// Compares `str1` and `str2` as NUL-terminated C strings.
///
/// @param ignoreCase  When true the comparison is case-insensitive.
/// @return true when the strings compare equal.
///
/// Both modes compare the c_str() views, so text after an embedded NUL is
/// ignored. _wcsicmp is an MSVC extension, so it is only used when WINDOWS
/// is defined; other platforms use an equivalent towlower()-based loop.
bool pywstring::equal(const std::wstring& str1, const std::wstring& str2, bool ignoreCase/* = false*/)
{
    const wchar_t* lhs = str1.c_str();
    const wchar_t* rhs = str2.c_str();

    if (!ignoreCase)
        return wcscmp(lhs, rhs) == 0;

#ifdef WINDOWS
    return _wcsicmp(lhs, rhs) == 0;
#else
    while (*lhs != L'\0' && *rhs != L'\0')
    {
        if (::towlower(*lhs) != ::towlower(*rhs))
            return false;
        ++lhs;
        ++rhs;
    }
    // Equal only if both strings ended at the same position.
    return *lhs == *rhs;
#endif
}
/// Concatenates the C-string views of `left` and `right`; text after an
/// embedded NUL in either operand is therefore not carried over.
std::wstring pywstring::concat(const std::wstring& left, const std::wstring& right)
{
    std::wstring joined(left.c_str());
    joined += right.c_str();
    return joined;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns the index of the first occurrence of `sub` at or after `start`
/// that lies entirely before `end`, or -1 when there is none. Indices are
/// normalised by ADJUST_INDICES first (negative values count from the end).
int pywstring::find(const std::wstring & str, const std::wstring & sub, int start, int end)
{
    ADJUST_INDICES(start, end, (int)str.size());

    const std::wstring::size_type hit = str.find(sub, start);
    if (hit == std::wstring::npos)
    {
        return -1;
    }

    // The first match already overruns the window, so later ones do too.
    const std::wstring::size_type limit = (std::wstring::size_type)end;
    if (hit + sub.size() > limit)
    {
        return -1;
    }
    return (int)hit;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Same as find(): returns the position of `sub`, or find()'s negative
/// result when it is absent. No error is raised.
int pywstring::index(const std::wstring & str, const std::wstring & sub, int start, int end)
{
    const int position = find(str, sub, start, end);
    return position;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns the index of the last occurrence of `sub` that lies entirely
/// inside the [start, end) window of `str`, or -1 when there is none.
/// Indices are normalised by ADJUST_INDICES first (negative values count
/// from the end).
///
/// The backwards search starts at `end - sub.size()`, the last position at
/// which a match can still fit in the window. Starting at `end` itself would
/// find a match that overruns the window first and reject it, hiding any
/// valid match further left (e.g. "abcabc", "abc", 0, 5 must yield 0).
int pywstring::rfind(const std::wstring & str, const std::wstring & sub, int start, int end)
{
    ADJUST_INDICES(start, end, (int)str.size());

    // Also covers start > end: the difference is then negative.
    const int sublen = (int)sub.size();
    if (sublen > end - start)
        return -1;

    const std::wstring::size_type result =
        str.rfind(sub, (std::wstring::size_type)(end - sublen));
    if (result == std::wstring::npos ||
        result < (std::wstring::size_type)start)
        return -1;
    return (int)result;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Same as rfind(): returns the position of the last `sub`, or rfind()'s
/// negative result when it is absent. No error is raised.
int pywstring::rindex(const std::wstring & str, const std::wstring & sub, int start, int end)
{
    const int position = rfind(str, sub, start, end);
    return position;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns a copy of `str` in which each tab is replaced by the number of
/// spaces needed to reach the next multiple of `tabsize`. The column counter
/// restarts after '\n' or '\r'. With a non-positive `tabsize`, tabs are
/// simply removed.
std::wstring pywstring::expandtabs(const std::wstring & str, int tabsize)
{
    std::wstring expanded;
    int column = 0; // current output column within the line

    for (std::wstring::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const wchar_t ch = *it;

        if (ch != '\t')
        {
            expanded += ch;
            if (ch == '\n' || ch == '\r')
                column = 0;
            else
                ++column;
            continue;
        }

        if (tabsize > 0)
        {
            const int fill = tabsize - (column % tabsize);
            column += fill;
            expanded.append(fill, L' ');
        }
        // tabsize <= 0: the tab is dropped and the column is unchanged.
    }
    return expanded;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Counts the non-overlapping occurrences of `substr` in the [start, end)
/// window of `str`, using find() for each lookup.
///
/// The cursor always advances by at least one character after a match.
/// Advancing by `substr.size()` alone would leave the cursor in place for an
/// empty `substr` and loop forever. With the one-character step an empty
/// `substr` matches once at every position of the window, end included
/// (so counting "" in "abc" gives 4).
int pywstring::count(const std::wstring & str, const std::wstring & substr, int start, int end)
{
    const int step = substr.empty() ? 1 : (int)substr.size();

    int nummatches = 0;
    int cursor = start;
    for (;;)
    {
        cursor = find(str, substr, cursor, end);
        if (cursor < 0) break;
        ++nummatches;
        cursor += step;
    }
    return nummatches;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns a copy of `str` in which occurrences of `oldstr` are replaced by
/// `newstr`, scanning left to right. A `count` of -1 or less replaces every
/// occurrence; otherwise at most `count` replacements are made. An empty
/// `oldstr` inserts `newstr` before each character and at the end.
std::wstring pywstring::replace(const std::wstring & str, const std::wstring & oldstr, const std::wstring & newstr, int count)
{
    std::wstring out(str);
    const std::wstring::size_type old_len = oldstr.size();
    const std::wstring::size_type new_len = newstr.size();
    const bool limited = count > -1;

    int done = 0;
    int pos = find(out, oldstr, 0);
    while (pos != -1 && pos <= (int)out.size())
    {
        if (limited && done >= count)
        {
            break;
        }

        out.replace(pos, old_len, newstr);
        pos += (int)new_len;

        if (old_len != 0)
        {
            pos = find(out, oldstr, pos);
        }
        else
        {
            // An empty pattern matches everywhere: step over one source
            // character before inserting again.
            ++pos;
        }
        ++done;
    }
    return out;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Splits `str` into lines stored in `result` (cleared first). A line ends
/// at "\n", "\r" or "\r\n"; the terminator is kept in each line only when
/// `keepends` is true. A trailing terminator does not produce an extra empty
/// line.
void pywstring::splitlines(const std::wstring & str, std::vector< std::wstring > & result, bool keepends)
{
    result.clear();

    const std::wstring::size_type total = str.size();
    std::wstring::size_type line_start = 0;

    while (line_start < total)
    {
        // Find the end of the line's content.
        std::wstring::size_type content_end = line_start;
        while (content_end < total && str[content_end] != '\n' && str[content_end] != '\r')
        {
            ++content_end;
        }

        // Step over the terminator, treating "\r\n" as a single one.
        std::wstring::size_type next_line = content_end;
        if (next_line < total)
        {
            const bool crlf = str[next_line] == '\r' &&
                              next_line + 1 < total &&
                              str[next_line + 1] == '\n';
            next_line += crlf ? 2 : 1;
        }

        const std::wstring::size_type line_end = keepends ? next_line : content_end;
        result.push_back(str.substr(line_start, line_end - line_start));
        line_start = next_line;
    }
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Returns `str` repeated `n` times; a non-positive `n` yields an empty
/// string.
std::wstring pywstring::mul(const std::wstring & str, int n)
{
    if (n <= 0) return L"";
    if (n == 1) return str;

    std::wstring repeated;
    repeated.reserve(str.size() * n);
    for (int copy = 0; copy < n; ++copy)
    {
        repeated += str;
    }
    return repeated;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// These functions are C++ ports of the python2.6 versions of os.path,
/// and come from genericpath.py, ntpath.py, posixpath.py
/// Split a pathname into drive and path specifiers.
/// Returns drivespec, pathspec. Either part may be empty.
/// Windows flavour: when the second character of `p` is ':', the first two
/// characters become `drivespec` and the rest `pathspec`; otherwise
/// `drivespec` is empty and `pathspec` is the whole of `p`.
void os_pathw::splitdrive_nt(std::wstring & drivespec, std::wstring & pathspec,
    const std::wstring & p)
{
    const bool has_drive = (pywstring::slice(p, 1, 2) == L":");
    if (!has_drive)
    {
        drivespec = L"";
        pathspec = p;
        return;
    }

    // Work on a copy in case drivespec refers to the same object as p.
    const std::wstring path = p;
    drivespec = pywstring::slice(path, 0, 2);
    pathspec = pywstring::slice(path, 2);
}
// On Posix, drive is always empty
/// POSIX flavour: `drivespec` is set to the empty string and `pathspec`
/// receives `path` unchanged.
void os_pathw::splitdrive_posix(std::wstring & drivespec, std::wstring & pathspec,
    const std::wstring & path)
{
    drivespec.clear();
    pathspec = path;
}
/// Platform dispatcher: uses the *_nt implementation when WINDOWS is
/// defined and the *_posix one otherwise.
void os_pathw::splitdrive(std::wstring & drivespec, std::wstring & pathspec,
    const std::wstring & path)
{
#ifndef WINDOWS
    splitdrive_posix(drivespec, pathspec, path);
#else
    splitdrive_nt(drivespec, pathspec, path);
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
// Test whether a path is absolute
// In windows, if the character to the right of the colon
// is a forward or backslash it's absolute.
/// Windows flavour: the path is absolute when the part following the
/// optional drive specifier starts with '/' or '\\'.
bool os_pathw::isabs_nt(const std::wstring & path)
{
    std::wstring drive;
    std::wstring remainder;
    splitdrive_nt(drive, remainder, path);

    if (remainder.empty()) return false;

    const wchar_t lead = remainder[0];
    return lead == '/' || lead == '\\';
}
/// POSIX flavour: the path is absolute when it starts with "/".
bool os_pathw::isabs_posix(const std::wstring & s)
{
    const bool rooted = pywstring::startswith(s, L"/");
    return rooted;
}
/// Platform dispatcher: uses the *_nt implementation when WINDOWS is
/// defined and the *_posix one otherwise.
bool os_pathw::isabs(const std::wstring & path)
{
#ifndef WINDOWS
    return isabs_posix(path);
#else
    return isabs_nt(path);
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
/// Windows flavour: a path that is not absolute is first joined onto `cwd`;
/// the result is then passed through normpath_nt().
std::wstring os_pathw::abspath_nt(const std::wstring & path, const std::wstring & cwd)
{
    if (isabs_nt(path))
    {
        return normpath_nt(path);
    }
    return normpath_nt(join_nt(cwd, path));
}
/// POSIX flavour: a path that is not absolute is first joined onto `cwd`;
/// the result is then passed through normpath_posix().
std::wstring os_pathw::abspath_posix(const std::wstring & path, const std::wstring & cwd)
{
    if (isabs_posix(path))
    {
        return normpath_posix(path);
    }
    return normpath_posix(join_posix(cwd, path));
}
/// Platform dispatcher: uses the *_nt implementation when WINDOWS is
/// defined and the *_posix one otherwise.
std::wstring os_pathw::abspath(const std::wstring & path, const std::wstring & cwd)
{
#ifndef WINDOWS
    return abspath_posix(path, cwd);
#else
    return abspath_nt(path, cwd);
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
// Join path components using Windows conventions.
//
// Components are folded left to right. A component REPLACES everything
// accumulated so far when the accumulated path is empty, or when the
// component is absolute (see isabs_nt) and either
//   - the accumulated path has no drive letter, or the component carries its
//     own drive letter:
//         join('c:',  'd:/') == 'd:/'
//         join('c:/', 'd:/') == 'd:/'
//   - or the accumulated path is more than a bare drive / drive root:
//         join('c:/a', '/b') == '/b'
// Otherwise the component is APPENDED with exactly one separator in between:
//         join('c:',  '/a')  == 'c:/a'
//         join('c:/', '/a')  == 'c:/a'
// A backslash is the separator that gets inserted, including after a path
// ending in ':' (so join('c:', 'a') == 'c:\a') and when the component is
// empty (so join('a', '') == 'a\').
//
// Returns an empty string for an empty list and the sole element for a
// one-element list.
std::wstring os_pathw::join_nt(const std::vector< std::wstring > & paths)
{
    if (paths.empty()) return L"";
    if (paths.size() == 1) return paths[0];

    std::wstring path = paths[0].c_str();
    for (std::vector< std::wstring >::size_type i = 1; i < paths.size(); ++i)
    {
        const std::wstring & b = paths[i];
        const bool b_leads_with_sep =
            pywstring::startswith(b, L"/") || pywstring::startswith(b, L"\\");

        // Decide whether b makes the accumulated path irrelevant.
        bool b_wins = false;
        if (path.empty())
        {
            b_wins = true;
        }
        else if (isabs_nt(b))
        {
            if ((pywstring::slice(path, 1, 2) != L":") ||
                (pywstring::slice(b, 1, 2) == L":"))
            {
                // path has no drive letter, or b has one of its own.
                b_wins = true;
            }
            // Else path has a drive letter, and b doesn't but is absolute.
            else if ((path.size() > 3) ||
                ((path.size() == 3) && !pywstring::endswith(path, L"/") && !pywstring::endswith(path, L"\\")))
            {
                // path is more than "X:" or "X:/", so b's root replaces it.
                b_wins = true;
            }
        }

        if (b_wins)
        {
            path = b;
            continue;
        }

        // Append b, making sure exactly one separator sits at the seam.
        // path is known to be non-empty here.
        if (pywstring::endswith(path, L"/") || pywstring::endswith(path, L"\\"))
        {
            // path already supplies the separator; drop b's leading one.
            path += b_leads_with_sep ? pywstring::slice(b, 1) : b;
        }
        else
        {
            // Covers a path ending in ':' as well as the general case. A
            // backslash is inserted only when b does not bring its own
            // separator, so join('c:', '/a') yields 'c:/a' rather than a
            // doubled separator. An empty b still gets the backslash, since
            // e.g. split('a/') produces ('a', '').
            if (!b_leads_with_sep)
            {
                path += L"\\";
            }
            path += b;
        }
    }
    return path;
}
// Two-argument convenience form of join_nt: joins a and b with Windows rules
// by delegating to the vector overload.
std::wstring os_pathw::join_nt(const std::wstring & a, const std::wstring & b)
{
    std::vector< std::wstring > parts;
    parts.reserve(2);
    parts.push_back(a);
    parts.push_back(b);
    return join_nt(parts);
}
// Join path components using POSIX conventions.
// A component that starts with '/' discards everything joined before it.
// Otherwise it is appended, with a '/' inserted first unless the accumulated
// path is empty or already ends in '/'.
// Returns an empty string for an empty list and the sole element for a
// one-element list.
std::wstring os_pathw::join_posix(const std::vector< std::wstring > & paths)
{
    if (paths.empty()) return L"";
    if (paths.size() == 1) return paths[0];

    std::wstring joined = paths[0].c_str();
    for (std::vector< std::wstring >::size_type k = 1; k < paths.size(); ++k)
    {
        const std::wstring & part = paths[k];

        // An absolute component restarts the result.
        if (pywstring::startswith(part, L"/"))
        {
            joined = part;
            continue;
        }

        const bool needs_sep = !joined.empty() && !pywstring::endswith(joined, L"/");
        if (needs_sep)
        {
            joined += L"/";
        }
        joined += part;
    }
    return joined;
}
// Two-argument convenience form of join_posix: joins a and b with POSIX rules
// by delegating to the vector overload.
std::wstring os_pathw::join_posix(const std::wstring & a, const std::wstring & b)
{
    std::vector< std::wstring > parts;
    parts.reserve(2);
    parts.push_back(a);
    parts.push_back(b);
    return join_posix(parts);
}
// Platform-dispatching two-argument join: uses join_nt when the WINDOWS macro
// is defined at compile time, otherwise join_posix.
std::wstring os_pathw::join(const std::wstring & path1, const std::wstring & path2)
{
#if defined(WINDOWS)
    std::wstring joined = join_nt(path1, path2);
#else
    std::wstring joined = join_posix(path1, path2);
#endif
    return joined;
}
// Platform-dispatching list join: uses join_nt when the WINDOWS macro is
// defined at compile time, otherwise join_posix.
std::wstring os_pathw::join(const std::vector< std::wstring > & paths)
{
#if defined(WINDOWS)
    std::wstring joined = join_nt(paths);
#else
    std::wstring joined = join_posix(paths);
#endif
    return joined;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
// Split a Windows pathname into (head, tail). Either part may be empty.
//
// Trailing '\' and '/' characters are ignored. Of what remains:
//   - nothing left (empty or all separators):  head = "", tail = ""
//   - no separator left and no ':' anywhere in path:
//                                              head = "", tail = the name
//   - no separator left but path contains ':': head = the text, tail = ""
//   - otherwise: head is everything before the last separator (the separator
//     itself is not included) and tail is the final component.
//
// NOTE(review): this differs from Python's os.path.split - for example
// "\a" yields an empty head here rather than "\", and "a\b\" yields
// tail "b" rather than "".
void os_pathw::split_nt(std::wstring & head, std::wstring & tail, const std::wstring & path)
{
    // Logical end of the path once trailing separators are skipped.
    int end = static_cast<int>(pywstring::length(path));
    while (end > 0 && (path[end - 1] == L'\\' || path[end - 1] == L'/'))
    {
        --end;
    }
    if (end <= 0)
    {
        head = L"";
        tail = L"";
        return;
    }

    // Right-most separator of either flavour before the final component.
    const std::wstring::size_type cut = path.find_last_of(L"\\/", end - 1);
    if (cut != std::wstring::npos)
    {
        head = path.substr(0, cut);
        tail = path.substr(cut + 1, end - (cut + 1));
        return;
    }

    // Single component: a ':' anywhere makes it a head, otherwise a tail.
    if (path.find(L':') == std::wstring::npos)
    {
        head = L"";
        tail = path.substr(0, end);
    }
    else
    {
        head = path.substr(0, end);
        tail = L"";
    }
}
// Split a POSIX pathname into (head, tail). Either part may be empty.
//
// Trailing '/' characters are ignored. Of what remains:
//   - nothing left (empty or all slashes): head = "", tail = ""
//   - no '/' left:                         head = "", tail = the name
//   - otherwise: head is everything before the last '/' (the slash itself is
//     not included) and tail is the final component.
//
// NOTE(review): this differs from Python's os.path.split - for example
// "/a" yields an empty head here rather than "/", and "a/b/" yields
// tail "b" rather than "".
void os_pathw::split_posix(std::wstring & head, std::wstring & tail, const std::wstring & path)
{
    // Logical end of the path once trailing slashes are skipped.
    int end = static_cast<int>(pywstring::length(path));
    while (end > 0 && path[end - 1] == L'/')
    {
        --end;
    }
    if (end <= 0)
    {
        head = L"";
        tail = L"";
        return;
    }

    const std::wstring::size_type cut = path.rfind(L'/', end - 1);
    if (cut != std::wstring::npos)
    {
        head = path.substr(0, cut);
        tail = path.substr(cut + 1, end - (cut + 1));
        return;
    }

    // No directory part at all.
    head = L"";
    tail = path.substr(0, end);
}
// Platform-dispatching split: forwards to split_nt when the WINDOWS macro is
// defined at compile time, otherwise to split_posix.
void os_pathw::split(std::wstring & head, std::wstring & tail, const std::wstring & path)
{
#if defined(WINDOWS)
    split_nt(head, tail, path);
#else
    split_posix(head, tail, path);
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
// Return the tail half of split_nt(path), i.e. the final path component
// under Windows rules.
std::wstring os_pathw::basename_nt(const std::wstring & path)
{
    std::wstring dir_part;
    std::wstring leaf_part;
    split_nt(dir_part, leaf_part, path);
    return leaf_part;
}
// Return the tail half of split_posix(path), i.e. the final path component
// under POSIX rules.
std::wstring os_pathw::basename_posix(const std::wstring & path)
{
    std::wstring dir_part;
    std::wstring leaf_part;
    split_posix(dir_part, leaf_part, path);
    return leaf_part;
}
// Return the final path component with everything from its last '.' onward
// removed. A path that ends in '\' or '/' yields an empty string. A name
// without any '.' is returned unchanged.
std::wstring os_pathw::basename_no_ext(const std::wstring & path)
{
    const bool ends_with_sep =
        pywstring::endswith(path, L"\\") || pywstring::endswith(path, L"/");
    if (ends_with_sep)
    {
        return L"";
    }

    const std::wstring name = basename(path);
    const std::wstring::size_type dot = name.rfind(L'.');
    if (dot == std::wstring::npos)
    {
        return name;
    }
    return name.substr(0, dot);
}
// Return the text after the last '.' in path (the dot itself is not
// included). Returns an empty string when path ends in '\' or '/', when it
// contains no '.', or when a '/' or '\' appears after the last '.' (the dot
// then belongs to a directory name, not to the file name).
std::wstring os_pathw::extension(const std::wstring & path)
{
    const bool ends_with_sep =
        pywstring::endswith(path, L"\\") || pywstring::endswith(path, L"/");
    if (ends_with_sep)
    {
        return L"";
    }

    const std::wstring::size_type dot = path.rfind(L'.');
    if (dot == std::wstring::npos)
    {
        return L"";
    }

    // Any separator to the right of the dot disqualifies it.
    if (path.find_first_of(L"/\\", dot + 1) != std::wstring::npos)
    {
        return L"";
    }
    return path.substr(dot + 1);
}
// Platform-dispatching basename: uses basename_nt when the WINDOWS macro is
// defined at compile time, otherwise basename_posix.
std::wstring os_pathw::basename(const std::wstring & path)
{
#if defined(WINDOWS)
    std::wstring leaf = basename_nt(path);
#else
    std::wstring leaf = basename_posix(path);
#endif
    return leaf;
}
// Return the head half of split_nt(path), i.e. the directory portion under
// Windows rules.
std::wstring os_pathw::dirname_nt(const std::wstring & path)
{
    std::wstring dir_part;
    std::wstring leaf_part;
    split_nt(dir_part, leaf_part, path);
    return dir_part;
}
// Return the head half of split_posix(path), i.e. the directory portion
// under POSIX rules.
std::wstring os_pathw::dirname_posix(const std::wstring & path)
{
    std::wstring dir_part;
    std::wstring leaf_part;
    split_posix(dir_part, leaf_part, path);
    return dir_part;
}
// Platform-dispatching dirname: uses dirname_nt when the WINDOWS macro is
// defined at compile time, otherwise dirname_posix.
std::wstring os_pathw::dirname(const std::wstring & path)
{
#if defined(WINDOWS)
    std::wstring dir = dirname_nt(path);
#else
    std::wstring dir = dirname_posix(path);
#endif
    return dir;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
// Normalize a Windows path, e.g. A//B, A/./B and A/foo/../B all become A\B.
//
// Forward slashes are converted to backslashes, the drive specifier is split
// off, empty and "." components are dropped, and "name\.." pairs are
// collapsed. A ".." that would climb above a rooted prefix is discarded; a
// leading ".." on a relative path is kept. When nothing at all remains the
// result is ".".
std::wstring os_pathw::normpath_nt(const std::wstring & p)
{
    std::wstring rest = pywstring::replace(p, L"/", L"\\");
    std::wstring lead;
    splitdrive_nt(lead, rest, rest);

    // Care is needed with leading backslashes. Without a drive letter, a
    // path starting with a backslash may be absolute on the current drive
    // (\dir1\dir2\file) or a UNC name (\\server\mount\dir1\file), so the
    // leading backslashes must NOT be collapsed blindly. Consequently the
    // invalid name \\\a\b is preserved as is, while a\\\b becomes a\b.
    if (!lead.empty())
    {
        // Drive letter present: any run of leading backslashes becomes one.
        if (pywstring::startswith(rest, L"\\"))
        {
            lead += L"\\";
            rest = pywstring::lstrip(rest, L"\\");
        }
    }
    else
    {
        // No drive letter: move every leading backslash into the prefix.
        while (pywstring::slice(rest, 0, 1) == L"\\")
        {
            lead += L"\\";
            rest = pywstring::slice(rest, 1);
        }
    }

    std::vector<std::wstring> parts;
    pywstring::split(rest, parts, L"\\");

    std::vector<std::wstring>::size_type k = 0;
    while (k < parts.size())
    {
        if (parts[k].empty() || parts[k] == L".")
        {
            // Contributes nothing; the next element slides into slot k.
            parts.erase(parts.begin() + k);
            continue;
        }
        if (parts[k] != L"..")
        {
            ++k;
            continue;
        }

        // parts[k] is "..".
        if (k > 0 && parts[k - 1] != L"..")
        {
            // Cancel it against the ordinary component before it.
            parts.erase(parts.begin() + (k - 1), parts.begin() + (k + 1));
            --k;
        }
        else if (k == 0 && pywstring::endswith(lead, L"\\"))
        {
            // Already at a rooted prefix: there is nothing above it.
            parts.erase(parts.begin());
        }
        else
        {
            // Leading ".." of a relative path (or stacked on another "..").
            ++k;
        }
    }

    // If the path is now empty, substitute '.'
    if (lead.empty() && parts.empty())
    {
        parts.push_back(L".");
    }
    return lead + pywstring::join(L"\\", parts);
}
// Normalize a POSIX path, e.g. A//B, A/./B and A/foo/../B all become A/B.
// Note that this may change the meaning of the path if it contains symbolic
// links.
//
// Empty and "." components are dropped and "name/.." pairs are collapsed.
// On an absolute path a ".." at the root is discarded; on a relative path
// leading ".." components are kept. An empty input or an empty result
// yields ".".
std::wstring os_pathw::normpath_posix(const std::wstring & p)
{
    if (p.empty()) return L".";

    // POSIX allows one or two initial slashes, but treats three or more
    // as a single slash.
    int lead_slashes = 0;
    if (pywstring::startswith(p, L"/"))
    {
        const bool exactly_two =
            pywstring::startswith(p, L"//") && !pywstring::startswith(p, L"///");
        lead_slashes = exactly_two ? 2 : 1;
    }

    std::vector<std::wstring> raw;
    std::vector<std::wstring> kept;
    pywstring::split(p, raw, L"/");

    for (std::vector<std::wstring>::const_iterator it = raw.begin(); it != raw.end(); ++it)
    {
        const std::wstring & comp = *it;
        if (comp.empty() || comp == L".")
        {
            continue;
        }
        if (comp != L"..")
        {
            kept.push_back(comp);
            continue;
        }

        // comp is "..".
        if (!kept.empty() && kept.back() != L"..")
        {
            // Cancel it against the ordinary component before it.
            kept.pop_back();
        }
        else if (!kept.empty() || lead_slashes == 0)
        {
            // Stacked on another "..", or leading a relative path: keep it.
            kept.push_back(comp);
        }
        // Otherwise ".." sits at the root of an absolute path: drop it.
    }

    std::wstring result = pywstring::join(L"/", kept);
    if (lead_slashes > 0)
    {
        result = pywstring::mul(L"/", lead_slashes) + result;
    }
    if (result.empty())
    {
        return L".";
    }
    return result;
}
// Platform-dispatching path comparison: uses equal_path_nt when the WINDOWS
// macro is defined at compile time, otherwise equal_path_posix.
bool os_pathw::equal_path(const std::wstring & path1, const std::wstring & path2)
{
#if defined(WINDOWS)
    const bool same = equal_path_nt(path1, path2);
#else
    const bool same = equal_path_posix(path1, path2);
#endif
    return same;
}
bool os_pathw::equal_path_nt(const std::wstring & path1, const std::wstring & path2)
{
std::wstring _path1 = normpath(path1);
std::wstring _path2 = normpath(path2);
if (pywstring::endswith(_path1, L"\\"))
_path1[_path1.size()-1] = '\0';
if (pywstring::endswith(_path2, L"\\"))
_path2[_path2.size()-1] = '\0';
_path1 = pywstring::lower(_path1);
_path2 = pywstring::lower(_path2);
return pywstring::equal(_path1, _path2);
}
bool os_pathw::equal_path_posix(const std::wstring & path1, const std::wstring & path2)
{
std::wstring _path1 = normpath(path1);
std::wstring _path2 = normpath(path2);
if (pywstring::endswith(_path1, L"/"))
_path1[_path1.size()-1] = '\0';
if (pywstring::endswith(_path2, L"/"))
_path2[_path2.size()-1] = '\0';
return pywstring::equal(_path1, _path2);
}
// Platform-dispatching normpath: uses normpath_nt when the WINDOWS macro is
// defined at compile time, otherwise normpath_posix.
std::wstring os_pathw::normpath(const std::wstring & path)
{
#if defined(WINDOWS)
    std::wstring normalized = normpath_nt(path);
#else
    std::wstring normalized = normpath_posix(path);
#endif
    return normalized;
}
//////////////////////////////////////////////////////////////////////////////////////////////
///
///
// Split the extension from a pathname.
// The extension is everything from the last extsep to the end of the final
// path component, ignoring leading extsep characters of that component (so
// ".bashrc", ".." and "...rc" have no extension).
// On return root + ext == p; ext may be empty.
//
// root    receives the path without its extension
// ext     receives the extension, including the leading extsep
// p       path to split; may be the same object as root or ext
// sep     primary directory separator
// altsep  alternative directory separator, or empty when there is none
// extsep  extension separator
static void splitext_generic(std::wstring & root, std::wstring & ext,
    const std::wstring & p,
    const std::wstring & sep,
    const std::wstring & altsep,
    const std::wstring & extsep)
{
    const std::wstring::size_type npos = std::wstring::npos;

    // Index just past the last directory separator (0 when there is none).
    std::wstring::size_type name_start = 0;
    const std::wstring::size_type sep_pos = p.rfind(sep);
    if (sep_pos != npos)
    {
        name_start = sep_pos + 1;
    }
    if (!altsep.empty())
    {
        const std::wstring::size_type alt_pos = p.rfind(altsep);
        if (alt_pos != npos && alt_pos + 1 > name_start)
        {
            name_start = alt_pos + 1;
        }
    }

    const std::wstring::size_type dot_pos = p.rfind(extsep);
    if (dot_pos != npos && dot_pos >= name_start)
    {
        // Skip all leading dots: the last dot only starts an extension when
        // some character that is not extsep precedes it inside the final
        // component. Each character is compared individually.
        for (std::wstring::size_type i = name_start; i < dot_pos; ++i)
        {
            if (p.compare(i, 1, extsep) != 0)
            {
                // Build both halves before writing either output, so that
                // the split stays correct when root or ext aliases p.
                std::wstring stem = p.substr(0, dot_pos);
                std::wstring suffix = p.substr(dot_pos);
                root.swap(stem);
                ext.swap(suffix);
                return;
            }
        }
    }

    root = p;
    ext = L"";
}
// Split the extension from a Windows path: '\' is the separator, '/' the
// alternative separator and '.' the extension separator.
void os_pathw::splitext_nt(std::wstring & root, std::wstring & ext, const std::wstring & path)
{
    splitext_generic(root, ext, path, L"\\", L"/", L".");
}
// Split the extension from a POSIX path: '/' is the separator, there is no
// alternative separator, and '.' is the extension separator.
void os_pathw::splitext_posix(std::wstring & root, std::wstring & ext, const std::wstring & path)
{
    splitext_generic(root, ext, path, L"/", L"", L".");
}
// Platform-dispatching splitext: forwards to splitext_nt when the WINDOWS
// macro is defined at compile time, otherwise to splitext_posix.
void os_pathw::splitext(std::wstring & root, std::wstring & ext, const std::wstring & path)
{
#if defined(WINDOWS)
    splitext_nt(root, ext, path);
#else
    splitext_posix(root, ext, path);
#endif
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C++
1
https://gitee.com/c0ding/pystring.git
git@gitee.com:c0ding/pystring.git
c0ding
pystring
pystring
master

搜索帮助