// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2001-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: ustr_wcs.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2004sep07 * created by: Markus W. Scherer * * u_strToWCS() and u_strFromWCS() functions * moved here from ustrtrns.c for better modularization. */ #include "unicode/utypes.h" #include "unicode/ustring.h" #include "cstring.h" #include "cwchar.h" #include "cmemory.h" #include "ustr_imp.h" #include "ustr_cnv.h" #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION #define _STACK_BUFFER_CAPACITY 1000 #define _BUFFER_CAPACITY_MULTIPLIER 2 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. // Then we could change this to work only with wchar_t buffers. static inline UBool u_growAnyBufferFromStatic(void *context, void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, int32_t length, int32_t size) { // Use char* not void* to avoid the compiler's strict-aliasing assumptions // and related warnings. char *newBuffer=(char *)uprv_malloc(reqCapacity*size); if(newBuffer!=NULL) { if(length>0) { uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); } *pCapacity=reqCapacity; } else { *pCapacity=0; } /* release the old pBuffer if it was not statically allocated */ if(*pBuffer!=(char *)context) { uprv_free(*pBuffer); } *pBuffer=newBuffer; return (UBool)(newBuffer!=NULL); } /* helper function */ static wchar_t* _strToWCS(wchar_t *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode){ char stackBuffer [_STACK_BUFFER_CAPACITY]; char* tempBuf = stackBuffer; int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; char* tempBufLimit = stackBuffer + tempBufCapacity; UConverter* conv = NULL; char* saveBuf = tempBuf; wchar_t* intTarget=NULL; int32_t intTargetCapacity=0; int count=0,retVal=0; const UChar *pSrcLimit =NULL; const UChar *pSrc = src; conv = u_getDefaultConverter(pErrorCode); if(U_FAILURE(*pErrorCode)){ return NULL; } if(srcLength == -1){ srcLength = u_strlen(pSrc); } pSrcLimit = pSrc + srcLength; for(;;) { /* reset the error state */ *pErrorCode = U_ZERO_ERROR; /* convert to chars using default converter */ ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); count =(tempBuf - saveBuf); /* This should rarely occur */ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ tempBuf = saveBuf; /* we don't have enough room on the stack grow the buffer */ int32_t newCapacity = 2 * srcLength; if(newCapacity <= tempBufCapacity) { newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; } if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, newCapacity, count, 1)) { goto cleanup; } saveBuf = tempBuf; tempBufLimit = tempBuf + tempBufCapacity; tempBuf = tempBuf + count; } else { break; } } if(U_FAILURE(*pErrorCode)){ goto cleanup; } /* done with conversion null terminate the char buffer */ if(count>=tempBufCapacity){ tempBuf = saveBuf; /* we don't have enough room on the stack grow the buffer */ if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, count+1, count, 1)) { goto cleanup; } saveBuf = tempBuf; } saveBuf[count]=0; /* allocate more space than required * here we assume that every char requires * no more than 2 wchar_ts */ intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); if(intTarget){ int32_t nulLen = 0; int32_t remaining = intTargetCapacity; wchar_t* pIntTarget=intTarget; tempBuf = saveBuf; /* now convert the mbs to wcs */ for(;;){ /* we can call the system API since we are sure that * there is atleast 1 null in the input */ retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); if(retVal==-1){ *pErrorCode = U_INVALID_CHAR_FOUND; break; }else if(retVal== remaining){/* should never occur */ int numWritten = (pIntTarget-intTarget); u_growAnyBufferFromStatic(NULL,(void**) &intTarget, &intTargetCapacity, intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, numWritten, sizeof(wchar_t)); pIntTarget = intTarget; remaining=intTargetCapacity; if(nulLen!=count){ /*there are embedded nulls*/ pIntTarget+=numWritten; remaining-=numWritten; } }else{ int32_t nulVal; /*scan for nulls */ /* we donot check for limit since tempBuf is null terminated */ while(tempBuf[nulLen++] != 0){ } nulVal = (nulLen < srcLength) ? 1 : 0; pIntTarget = pIntTarget + retVal+nulVal; remaining -=(retVal+nulVal); /* check if we have reached the source limit*/ if(nulLen>=(count)){ break; } } } count = (int32_t)(pIntTarget-intTarget); if(0 < count && count <= destCapacity){ uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); } if(pDestLength){ *pDestLength = count; } /* free the allocated memory */ uprv_free(intTarget); }else{ *pErrorCode = U_MEMORY_ALLOCATION_ERROR; } cleanup: /* are we still using stack buffer */ if(stackBuffer != saveBuf){ uprv_free(saveBuf); } u_terminateWChars(dest,destCapacity,count,pErrorCode); u_releaseDefaultConverter(conv); return dest; } #endif U_CAPI wchar_t* U_EXPORT2 u_strToWCS(wchar_t *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode){ /* args check */ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ return NULL; } if( (src==NULL && srcLength!=0) || srcLength < -1 || (destCapacity<0) || (dest == NULL && destCapacity > 0) ) { *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } #ifdef U_WCHAR_IS_UTF16 /* wchar_t is UTF-16 just do a memcpy */ if(srcLength == -1){ srcLength = u_strlen(src); } if(0 < srcLength && srcLength <= destCapacity){ u_memcpy((UChar *)dest, src, srcLength); } if(pDestLength){ *pDestLength = srcLength; } u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode); return dest; #elif defined U_WCHAR_IS_UTF32 return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, src, srcLength, pErrorCode); #else return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); #endif } #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) /* helper function */ static UChar* _strFromWCS( UChar *dest, int32_t destCapacity, int32_t *pDestLength, const wchar_t *src, int32_t srcLength, UErrorCode *pErrorCode) { int32_t retVal =0, count =0 ; UConverter* conv = NULL; UChar* pTarget = NULL; UChar* pTargetLimit = NULL; UChar* target = NULL; UChar uStack [_STACK_BUFFER_CAPACITY]; wchar_t wStack[_STACK_BUFFER_CAPACITY]; wchar_t* pWStack = wStack; char cStack[_STACK_BUFFER_CAPACITY]; int32_t cStackCap = _STACK_BUFFER_CAPACITY; char* pCSrc=cStack; char* pCSave=pCSrc; char* pCSrcLimit=NULL; const wchar_t* pSrc = src; const wchar_t* pSrcLimit = NULL; if(srcLength ==-1){ /* if the wchar_t source is null terminated we can safely * assume that there are no embedded nulls, this is a fast * path for null terminated strings. */ for(;;){ /* convert wchars to chars */ retVal = uprv_wcstombs(pCSrc,src, cStackCap); if(retVal == -1){ *pErrorCode = U_ILLEGAL_CHAR_FOUND; goto cleanup; }else if(retVal >= (cStackCap-1)){ /* Should rarely occur */ u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); pCSave = pCSrc; }else{ /* converted every thing */ pCSrc = pCSrc+retVal; break; } } }else{ /* here the source is not null terminated * so it may have nulls embedded and we need to * do some extra processing */ int32_t remaining =cStackCap; pSrcLimit = src + srcLength; for(;;){ int32_t nulLen = 0; /* find nulls in the string */ while(nulLen= _STACK_BUFFER_CAPACITY){ /* Should rarely occur */ /* allocate new buffer buffer */ pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); if(pWStack==NULL){ *pErrorCode = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } } if(nulLen>0){ /* copy the contents to tempStack */ uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); } /* null terminate the tempBuffer */ pWStack[nulLen] =0 ; if(remaining < (nulLen * MB_CUR_MAX)){ /* Should rarely occur */ int32_t len = (pCSrc-pCSave); pCSrc = pCSave; /* we do not have enough room so grow the buffer*/ u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); pCSave = pCSrc; pCSrc = pCSave+len; remaining = cStackCap-(pCSrc - pCSave); } /* convert to chars */ retVal = uprv_wcstombs(pCSrc,pWStack,remaining); pCSrc += retVal; pSrc += nulLen; srcLength-=nulLen; /* decrement the srcLength */ break; } } } /* OK..now we have converted from wchar_ts to chars now * convert chars to UChars */ pCSrcLimit = pCSrc; pCSrc = pCSave; pTarget = target= dest; pTargetLimit = dest + destCapacity; conv= u_getDefaultConverter(pErrorCode); if(U_FAILURE(*pErrorCode)|| conv==NULL){ goto cleanup; } for(;;) { *pErrorCode = U_ZERO_ERROR; /* convert to stack buffer*/ ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); /* increment count to number written to stack */ count+= pTarget - target; if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ target = uStack; pTarget = uStack; pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; } else { break; } } if(pDestLength){ *pDestLength =count; } u_terminateUChars(dest,destCapacity,count,pErrorCode); cleanup: if(cStack != pCSave){ uprv_free(pCSave); } if(wStack != pWStack){ uprv_free(pWStack); } u_releaseDefaultConverter(conv); return dest; } #endif U_CAPI UChar* U_EXPORT2 u_strFromWCS(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const wchar_t *src, int32_t srcLength, UErrorCode *pErrorCode) { /* args check */ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ return NULL; } if( (src==NULL && srcLength!=0) || srcLength < -1 || (destCapacity<0) || (dest == NULL && destCapacity > 0) ) { *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } #ifdef U_WCHAR_IS_UTF16 /* wchar_t is UTF-16 just do a memcpy */ if(srcLength == -1){ srcLength = u_strlen((const UChar *)src); } if(0 < srcLength && srcLength <= destCapacity){ u_memcpy(dest, (const UChar *)src, srcLength); } if(pDestLength){ *pDestLength = srcLength; } u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); return dest; #elif defined U_WCHAR_IS_UTF32 return u_strFromUTF32(dest, destCapacity, pDestLength, (UChar32*)src, srcLength, pErrorCode); #else return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); #endif } #endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */