JavaScript: converting a Unicode string to "Title Case"

I have a conversion problem in javascript that I cannot solve due to non-English letters. My main concern is the Turkish alphabet.

What I need to do is:

  • hello world => Hello World
  • HELLO WORLD => Hello World
  • hELLO wOrLd => Hello World

Here is what I have done so far:

/**
 * Upper-cases the string using Turkish casing rules.
 *
 * The Turkish-specific lower-case letters are mapped through an explicit
 * table first (so that "i" becomes dotted "İ" and dotless "ı" becomes "I"),
 * then the default toUpperCase() handles every remaining character.
 *
 * @returns {string} The upper-cased string.
 */
String.prototype.turkishToUpper = function () {
  var upperByLower = { 'i': 'İ', 'ş': 'Ş', 'ğ': 'Ğ', 'ü': 'Ü', 'ö': 'Ö', 'ç': 'Ç', 'ı': 'I' };
  var mapped = String(this).replace(/[iışğüçö]/g, function (letter) {
    return upperByLower[letter];
  });
  return mapped.toUpperCase();
};

/**
 * Lower-cases the string using Turkish casing rules.
 *
 * The Turkish-specific upper-case letters are mapped through an explicit
 * table first (so that "I" becomes dotless "ı" and dotted "İ" becomes "i"),
 * then the default toLowerCase() handles every remaining character.
 *
 * @returns {string} The lower-cased string.
 */
String.prototype.turkishToLower = function () {
  var lowerByUpper = { 'İ': 'i', 'I': 'ı', 'Ş': 'ş', 'Ğ': 'ğ', 'Ü': 'ü', 'Ö': 'ö', 'Ç': 'ç' };
  var mapped = String(this).replace(/[İIŞĞÜÇÖ]/g, function (letter) {
    return lowerByUpper[letter];
  });
  return mapped.toLowerCase();
};

/**
 * Converts the string to "Title Case" using the Turkish casing helpers:
 * the first character of every word is upper-cased, the rest lower-cased.
 *
 * A word starts at an ASCII word character or at any character from U+00C0
 * upwards, and runs until whitespace or one of the separators - : = /
 *
 * @returns {string} The title-cased string.
 */
String.prototype.toProperCase = function () {
  // `\w` alone is ASCII-only in JavaScript, so a word such as "şeker" used to
  // start matching at "e". The explicit \u00C0-\uFFFF range lets accented and
  // Turkish letters start a word; it is an approximation of "letter" (it also
  // admits non-letter symbols in that range, which case conversion leaves
  // untouched).
  return String(this).replace(/[\w\u00C0-\uFFFF][^\s:\-=\/]*/g, function (word) {
    return word.charAt(0).turkishToUpper() + word.substr(1).turkishToLower();
  });
};

But this does not give me the correct results, and I suspect that the regular expression used for the replacement only matches ASCII characters rather than Unicode letters.

When I test with Turkish characters, I get the wrong results.

  • şeker becomes şEker instead of Şeker
  • çoban ırmak becomes çOban ıRmak instead of Çoban Irmak

Also, if at all possible, as icing on the cake I would like words to be separated not only by spaces but also by some other delimiter characters, such as - : = / etc., so that

  • hello-world becomes Hello-World
  • hello: world becomes Hello: World

I read a lot of similar questions here on SO, but so far no luck.

thanks

Note. I think this is called the Title Case, but some claim it is a Pascal Case. To be honest, I'm interested in resolving the unicode problem (which I believe is the main reason), not the semantics, so please forgive me if I used the wrong terminology :)

+4
source share
4 answers

Standalone function:

 /**
  * Title-cases a string: every run of characters that is not whitespace,
  * ":" or "-" gets its first character upper-cased and the rest lower-cased.
  * Uses the default (locale-independent) case conversions.
  *
  * @param {string} s - Text to convert.
  * @returns {string} The converted text.
  */
 function toProperCase(s) {
   var wordPattern = /[^\s:\-]+/g;
   return s.replace(wordPattern, function (word) {
     var head = word.charAt(0);
     var tail = word.slice(1);
     return head.toUpperCase() + tail.toLowerCase();
   });
 }

Or for extending String.prototype:

 /**
  * Title-cases the string: every run of characters that is not whitespace,
  * ":" or "-" gets its first character upper-cased and the rest lower-cased.
  * Uses the default (locale-independent) case conversions.
  *
  * @returns {string} The converted text.
  */
 String.prototype.toProperCase = function () {
   return this.replace(/([^\s:\-])([^\s:\-]*)/g, function ($0, $1, $2) {
     return $1.toUpperCase() + $2.toLowerCase();
   });
 }; // the terminator is required: the next statement starts with a string literal

 // Example:
 "çoban ırmak becomes çOban ıRmak intead of Çoban Irmak Hello-wOrld".toProperCase();
 // "Çoban Irmak Becomes Çoban Irmak Intead Of Çoban Irmak Hello-World"

Update:

The following code uses custom lookup tables to convert locale-specific characters (only partially tested). It adds three functions to String.prototype: toLocaleProperCase2, toLocaleLowerCase2 and toLocaleUpperCase2.

 /**
  * Adds three locale-aware case helpers to String.prototype, driven by custom
  * per-locale lookup tables instead of the engine's own locale support:
  *
  *   - toLocaleProperCase2(locale): first character of each word upper-cased,
  *     the rest lower-cased (words are delimited by whitespace, ":" and "-");
  *   - toLocaleLowerCase2(locale): whole string lower-cased;
  *   - toLocaleUpperCase2(locale): whole string upper-cased.
  *
  * When `locale` is omitted, null/undefined, or has no table, the default
  * toUpperCase()/toLowerCase() conversions are used on their own.
  */
 (function () {
   // Locale-specific characters.
   // IMPORTANT: the locale name used as a key must always be in lower case
   // (for the "tr-TR" locale the key is "tr-tr").
   var localeInfos = {
     "tr-tr": {
       // lower-case letter -> upper-case letter
       lower: { "i": "İ", "ı": "I", "ş": "Ş", "ğ": "Ğ", "ü": "Ü", "ç": "Ç", "ö": "Ö" },
       // upper-case letter -> lower-case letter
       upper: { "İ": "i", "I": "ı", "Ş": "ş", "Ğ": "ğ", "Ü": "ü", "Ç": "ç", "Ö": "ö" }
     }
   };

   // Characters that separate words; add further delimiter characters to the
   // mask if needed (it is spliced into a regex character class).
   var mask = "\\s:\\-";
   // Group 1: first character of a word; group 2: remainder of the word.
   var rg = new RegExp("([^" + mask + "])([^" + mask + "]*)", "g");

   // Pre-compute, for every locale, the regexes matching its special letters.
   Object.keys(localeInfos).forEach(function (key) {
     var info = localeInfos[key];
     info.lowerSearchRegExp = new RegExp("[" + Object.keys(info.lower).join("") + "]", "g");
     info.upperSearchRegExp = new RegExp("[" + Object.keys(info.upper).join("") + "]", "g");
   });

   // Returns the table for `locale`, or undefined when none applies.
   // A missing/null locale is tolerated instead of throwing on .toLowerCase().
   function findLocaleInfo(locale) {
     if (locale === undefined || locale === null) {
       return undefined;
     }
     var key = String(locale).toLowerCase();
     return Object.prototype.hasOwnProperty.call(localeInfos, key) ? localeInfos[key] : undefined;
   }

   // Lower-cases `text`, applying the locale table (if any) before toLowerCase().
   function lowerWith(info, text) {
     if (!info) {
       return text.toLowerCase();
     }
     return text.replace(info.upperSearchRegExp, function (ch) {
       return info.upper[ch];
     }).toLowerCase();
   }

   // Upper-cases `text`, applying the locale table (if any) before toUpperCase().
   function upperWith(info, text) {
     if (!info) {
       return text.toUpperCase();
     }
     return text.replace(info.lowerSearchRegExp, function (ch) {
       return info.lower[ch];
     }).toUpperCase();
   }

   // Extending String.prototype.
   String.prototype.toLocaleProperCase2 = function toLocaleProperCase2(locale) {
     // The table is resolved per call and passed explicitly, so no state is
     // shared between invocations.
     var info = findLocaleInfo(locale);
     return String(this).replace(rg, function (match, first, rest) {
       return upperWith(info, first) + lowerWith(info, rest);
     });
   };

   String.prototype.toLocaleLowerCase2 = function toLocaleLowerCase2(locale) {
     return lowerWith(findLocaleInfo(locale), String(this));
   };

   String.prototype.toLocaleUpperCase2 = function toLocaleUpperCase2(locale) {
     return upperWith(findLocaleInfo(locale), String(this));
   };
 })();

 // Testing: exercises the three String.prototype helpers (toLocaleProperCase2,
 // toLocaleLowerCase2, toLocaleUpperCase2) with and without the Turkish locale.
 // The output the answer's author reported is shown below each call.
 var sss = "çoban ırmak ibecıoimes çOban ıRmak intead of Çoban IrImaİk Hello-wOrld";

 console.log("Origin: ", sss);
 // Origin: çoban ırmak ibecıoimes çOban ıRmak intead of Çoban IrImaİk Hello-wOrld

 console.log("Proper TR: ", sss.toLocaleProperCase2("tr-TR"));
 // Proper TR: Çoban Irmak İbecıoimes Çoban Irmak İntead Of Çoban Irımaik Hello-World

 console.log("Proper: ", sss.toLocaleProperCase2());
 // Proper: Çoban Irmak Ibecıoimes Çoban Irmak Intead Of Çoban Irimaik Hello-World

 console.log("Lower TR: ", sss.toLocaleLowerCase2("tr-TR"));
 // Lower TR: çoban ırmak ibecıoimes çoban ırmak intead of çoban ırımaik hello-world

 console.log("Lower: ", sss.toLocaleLowerCase2());
 // Lower: çoban ırmak ibecıoimes çoban ırmak intead of çoban irimaik hello-world

 console.log("Upper TR: ", sss.toLocaleUpperCase2("tr-TR"));
 // Upper TR: ÇOBAN IRMAK İBECIOİMES ÇOBAN IRMAK İNTEAD OF ÇOBAN IRIMAİK HELLO-WORLD

 console.log("Upper: ", sss.toLocaleUpperCase2());
 // Upper: ÇOBAN IRMAK IBECIOIMES ÇOBAN IRMAK INTEAD OF ÇOBAN IRIMAİK HELLO-WORLD
+5
source

Here is the JS function that will do the job.

 /**
  * Title-cases a string word by word, where words are separated by a single
  * space character: the first character of each word is upper-cased and the
  * remainder lower-cased. Uses the default (locale-independent) conversions.
  *
  * @param {string} string - Text to convert.
  * @returns {string} The converted text, with the original spacing preserved.
  */
 function toProperCase(string) {
   // Locals are declared explicitly: assigning to undeclared names created
   // implicit globals and throws a ReferenceError in strict mode / modules.
   var words = string.split(' '); // explode string by space
   var converted = [];
   for (var i = 0; i < words.length; i++) {
     converted.push(words[i].charAt(0).toUpperCase() + words[i].slice(1).toLowerCase());
   }
   return converted.join(' ');
 }
0
source
 /**
  * Title-cases the string: the whole text is lower-cased first, then the
  * first character of every space-separated word is upper-cased.
  * Uses the default (locale-independent) case conversions.
  *
  * @returns {string} The converted text.
  */
 String.prototype.toProperCase = function () {
   var words = this.toLowerCase().split(' ');
   var capitalised = words.map(function (word) {
     return word.charAt(0).toUpperCase() + word.substr(1);
   });
   return capitalised.join(' ');
 };

 // Example:
 'çoban ırmak'.toProperCase(); // "Çoban Irmak"
0
source

Another option, but with icing on the cake:

 /**
  * Title-cases a string: the whole text is lower-cased, then the first
  * character of the string and every character that directly follows one of
  * the delimiters (space, "-", ":", "=", "/") is upper-cased.
  * Uses the default (locale-independent) case conversions.
  *
  * @param {string} str - Text to convert.
  * @returns {string} The converted text.
  */
 function toProperCase( str ) {
   var delimiters = { " " : true, "-" : true, ":" : true, "=" : true, "/" : true };
   var letters = str.toLowerCase( ).split("");

   var converted = letters.map( function( current, index ) {
     // A character starts a word when it is the very first one, or when the
     // previous character is a delimiter and the current one is not.
     var startsWord = index === 0 ||
       ( Boolean( delimiters[ letters[ index - 1 ] ] ) && !delimiters[ current ] );
     return startsWord ? current.toUpperCase( ) : current;
   } );

   return converted.join("");
 }

Hope this helps :)

0
source

All Articles