Case insensitive string comparison in C++

(Not an original article)

The standard library component std::basic_string<> (better known through std::string, which is a convenient typedef for std::basic_string<char>) is an important and widely used element of the standard C++ library. The declaration of basic_string<> in the standard library looks like this:

namespace std
{
  // declaration only: E is the character type, T the character traits and
  // A the allocator; T and A default to types derived from E
  template < typename E,
             typename T = char_traits<E>,
             typename A = allocator<E> > class basic_string ;
}

E is the character type of the string (usually either char or wchar_t),
T is the traits class that controls how strings are compared and copied, and
A is the allocator that controls how memory/storage is managed.

Creating an efficient and correct implementation of basic_string<> is a fairly daunting task. In addition, writing all the member functions and type definitions required by the standard on top of the chosen implementation is certainly not trivial.

In spite of that, the policy-based design of basic_string<> enables us to customize it for our requirements by supplying simple policy implementations. Here is an implementation that changes the policy that controls how strings are compared.

#include <string>
#include <iostream>
#include <cctype>
#include <strings.h> // posix

// Case-insensitive character traits for char.
// Copy and assignment are inherited from std::char_traits<char>, so stored
// text keeps its original case; only comparison and search ignore case.
// eq, lt, compare and find all fold characters through the same helper
// (upper), so they always agree with one another, and nothing here relies
// on platform-specific functions.
struct traits_nocase : std::char_traits<char>
{
  // Upper-case a single character (current C locale). The value is
  // converted to unsigned char first: handing a negative char to
  // std::toupper is undefined behaviour.
  static int upper( char c )
  { return std::toupper( static_cast<unsigned char>(c) ) ; }

  // true when c1 and c2 are the same character, ignoring case
  static bool eq( const char& c1, const char& c2 )
  { return upper(c1) == upper(c2) ; }

  // true when c1 orders before c2, ignoring case
  static bool lt( const char& c1, const char& c2 )
  { return upper(c1) < upper(c2) ; }

  // Compare the first N characters of s1 and s2, ignoring case.
  // Returns a negative value, zero or a positive value when s1 orders
  // before, equal to or after s2. All N characters are examined: '\0' is
  // treated as ordinary data, since a basic_string may hold embedded nulls.
  static int compare( const char* s1, const char* s2, std::size_t N )
  {
    for( std::size_t i=0 ; i<N ; ++i )
    {
      const int u1 = upper(s1[i]) ;
      const int u2 = upper(s2[i]) ;
      if( u1 != u2 ) return u1 < u2 ? -1 : 1 ;
    }
    return 0 ;
  }

  // Return a pointer to the first of the N characters at s that equals a,
  // ignoring case, or a null pointer when there is none.
  static const char* find( const char* s, std::size_t N, const char& a )
  {
    const int wanted = upper(a) ;
    for( std::size_t i=0 ; i<N ; ++i )
      if( upper(s[i]) == wanted ) return s+i ;
    return 0 ;
  }

  // true when two int_type values denote the same character, ignoring case
  static bool eq_int_type ( const int_type& c1, const int_type& c2 )
  { return std::toupper(c1) == std::toupper(c2) ; }
};

// string_nocase: a basic_string of char whose traits are traits_nocase.
// The stored text preserves case; comparisons are case insensitive.
typedef std::basic_string< char, traits_nocase > string_nocase ;

// make string_nocase work like a std::string
//           with streams using std::char_traits
// std::basic_istream< char, std::char_traits<char> > (std::istream) and
// std::basic_ostream< char, std::char_traits<char> > (std::ostream)
inline std::ostream& operator<< ( std::ostream& stm, const string_nocase& str )
{ return stm << reinterpret_cast<const std::string&>(str) ; }

// Stream extraction: a std::string is read with the standard extractor and
// its characters are then copied into str. When the read fails, str is left
// as it was. Returns the stream.
inline std::istream& operator>> ( std::istream& stm, string_nocase& str )
{
  std::string token ;
  if( stm >> token )
    str.assign( token.begin(), token.end() ) ;
  return stm ;
}

// Line input: one line is read with std::getline into a std::string and its
// characters are then copied into str. When the read fails, str is left as
// it was. Returns the stream.
inline std::istream& getline( std::istream& stm, string_nocase& str )
{
  std::string line ;
  std::getline( stm, line ) ;
  if( !stm ) return stm ;

  str.assign( line.begin(), line.end() ) ;
  return stm ;
}

// some examples of using string_nocase
#include <fstream>
#include <iterator>
#include <algorithm>
#include <map>
using namespace std;

int main()
{
  string_nocase str1 = "ITtoolbox Groups  | Cpp-l" ;
  string_nocase str2 = "ITtoolbox GROUPS  | Cpp-l" ;
  cout << "str1 == str2 ? " << boolalpha << (str1==str2) << '\n' ;

  string_nocase sub_str ;
  cout << "string to find? " ; getline(cin,sub_str) ;
  if( str1.find( sub_str ) != string_nocase::npos )
    cout << "found " << sub_str << " in " << str1 << '\n' ;

  ifstream file(__FILE__) ;
  istream_iterator<string_nocase> begin(file), end ;
  cout << "#includes: " << count( begin, end, "#INCLUDE" ) << '\n' ;

  map<string_nocase,int> phone_book ;
  phone_book[ "AMITHN" ] = -72 ;
  phone_book[ "pooja.upreti" ] = 1234567 ;
  phone_book[ "amithn" ] = 9999999 ;
  cout << phone_book[ "POOJA.Upreti" ] << '\t' << phone_book[ "AmithN" ] << '\n' ;
}

// what we have done is customize the character traits policy.
// std::basic_string is also policy-based with respect to memory management;
// see http://www.ddj.com/dept/cpp/184403784 for some examples of
// customizing the memory allocator policy.
    
/***
    here is sample output:
    > g++ -std=c++98 -Wall string_nocase.cpp -o string_nocase && ./string_nocase
    str1 == str2 ? true
    string to find? GrOuPs
    found GrOuPs in ITtoolbox Groups  | Cpp-l
    #includes: 8
    1234567 9999999
*/

 

You may also like...

Leave a Reply

Your email address will not be published. Required fields are marked *