Home > Projects > C String Utils
This project contains the following string functions:
- Levenshtein distance (code from Wikipedia Levenshtein Distance article)
- Soundex, Refined soundex, Metaphone, and Double metaphone (based on code from the Jakarta Commons Codec project)
- Case conversion, trimming whitespace, and startsWith/endsWith. These were mostly to make porting from Java easier.
It is released under the same license as the Jakarta Commons projects, i.e., Apache License Version 2.0.
Building
To build on systems other than FreeBSD:
% tar -xzvf stringutils.tar.gz
% cd stringutils
% make
To build on FreeBSD (uses gmake instead of make):
% tar -xzvf stringutils.tar.gz
% cd stringutils
% make -f makefile.freebsd
It is a good idea to run the test program after building. This should find any problems with the code on your particular system.
% make test
This code has been compiled and tested on the following systems without warnings or errors:
- gcc (GCC) 4.0.2 20051125 (Red Hat 4.0.2-8)
- i686-apple-darwin8-gcc-4.0.1 (GCC) 4.0.1 (Apple Computer, Inc. build 5250)
- powerpc-apple-darwin8-gcc-4.0.0 (GCC) 4.0.0 (Apple Computer, Inc. build 5026)
- gcc (GCC) 3.4.2 (mingw-special)
- gcc (GCC) 3.3.5 (Debian 1:3.3.5-13)
- gcc (GCC) 3.3.3 (cygwin special)
- gcc (GCC) 2.95.4 (FreeBSD 4.11-RELEASE-p14 i386)
Example of Use
The following example shows the basic use of string encoders for the various phonetic encoding methods (EncoderExample.cpp).
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
#include "Soundex.h"
#include "Metaphone.h"
using namespace std;
int main(int argc, char* argv[])
{
vector<Encoder *> encoders;
encoders.push_back(new Soundex());
encoders.push_back(new RefinedSoundex());
encoders.push_back(new Metaphone());
encoders.push_back(new DoubleMetaphone());
encoders.push_back(new DoubleMetaphone(false));
string names[]={"Soundex", "Refined soundex", "Metaphone",
"Double metaphone (pri)", "Double metaphone (alt)"};
for(int i=1; i<argc; i++)
{
cout<<setw(30)<<"Word"<<": "<<argv[i]<<endl;
for(unsigned j=0; j<encoders.size(); j++)
cout<<setw(30)<<names[j]<<": "<<encoders[j]->encode(argv[i])<<endl;
cout<<endl;
}
while(!encoders.empty())
{
Encoder *e=encoders.back();
encoders.pop_back();
delete e;
}
return 0;
}
The double metaphone class always calculates both the primary and the alternate encoding. To access both without recalculating, use the encodeString function as shown in the example below (DMExample.cpp).
#include <iostream>
#include <iomanip>
#include <string>
#include "Metaphone.h"
using namespace std;
int main(int argc, char* argv[])
{
DoubleMetaphone dm;
for(int i=1; i<argc; i++)
{
DoubleMetaphoneResult result=dm.encodeString(argv[i]);
cout<<setw(30)<<"Word"<<": "<<argv[i]<<endl;
cout<<setw(30)<<"Primary"<<": "<<result.getPrimary()<<endl;
cout<<setw(30)<<"Alternate"<<": "<<result.getAlternate()<<endl;
cout<<endl;
}
return 0;
}
These techniques and a few other convenient functions are also available through the StringUtils class as shown in the example below (StringUtilsExample.cpp).
#include <iostream>
#include <iomanip>
#include <string>
#include "StringUtils.h"
using namespace std;
/**
 * StringUtils example: for each command-line word, prints the result of
 * every StringUtils helper used below (case conversion, trim,
 * startsWith/endsWith against "b", Levenshtein distance to "b", and the
 * phonetic encodings), one right-aligned labelled line per helper,
 * followed by a blank line.
 *
 * @param argc number of command-line arguments, including the program name
 * @param argv argument strings; argv[1] onwards are the words to process
 * @return always 0
 */
int main(int argc, char* argv[])
{
    // argv[0] is the program name, so the words start at index 1.
    for(int argIndex=1; argIndex<argc; ++argIndex)
    {
        char* word=argv[argIndex];

        // Values are wrapped in brackets so surrounding whitespace is visible.
        cout<<setw(30)<<"Word"<<": ["<<word<<"]"<<endl;
        cout<<setw(30)<<"toUpperCase"<<": ["<<StringUtils::toUpperCase(word)<<"]"<<endl;
        cout<<setw(30)<<"toLowerCase"<<": ["<<StringUtils::toLowerCase(word)<<"]"<<endl;
        cout<<setw(30)<<"trim"<<": ["<<StringUtils::trim(word)<<"]"<<endl;

        // Prefix and suffix checks against the fixed probe string "b".
        const char* prefixVerdict=
            StringUtils::startsWith(word, "b") ? ": b -> [yes]" : ": b -> [no]";
        cout<<setw(30)<<"startsWith"<<prefixVerdict<<endl;

        const char* suffixVerdict=
            StringUtils::endsWith(word, "b") ? ": b -> [yes]" : ": b -> [no]";
        cout<<setw(30)<<"endsWith"<<suffixVerdict<<endl;

        cout<<setw(30)<<"levenshtein"<<": b -> ["<<StringUtils::levenshtein(word, "b")<<"]"<<endl;

        // Phonetic encodings; the (alt) row passes false as the second argument.
        cout<<setw(30)<<"soundex"<<": ["<<StringUtils::soundex(word)<<"]"<<endl;
        cout<<setw(30)<<"refinedSoundex"<<": ["<<StringUtils::refinedSoundex(word)<<"]"<<endl;
        cout<<setw(30)<<"metaphone"<<": ["<<StringUtils::metaphone(word)<<"]"<<endl;
        cout<<setw(30)<<"doubleMetaphone (pri)"<<": ["<<StringUtils::doubleMetaphone(word)<<"]"<<endl;
        cout<<setw(30)<<"doubleMetaphone (alt)"<<": ["<<StringUtils::doubleMetaphone(word, false)<<"]"<<endl;

        cout<<endl;
    }

    return 0;
}