view levenshtein.c++ @ 5:49a1f3e7eff2

Remove debug code
author Jordi Gutiérrez Hermoso <jordigh@gmail.com>
date Thu, 30 Dec 2010 02:31:11 -0600
parents de7bbee608ee
children cad6633bc1fc
line wrap: on
line source

#include <string>
#include <vector>
#include <iostream>
#include <algorithm>

size_t levenshtein(std::string& a,
                   std::string& b,
                   size_t m = 100) //the cutoff maximum
{
  using namespace std;
  size_t asize=a.size(), bsize = b.size();
  bool swapped = false;

  if( asize > bsize)
  {
    swapped = true;
    swap(a,b);
    swap(asize,bsize);
  }

  //A bit of a dumb heuristic, but seems to help a little
  if( long(bsize)  - long(asize) > m)
  {
    if(swapped)
      swap(a,b); //unswap
    return m;
  }

  //Just need two rows at a time
  vector<size_t> prevrow(bsize+1), thisrow(bsize+1);

  for(size_t i = 0; i <= bsize; i++)
    prevrow[i] = i;

  for(size_t i = 1; i <= asize; i ++)
  {
    thisrow[0] = i;
    for(size_t j = 1; j <= bsize; j++)
    {
      thisrow[j] = min(prevrow[j-1] + size_t(a[i-1] != b[j-1]),
                       1 + min(prevrow[j],thisrow[j-1]) );
    }
    swap(thisrow,prevrow);
  }

  if(swapped)
    swap(a,b); //unswap

  return prevrow[bsize];

}