These slides are based on the book *Data Structures and Algorithm Analysis in C++* (3rd edition) by Mark Allen Weiss;
中文版《数据结构与算法分析——C++描述(第3版)》, 张怀勇等译.
散列表的实现通常称为**散列** (hashing).
散列是一种用于以常数平均时间执行插入、删除和查找的技术.
理想的散列表数据结构只不过是一个包含一些项的具有固定大小的数组.
表的大小记作 $TableSize$.
将每个键映射到从 $0$ 到 $TableSize - 1$ 这个范围中的某个数, 并且放到相应的单元中. 这个映射称为**散列函数** (hash function).
理想情况下, 散列函数应该运算简单并且保证任何两个不同的键映射到不同的单元. 即是单射.
不过这是不可能的, 因为单元的数目是有限的, 而键实际上是用不完的.
因此, 我们寻找一个散列函数, 该函数要在单元之间均匀地分配键. 还要选择一个函数, 决定当两个键散列到同一个值的时候(称为**冲突**, collision)应该做什么, 以及如何确定散列表的大小.
如果键是整数, 则一般合理的方法是直接返回 $Key \bmod TableSize$, 除非 $Key$ 碰巧具有某些不理想的性质. 通常, 键是字符串.
这种情形下, 散列函数需要仔细选择.
一种方法是将字符串中字符的 ASCII 码值加起来, 再对 $TableSize$ 取模:
/**
 * Additive string hash: sums the character codes of key and reduces the sum
 * modulo tableSize.
 *
 * Every character is read as unsigned char, so bytes >= 0x80 contribute a
 * positive amount even on platforms where plain char is signed; the returned
 * index is therefore never negative. The sum is accumulated in an unsigned
 * type, so a very long key wraps around instead of overflowing a signed int.
 *
 * @param key        the string to hash
 * @param tableSize  number of cells in the table; must be greater than 0
 * @return an index in the range [0, tableSize)
 */
int hash( const std::string & key, int tableSize )
{
    unsigned long sum = 0;

    for( std::string::size_type i = 0; i < key.length( ); ++i )
        sum += static_cast<unsigned char>( key[ i ] );

    return static_cast<int>( sum % static_cast<unsigned long>( tableSize ) );
}
不过, 如果表很大, 则函数就不会很好地分配键. 例如, 设 $TableSize = 10007$ ($10007$ 是素数), 并设所有的键至多 $8$ 个字符长.
由于 ASCII 字符的值最多是 $127$, 因此散列函数只能取 $0$ 到 $1016$ 之间的值, 其中 $1016 = 127 \times 8$. 这显然不是一种均匀的分配.
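对于每个元素 $x$, 依次探测单元 $h_0(x), h_1(x), h_2(x), \ldots$, 其中 $h_i(x) = (hash(x) + f(i)) \bmod TableSize$, 且 $f(0) = 0$; 线性探测取 $f(i) = i$.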
平方探测就是上面线性探测函数中令 $f(i) = i^2$.
#ifndef SEPARATE_CHAINING_H
#define SEPARATE_CHAINING_H

#include <vector>
#include <list>
#include <string>
#include <algorithm>
using namespace std;   // Retained only because existing includers may rely on it.

int nextPrime( int n );

// SeparateChaining Hash table class
//
// CONSTRUCTION: an approximate initial size or default of 101
//
// ******************PUBLIC OPERATIONS*********************
// bool insert( x )       --> Insert x
// bool remove( x )       --> Remove x
// bool contains( x )     --> Return true if x is present
// void makeEmpty( )      --> Remove all items
// int hash( string str ) --> Global method to hash strings

int hash( const string & key );
int hash( int key );

/**
 * Hash table that resolves collisions by separate chaining: every bucket is
 * a std::list of the items whose hash maps to that bucket.
 *
 * HashedObj must be comparable with operator== (std::find is used on the
 * chains) and a function hash( const HashedObj & ) returning int must be
 * visible either here or through argument-dependent lookup.
 */
template <typename HashedObj>
class HashTable
{
  public:
    /**
     * Create an empty table with `size` buckets.
     * A non-positive size is raised to 1 so that the modulo in myhash( )
     * never divides by zero.
     */
    explicit HashTable( int size = 101 )
      : theLists( size > 0 ? size : 1 ), currentSize( 0 )
    { }

    /** Return true if x is stored in the table. */
    bool contains( const HashedObj & x ) const
    {
        const std::list<HashedObj> & whichList = theLists[ myhash( x ) ];
        return std::find( whichList.begin( ), whichList.end( ), x ) != whichList.end( );
    }

    /** Remove every item. The number of buckets is left unchanged. */
    void makeEmpty( )
    {
        for( typename std::vector<std::list<HashedObj> >::size_type i = 0; i < theLists.size( ); ++i )
            theLists[ i ].clear( );
        currentSize = 0;   // Otherwise the stale count would trigger premature rehashes.
    }

    /**
     * Insert x unless an equal item is already present.
     * Returns true if x was inserted, false if it was a duplicate.
     * The table is rehashed once it holds more items than buckets.
     */
    bool insert( const HashedObj & x )
    {
        std::list<HashedObj> & whichList = theLists[ myhash( x ) ];
        if( std::find( whichList.begin( ), whichList.end( ), x ) != whichList.end( ) )
            return false;
        whichList.push_back( x );

        ++currentSize;
        if( currentSize > static_cast<int>( theLists.size( ) ) )
            rehash( );

        return true;
    }

    /**
     * Remove x if present.
     * Returns true if an item was removed, false if x was not in the table.
     */
    bool remove( const HashedObj & x )
    {
        std::list<HashedObj> & whichList = theLists[ myhash( x ) ];
        typename std::list<HashedObj>::iterator itr =
            std::find( whichList.begin( ), whichList.end( ), x );

        if( itr == whichList.end( ) )
            return false;

        whichList.erase( itr );
        --currentSize;
        return true;
    }

  private:
    std::vector<std::list<HashedObj> > theLists;   // The array of Lists
    int currentSize;                               // Number of stored items

    /**
     * Grow the table to the next prime at least twice the current bucket
     * count and redistribute every item.
     */
    void rehash( )
    {
        // Allocate the larger table first, then swap it in: after the swap
        // theLists is the new empty table and oldLists holds the old chains.
        std::vector<std::list<HashedObj> > oldLists(
            nextPrime( 2 * static_cast<int>( theLists.size( ) ) ) );
        oldLists.swap( theLists );

        // Relink each node into its new bucket. splice moves the node itself,
        // so no element is copied and currentSize stays correct.
        for( typename std::vector<std::list<HashedObj> >::size_type i = 0; i < oldLists.size( ); ++i )
        {
            std::list<HashedObj> & bucket = oldLists[ i ];
            while( !bucket.empty( ) )
            {
                std::list<HashedObj> & target = theLists[ myhash( bucket.front( ) ) ];
                target.splice( target.end( ), bucket, bucket.begin( ) );
            }
        }
    }

    /** Map x to a bucket index in [0, theLists.size( )). */
    int myhash( const HashedObj & x ) const
    {
        // The using-declaration hides std::hash (dragged in by the
        // using-directive above), which would otherwise make the call
        // ambiguous, while still allowing overloads found through
        // argument-dependent lookup.
        using ::hash;

        // Work in int: taking the remainder against the unsigned size( )
        // would convert a negative hash to a huge unsigned value first.
        const int tableSize = static_cast<int>( theLists.size( ) );
        int hashVal = hash( x ) % tableSize;
        if( hashVal < 0 )
            hashVal += tableSize;

        return hashVal;
    }
};

#endif
#include "SeparateChaining.h"
#include <iostream>
using namespace std;

/**
 * Internal method to test if a number is prime.
 * Trial division by odd candidates; not an efficient algorithm.
 * Every n below 2 (including negative values) is reported as not prime.
 */
bool isPrime( int n )
{
    if( n < 2 )
        return false;
    if( n == 2 || n == 3 )
        return true;
    if( n % 2 == 0 )
        return false;

    // "i <= n / i" is the overflow-free form of "i * i <= n".
    for( int i = 3; i <= n / i; i += 2 )
        if( n % i == 0 )
            return false;

    return true;
}

/**
 * Internal method to return a prime number at least as large as n.
 * Only odd candidates are examined, so the smallest value returned is 3;
 * any n <= 3 (including zero and negative values) yields 3.
 */
int nextPrime( int n )
{
    if( n <= 3 )
        return 3;

    if( n % 2 == 0 )
        n++;

    for( ; !isPrime( n ); n += 2 )
        ;

    return n;
}

/**
 * A hash routine for string objects.
 * Evaluates the polynomial with multiplier 37 over the characters of key.
 * The arithmetic is done in unsigned int so that wrap-around on long keys is
 * well defined; the result may be negative after the conversion back to int.
 */
int hash( const string & key )
{
    unsigned int hashVal = 0;

    for( string::size_type i = 0; i < key.length( ); ++i )
        hashVal = 37u * hashVal + static_cast<unsigned int>( key[ i ] );

    return static_cast<int>( hashVal );
}

/**
 * A hash routine for ints: the key is its own hash value.
 */
int hash( int key )
{
    return key;
}
#include <iostream> #include "SeparateChaining.h" using namespace std; // Simple main int main( ) { HashTable<int> H; const int NUMS = 4000; const int GAP = 37; int i; cout << "Checking... (no more output means success)" << endl; for( i = GAP; i != 0; i = ( i + GAP ) % NUMS ) H.insert( i ); for( i = 1; i < NUMS; i += 2 ) H.remove( i ); for( i = 2; i < NUMS; i += 2 ) if( !H.contains( i ) ) cout << "Contains fails " << i << endl; for( i = 1; i < NUMS; i += 2 ) { if( H.contains( i ) ) cout << "OOPS!!! " << i << endl; } return 0; }
#ifndef QUADRATIC_PROBING_H
#define QUADRATIC_PROBING_H

#include <vector>
#include <string>
using namespace std;   // Retained only because existing includers may rely on it.

int nextPrime( int n );
int hash( const string & key );
int hash( int key );

// QuadraticProbing Hash table class
//
// CONSTRUCTION: an approximate initial size or default of 101
//
// ******************PUBLIC OPERATIONS*********************
// bool insert( x )       --> Insert x
// bool remove( x )       --> Remove x
// bool contains( x )     --> Return true if x is present
// void makeEmpty( )      --> Remove all items
// int hash( string str ) --> Global method to hash strings

/**
 * Open-addressing hash table that resolves collisions with quadratic probing
 * and deletes lazily (a removed cell is marked DELETED, not emptied).
 *
 * HashedObj must be default-constructible, copyable and comparable with
 * operator!=, and a function hash( const HashedObj & ) returning int must be
 * visible either here or through argument-dependent lookup.
 */
template <typename HashedObj>
class HashTable
{
  public:
    /** State of one cell of the table. */
    enum EntryType { ACTIVE, EMPTY, DELETED };

    /**
     * Create an empty table whose capacity is nextPrime( size ).
     * A non-positive size is raised to 1 before it is handed to nextPrime.
     */
    explicit HashTable( int size = 101 )
      : array( nextPrime( size > 0 ? size : 1 ) )
    {
        makeEmpty( );
    }

    /** Return true if x is stored in the table (and not lazily deleted). */
    bool contains( const HashedObj & x ) const
    {
        return isActive( findPos( x ) );
    }

    /** Mark every cell EMPTY. The capacity is left unchanged. */
    void makeEmpty( )
    {
        currentSize = 0;
        for( typename std::vector<HashEntry>::size_type i = 0; i < array.size( ); ++i )
            array[ i ].info = EMPTY;
    }

    /**
     * Insert x unless an equal item is already active.
     * Returns true if x was inserted, false if it was a duplicate.
     * The table is rehashed once more than half of its cells are non-empty.
     */
    bool insert( const HashedObj & x )
    {
        const int currentPos = findPos( x );
        if( isActive( currentPos ) )
            return false;

        // findPos stops either on an EMPTY cell or on a cell that already
        // holds x. In the second case the cell is a DELETED tombstone that
        // is merely revived, so no additional cell becomes occupied.
        const bool wasEmpty = ( array[ currentPos ].info == EMPTY );
        array[ currentPos ] = HashEntry( x, ACTIVE );

        if( wasEmpty && ++currentSize > static_cast<int>( array.size( ) ) / 2 )
            rehash( );

        return true;
    }

    /**
     * Lazily remove x: its cell is marked DELETED.
     * Returns true if x was active, false otherwise.
     * currentSize is deliberately not decremented, because the cell is still
     * non-empty and therefore still lengthens probe sequences.
     */
    bool remove( const HashedObj & x )
    {
        const int currentPos = findPos( x );
        if( !isActive( currentPos ) )
            return false;

        array[ currentPos ].info = DELETED;
        return true;
    }

  private:
    /** One cell: the stored element plus its state. */
    struct HashEntry
    {
        HashedObj element;
        EntryType info;

        HashEntry( const HashedObj & e = HashedObj( ), EntryType i = EMPTY )
          : element( e ), info( i ) { }
    };

    std::vector<HashEntry> array;
    int currentSize;   // Number of non-EMPTY cells (ACTIVE plus DELETED)

    /** Return true if the cell at currentPos holds a live element. */
    bool isActive( int currentPos ) const
    {
        return array[ currentPos ].info == ACTIVE;
    }

    /**
     * Follow the probe sequence of x and return the first position that is
     * either EMPTY or whose element equals x (ACTIVE or DELETED).
     */
    int findPos( const HashedObj & x ) const
    {
        const int tableSize = static_cast<int>( array.size( ) );
        int offset = 1;
        int currentPos = myhash( x );

        while( array[ currentPos ].info != EMPTY &&
               array[ currentPos ].element != x )
        {
            // Successive odd offsets 1, 3, 5, ... place the i-th probe
            // i*i cells past the home position.
            currentPos += offset;
            offset += 2;
            if( currentPos >= tableSize )
                currentPos -= tableSize;
        }

        return currentPos;
    }

    /**
     * Grow the table to the next prime at least twice the current capacity
     * and re-insert the ACTIVE elements; DELETED cells are dropped.
     */
    void rehash( )
    {
        // Allocate the larger table (all cells default to EMPTY) before
        // touching the current one, then swap it in: afterwards `array` is
        // the new empty table and oldArray holds the previous cells.
        std::vector<HashEntry> oldArray(
            nextPrime( 2 * static_cast<int>( array.size( ) ) ) );
        oldArray.swap( array );

        currentSize = 0;
        for( typename std::vector<HashEntry>::size_type i = 0; i < oldArray.size( ); ++i )
            if( oldArray[ i ].info == ACTIVE )
                insert( oldArray[ i ].element );
    }

    /** Map x to a home position in [0, array.size( )). */
    int myhash( const HashedObj & x ) const
    {
        // The using-declaration hides std::hash (dragged in by the
        // using-directive above), which would otherwise make the call
        // ambiguous, while still allowing overloads found through
        // argument-dependent lookup.
        using ::hash;

        // Work in int: taking the remainder against the unsigned size( )
        // would convert a negative hash to a huge unsigned value first.
        const int tableSize = static_cast<int>( array.size( ) );
        int hashVal = hash( x ) % tableSize;
        if( hashVal < 0 )
            hashVal += tableSize;

        return hashVal;
    }
};

#endif
#include "QuadraticProbing.h"
#include <iostream>
using namespace std;

/**
 * Internal method to test if a number is prime.
 * Trial division by odd candidates; not an efficient algorithm.
 * Every n below 2 (including negative values) is reported as not prime.
 */
bool isPrime( int n )
{
    if( n < 2 )
        return false;
    if( n == 2 || n == 3 )
        return true;
    if( n % 2 == 0 )
        return false;

    // "i <= n / i" is the overflow-free form of "i * i <= n".
    for( int i = 3; i <= n / i; i += 2 )
        if( n % i == 0 )
            return false;

    return true;
}

/**
 * Internal method to return a prime number at least as large as n.
 * Only odd candidates are examined, so the smallest value returned is 3;
 * any n <= 3 (including zero and negative values) yields 3.
 */
int nextPrime( int n )
{
    if( n <= 3 )
        return 3;

    if( n % 2 == 0 )
        n++;

    for( ; !isPrime( n ); n += 2 )
        ;

    return n;
}

/**
 * A hash routine for string objects.
 * Evaluates the polynomial with multiplier 37 over the characters of key.
 * The arithmetic is done in unsigned int so that wrap-around on long keys is
 * well defined; the result may be negative after the conversion back to int.
 */
int hash( const string & key )
{
    unsigned int hashVal = 0;

    for( string::size_type i = 0; i < key.length( ); ++i )
        hashVal = 37u * hashVal + static_cast<unsigned int>( key[ i ] );

    return static_cast<int>( hashVal );
}

/**
 * A hash routine for ints: the key is its own hash value.
 */
int hash( int key )
{
    return key;
}
#include <iostream> #include "QuadraticProbing.h" using namespace std; // Simple main int main( ) { HashTable<int> H; const int NUMS = 4000; const int GAP = 37; int i; cout << "Checking... (no more output means success)" << endl; int numCount=0; int successCount=0; for( i = GAP; i != 0; i = ( i + GAP ) % NUMS ) { if(H.insert( i )) { cout << "insert " << i << endl; successCount++; } numCount++; } cout << "We have insert " << successCount << " elements during " << numCount << " operations." << endl; if(H.contains(4000)) { cout << "contains 4000" << endl; } else { cout << "4000 is not in the hash table." << endl; } //remove the odd numbers. for( i = 1; i < NUMS; i += 2 ) { //if(H.remove( i ))cout << "remove " << i << " success" << endl; //else cout << i << " is not contained in this hash table." << endl; } //test if even number is contained in H. for( i = 2; i < NUMS; i +=2 ) if( !H.contains( i ) ) cout << "Contains fails " << i << endl; for( i = 1; i < NUMS; i += 2 ) { //if( H.contains( i ) ) // cout << "OOPS!!! " << i << endl; } return 0; }
/**
 * A (key, value) pair of ints, as stored in one slot of a hash map.
 * Both fields are set at construction and exposed through read-only
 * accessors.
 */
class HashEntry {
private:
    int key;
    int value;

public:
    /** Build an entry holding the given key and value. */
    HashEntry(int key, int value) : key(key), value(value) { }

    /** Return the key. Usable on const entries. */
    int getKey() const { return key; }

    /** Return the value. Usable on const entries. */
    int getValue() const { return value; }
};
const int TABLE_SIZE = 128; class HashMap { private: HashEntry **table; public: HashMap() { table = new HashEntry*[TABLE_SIZE]; for (int i = 0; i < TABLE_SIZE; i++) table[i] = NULL; } int get(int key) { int hash = (key % TABLE_SIZE); while (table[hash] != NULL && table[hash]->getKey() != key) hash = (hash + 1) % TABLE_SIZE; if (table[hash] == NULL) return -1; else return table[hash]->getValue(); } void put(int key, int value) { int hash = (key % TABLE_SIZE); while (table[hash] != NULL && table[hash]->getKey() != key) hash = (hash + 1) % TABLE_SIZE; if (table[hash] != NULL) delete table[hash]; table[hash] = new HashEntry(key, value); } ~HashMap() { for (int i = 0; i < TABLE_SIZE; i++) if (table[i] != NULL) delete table[i]; delete[] table; } };
编译器使用散列表跟踪源代码中声明的变量, 这种数据结构叫作**符号表** (symbol table).
散列表适用于任何其结点有实名而不是数字名的图论问题.
当程序搜索游戏的不同的运动路径时, 它通过计算基于位置的散列函数而跟踪一些已知的位置(并把对于该位置的移动存储起来). 如果同样的位置再次出现, 程序通常通过简单的移动变换来避免昂贵的重复计算.
游戏程序的这种一般特点叫作**置换表** (transposition table).
如果拼写检查程序的主要功能是检查拼写错误(而非纠正错误), 那么可以预先将整个词典进行散列, 这样就可以在常数时间内检查一个单词是否拼写正确.