Saturday, March 31, 2012

uint32_t does not defined, when compile GCC C code with MinGW on Windows

Recently, I am trying to reuse some code developed by GCC on windows. One error may occurred is that "uint_32" is not defined. I found one solution from "stack overflow". The solution is simple, add the following code to your project, and include it in all your files need "uint_32".

The original link to the code is here

// ISO C9x  compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
// 
//  Copyright (c) 2006-2008 Alexander Chemeris
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// 
//   1. Redistributions of source code must retain the above copyright notice,
//      this list of conditions and the following disclaimer.
// 
//   2. Redistributions in binary form must reproduce the above copyright
//      notice, this list of conditions and the following disclaimer in the
//      documentation and/or other materials provided with the distribution.
// 
//   3. The name of the author may be used to endorse or promote products
//      derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// 
///////////////////////////////////////////////////////////////////////////////

#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]

#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_

#if _MSC_VER > 1000
#pragma once
#endif

#include 

// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
// compiling for ARM we should wrap  include with 'extern "C++" {}'
// or compiler give many errors like this:
//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#ifdef __cplusplus
extern "C" {
#endif
#  include 
#ifdef __cplusplus
}
#endif

// Define _W64 macros to mark types changing their size, like intptr_t.
#ifndef _W64
#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
#     define _W64 __w64
#  else
#     define _W64
#  endif
#endif


// 7.18.1 Integer types

// 7.18.1.1 Exact-width integer types

// Visual Studio 6 and Embedded Visual C++ 4 doesn't
// realize that, e.g. char has the same size as __int8
// so we give up on __intX for them.
#if (_MSC_VER < 1300)
   typedef signed char       int8_t;
   typedef signed short      int16_t;
   typedef signed int        int32_t;
   typedef unsigned char     uint8_t;
   typedef unsigned short    uint16_t;
   typedef unsigned int      uint32_t;
#else
   typedef signed __int8     int8_t;
   typedef signed __int16    int16_t;
   typedef signed __int32    int32_t;
   typedef unsigned __int8   uint8_t;
   typedef unsigned __int16  uint16_t;
   typedef unsigned __int32  uint32_t;
#endif
typedef signed __int64       int64_t;
typedef unsigned __int64     uint64_t;


// 7.18.1.2 Minimum-width integer types
typedef int8_t    int_least8_t;
typedef int16_t   int_least16_t;
typedef int32_t   int_least32_t;
typedef int64_t   int_least64_t;
typedef uint8_t   uint_least8_t;
typedef uint16_t  uint_least16_t;
typedef uint32_t  uint_least32_t;
typedef uint64_t  uint_least64_t;

// 7.18.1.3 Fastest minimum-width integer types
typedef int8_t    int_fast8_t;
typedef int16_t   int_fast16_t;
typedef int32_t   int_fast32_t;
typedef int64_t   int_fast64_t;
typedef uint8_t   uint_fast8_t;
typedef uint16_t  uint_fast16_t;
typedef uint32_t  uint_fast32_t;
typedef uint64_t  uint_fast64_t;

// 7.18.1.4 Integer types capable of holding object pointers
#ifdef _WIN64 // [
   typedef signed __int64    intptr_t;
   typedef unsigned __int64  uintptr_t;
#else // _WIN64 ][
   typedef _W64 signed int   intptr_t;
   typedef _W64 unsigned int uintptr_t;
#endif // _WIN64 ]

// 7.18.1.5 Greatest-width integer types
typedef int64_t   intmax_t;
typedef uint64_t  uintmax_t;


// 7.18.2 Limits of specified-width integer types

#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259

// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN     ((int8_t)_I8_MIN)
#define INT8_MAX     _I8_MAX
#define INT16_MIN    ((int16_t)_I16_MIN)
#define INT16_MAX    _I16_MAX
#define INT32_MIN    ((int32_t)_I32_MIN)
#define INT32_MAX    _I32_MAX
#define INT64_MIN    ((int64_t)_I64_MIN)
#define INT64_MAX    _I64_MAX
#define UINT8_MAX    _UI8_MAX
#define UINT16_MAX   _UI16_MAX
#define UINT32_MAX   _UI32_MAX
#define UINT64_MAX   _UI64_MAX

// 7.18.2.2 Limits of minimum-width integer types
#define INT_LEAST8_MIN    INT8_MIN
#define INT_LEAST8_MAX    INT8_MAX
#define INT_LEAST16_MIN   INT16_MIN
#define INT_LEAST16_MAX   INT16_MAX
#define INT_LEAST32_MIN   INT32_MIN
#define INT_LEAST32_MAX   INT32_MAX
#define INT_LEAST64_MIN   INT64_MIN
#define INT_LEAST64_MAX   INT64_MAX
#define UINT_LEAST8_MAX   UINT8_MAX
#define UINT_LEAST16_MAX  UINT16_MAX
#define UINT_LEAST32_MAX  UINT32_MAX
#define UINT_LEAST64_MAX  UINT64_MAX

// 7.18.2.3 Limits of fastest minimum-width integer types
#define INT_FAST8_MIN    INT8_MIN
#define INT_FAST8_MAX    INT8_MAX
#define INT_FAST16_MIN   INT16_MIN
#define INT_FAST16_MAX   INT16_MAX
#define INT_FAST32_MIN   INT32_MIN
#define INT_FAST32_MAX   INT32_MAX
#define INT_FAST64_MIN   INT64_MIN
#define INT_FAST64_MAX   INT64_MAX
#define UINT_FAST8_MAX   UINT8_MAX
#define UINT_FAST16_MAX  UINT16_MAX
#define UINT_FAST32_MAX  UINT32_MAX
#define UINT_FAST64_MAX  UINT64_MAX

// 7.18.2.4 Limits of integer types capable of holding object pointers
#ifdef _WIN64 // [
#  define INTPTR_MIN   INT64_MIN
#  define INTPTR_MAX   INT64_MAX
#  define UINTPTR_MAX  UINT64_MAX
#else // _WIN64 ][
#  define INTPTR_MIN   INT32_MIN
#  define INTPTR_MAX   INT32_MAX
#  define UINTPTR_MAX  UINT32_MAX
#endif // _WIN64 ]

// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN   INT64_MIN
#define INTMAX_MAX   INT64_MAX
#define UINTMAX_MAX  UINT64_MAX

// 7.18.3 Limits of other integer types

#ifdef _WIN64 // [
#  define PTRDIFF_MIN  _I64_MIN
#  define PTRDIFF_MAX  _I64_MAX
#else  // _WIN64 ][
#  define PTRDIFF_MIN  _I32_MIN
#  define PTRDIFF_MAX  _I32_MAX
#endif  // _WIN64 ]

#define SIG_ATOMIC_MIN  INT_MIN
#define SIG_ATOMIC_MAX  INT_MAX

#ifndef SIZE_MAX // [
#  ifdef _WIN64 // [
#     define SIZE_MAX  _UI64_MAX
#  else // _WIN64 ][
#     define SIZE_MAX  _UI32_MAX
#  endif // _WIN64 ]
#endif // SIZE_MAX ]

// WCHAR_MIN and WCHAR_MAX are also defined in 
#ifndef WCHAR_MIN // [
#  define WCHAR_MIN  0
#endif  // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
#  define WCHAR_MAX  _UI16_MAX
#endif  // WCHAR_MAX ]

#define WINT_MIN  0
#define WINT_MAX  _UI16_MAX

#endif // __STDC_LIMIT_MACROS ]


// 7.18.4 Limits of other integer types

#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260

// 7.18.4.1 Macros for minimum-width integer constants

#define INT8_C(val)  val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64

#define UINT8_C(val)  val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64

// 7.18.4.2 Macros for greatest-width integer constants
#define INTMAX_C   INT64_C
#define UINTMAX_C  UINT64_C

#endif // __STDC_CONSTANT_MACROS ]


#endif // _MSC_STDINT_H_ ]

Thursday, March 29, 2012

isnan/isinf functions are not define in Visual Studio

Try the following hard code, it works for me.

#include
#define isnan(x) _isnan(x)
#define isinf(x) (!_finite(x))
#define fpu_error(x) (isinf(x) || isnan(x))

Thursday, March 22, 2012

Get a sub image by OpenCV


The following code segment comes from the following link:
        http://blog.weisu.org/2007/10/opencv-get-sub-image.html
and tells you how to get a sub image by OpenCV. If you want to use this code, do not forget release the return value, the iamge :)

IplImage* Sub_Image(IplImage *image, CvRect roi)
{
    IplImage *result;
    // set ROI, you may use following two funs:
    //cvSetImageROI( image, cvRect( 0, 0, image->width, image->height ));

    cvSetImageROI(image,roi);
    // sub-image
    result = cvCreateImage( cvSize(roi.width, roi.height), image->depth, image->nChannels );
    cvCopy(image,result);
    cvResetImageROI(image); // release image ROI
    return result;
}

Tuesday, March 13, 2012

Action estimation

1. For optical flow approach, divide the field into 4 different channels, +x, -x, +y and -y, and then Gaussian on the four channels, which contains positive value only. By doing this, the field information would not be cancelled by Gaussian;
2. Create similarity matrix among each frame from both input video and the video in data set. Then, by searching the maximum from the matrix, we could overcome the differences from the uncertain beginning of the action. For example, working could be initialized from right leg or left leg.
3. Add time information to synchronize the movie script and the video scene by using the subtitle and its time line.
        subtitle                                movie script
        time t: ABC...                     ABC...
                                                  .....
        time t would the time tag of  "ABC"

Saturday, March 10, 2012

Unconstraint Optimization 3

choosing step size

After we understand the meaning of Wolfe Condition, we have had an metric to measure what is good step size, and then, we could begin to find good step size based on this metric. When line search is trying to calculate the step size, a_k, an initial point, a_0, is needed, and then generate a serial points, {a_i}, until a_I satisfies Wolfe condition. Once a step size is selected, it becomes the next a_i. In this blog, I would mainly introduce two commonly used strategies, square interpolation and cubic interpolation.

Square/Cubic interpolation

When given an initial step size, a_0, if this step size satisfies Wolfe conditions, then a_0 becomes a_k, which means the calculation at x_k is terminated. Otherwise, we could use the information, Ø(a_0)、Ø(0)、Ø’(0), at this position to build a 2-order interpolating polynomial to fit Ø(a_k). This polynomial is as followed: 

After reviewing the equation above, we could find that it satisfies the following conditions:

Ø_q(0) = Ø(0) Ø_q’(0) = Ø’(0) Ø_q(a_0) = Ø(a_0)

Then we calculate the derivative of the above equation and make it to be zero, which means we have found the value of a that leading to minimum.

If a_I satisfies Wolfe condition, a_k is set to a_1. Otherwise, we would build a 3-order interpolation polynomial and try to find a that leads to minimum value. The process of this interpolation are shown as following:

If a_(i+1) satisfies Wolfe condition, the a_k is set to a_(i+1). Otherwise, we continue the above procedure, and use  Ø (a_i+1) and Ø’(a_i+1) to replace the value at a_(i-1) and a_i. Once one of a_(i-1) and a_i is set, we use the new value to replace corresponding value. This procedure would be continued until find a value satisfies the Wolfe condition, or not point found.

In Newton method, or Quasi-Newton method, the initial step size is 1, which could ensure that whatever step size we select is unit step size and make a fast converging speed. When calculating the first step size, a-0, we use the following equation to calculate:

In the following steps, a_0 is always 1.

Calculating a_k

In this section, we would make a summer on the step size calculation with details. Suppose x_k is current point, and we want to find a step size satisfies Wolfe Condition, and move the current position to x_(k+1). We choose the initial point a_0 = 1. Then:

1) initialize a_x <- a_y <- 0Ø(a_x) 、 Ø(a_y) 、Ø’(a_x) 、 Ø’(a_y)、a_min、a_max

2) initialize a_I <- a_o, Ø’(a_i)、Ø(a_i)

3) if Ø(a_i) > Ø(a_x). This means a_I is too big. So the proper step size must be between [a_x a_i]. By using square and cubic interpolation on a_x and a_I, get two new step size, a_quadratic and a_cubic, and use the one near a_x as the step size. Here assuming use the new step size to overwrite a_quaratic, and apply the following operations:

a_y left a_i
Ø(a_y) left Ø(a_i)
Ø’(a_y) left Ø’(a_i)
a_i+1 left a_quadrati

In this case, a_k is in the interval of a_x and a_y.

4) If Ø’(a_i)* Ø’(a_x) < 0, a_I is too big. So the step size must be in the interval of [a_x a_i]. After interpolation, we use the step size which close to a_i. And do the following operations:

a_y left a_x
Ø(a_y) left Ø(a_x)
Ø’(a_y) left Ø’(a_x)

a_x left a_i
Ø(a_x) left Ø(a_i)
Ø’(a_x) left Ø’(a_i)

a_i+1 left a_quadratic

5) |Ø’(a_i)| <|Ø’(a_x) |, which means the step size is too small. After interpolation, we do the following operations:

a_x left a_i
Ø(a_x) left Ø(a_i)
Ø’(a_x) left Ø’(a_i)

a_i+1 left a_quadrati

6) if |Ø’(a_i)| ≥|Ø’(a_x) |, a_I is too small. If we have already known the range of a_k, such as [a_x a_y], the good step size should be in [a_I a_y]. Then apply cubic interpolation on a_I and a_y, and set the new step size as a_cubic; otherwise, if a_x is smaller than a_I, which means the new step size is not big enough. So size the new step size should be a_max. If a_x > a_I, this means the new step size is not small enough, we should set it to a_min. And apply the following operations:

a_x left a_i
Ø(a_x) left Ø(a_i)
Ø’(a_x) left Ø’(a_i)

If we have known the interval, [a_x, a_y], of a_k, then

a_i+1 left a_cubic
else if a_x < a_i
a_i+1 left a_max
else if a_x ≥ a_i
a_i+1 left a_min

7) if a_(i+1) satisfies the following conditions:

we set the new step size as a_(i+1) and end the process of calculating the step size.

Otherwise, go to 2) and continue the next iteration.

Friday, March 9, 2012

Unconstrained optimization

  • The rationality of the step size

In the following section, we would begin to discuss two conditions which a_k(the step size) have to satisfy. If the two conditions are both satisfied, we could say that the proper step size, from x_k to x_(k+1), has been found. The first condition called “sufficient decrease condition”.  Intuitively, this condition keeps that the value at x_(k+1) should be smaller than x_(k)`s, which is required to reach the convergence. The second one is called “curvature condition”. This condition keeps that the derivative of f at x_k must be smaller than x_(k+1)`s.

  • Sufficient decrease condition

A proper a_k must satisfy sufficient decrease condition. We could make the following descriptions about this condition:

f( x_(k+1) ) <= f( x_k ) + c_1 * a_k * ▽f_k^T * p_k;

if we plugin EQ1, we have

f( x_k + a_k * p_k ) <= f( x_k ) _ c_1 * a_k * ▽f_k^T * p_k

where f( x_k ) is the value of f at x_k; ▽f_k means the gradient at x_k; p_k means the direction from x_k to x_(k+1); a_k means the step size and c_1 is a constant that belongs to [0, 1]

If p_k is the decent direction of function f, we have ▽f_k * p_k < 0. So f( x_k + 1 )<f( x_k ).

As only a_k is a variable, we could rewrite as followed:

Ø(a_k) ≤ l(a_k), where

 

We could visualize the condition as below:

If the step size could make Ø(a_k) in the acceptable interval, the condition is satisfied.

  • Curvature condition

We could describe this condition as following:

 

or     

In the above equation, ▽f_(k+1) is the gradient at x_(k+1); p_k is the decent direction and c_2 is an constant belongs to [0 1] and usually set as 0.9.

When p_k is the decent direction of the function f, then ▽f_k * p_k < 0

So, it means ▽f _k+1 ≥ c_2 * ▽f_k

From the plot below, we could see that the equation above requires that the decent “speed” at x_(k+1)should be smaller than x_k`s.

  • Wolfe condition

Wolfe condition could be seemed as the combination of the two conditions mentioned above, which means the a_k should meet the following equations.

We could visualize the condition as:

In some real optimization application, we may have to use another condition, called string Wolfe condition, which is stemmed from the Wolfe condition. In this case, the a_k need to meet the followings:

The only difference between those two conditions is the strong Wolfe condition avoids the possible positive value of  ▽f(x_k + a_k * p_k).

 

To be continued.

Unconstrained optimization

NOTE: this article is based on a serial blog from 52nlp.cn written by “jianzhu”. I translated from the Chinese version but there are not exactly the same. The original link to the blogs is attached: Click here. 

1. What is unconstrained optimization problem.

The above image shown an example of unconstrained optimization problem. Suppose we have function, called f, and its plot in 2D space is as above. Unconstrained optimization is a process that tries to find the minimum point(s) of f. In the figure above, there are two minimum points, one is global minimum(or minimal point) and the other one is a local minimum. As the limitation of the complexity of the algorithm, most unconstraint optimizer algorithms could only guarantee a local minimum. If we could only find local minimum, why we could still apply those “not-so-good” algorithms to solve our problems? From my point of view, there are mainly two reasons, the first is that many problems we are facing are modeled as a convex function, which means that the minimum and minimal are the same point; the second reason is that our world is not perfect, local minimum may sufficient for us in some cases.

Now we have known what is the unconstraint optimization problem, let us work on the basic process of unconstraint optimization. The first thing we have to do is to find an initial point, denoted as a red dot. Sometimes we randomly choose that point, sometimes we guess it with some heuristics, the rest of the time we could calculate it. It is all depended on what is your problem.

 

After we have the initial point, we could begin a arbitrary optimization algorithm to find the minimum point. During the optimizing, two concepts are critical, direction and step size.

2. Line search

Line search is used to calculate the step size, which mentioned above. Once we find the direction, we need to find how far we should move from current point, x_k and stop at the next “proper” point, x_(k+1). If we use p_k to denote the direction from the k-th point to the (k+1)-th point, x_k denotes current point, x_(k+1) denotes the nect point, and a_k denotes the k-th step size, then the following equation holds:

x_(k+1) = x_k + a_k * p_k

In most line search method, p_k should be the decent direction, which means f(x) would decrease if we move along p_k. As the goal is to find a minimum value, the best situation is that find a a_k that satisfies f(x_(k+1)) is the global minimum along p_k. The following equation shows the best situation:

Ø (a_k) = f(x_k+1) = f(x_k + a_k * p_k)

If we want to find a step size that makes Ø (a_k) to be global minimum, a great amount of calculation related to f(x_k+a_k*p_k); on the other hand, if we consider this problem from derivative point of view, we have to calculate ▽f_k+1, which is also unrealistic. So, we have to make some tradeoff between computation load and problem solving.

If we cannot build Roma in one day, what we should do with a_k? We give two constraints to determine what is a reasonable a_k and what is the size of a_k.

 

To be continued.

Wednesday, March 7, 2012

A small project on Pinterest, which needs web spider...

As we are doing a small project about Pinterest, we have to download some data, including images and "re-pin" relations, from www.pinterest.com. At the beginning, I suppose this project is purely a computer vision project, which only needs taking care of the algorithms and the implementation. But I am worry, Pinterest has not opened its API yet, which means I may need to implement a simple web spider to traverse the web page and extract the data we need out. It is a complicate thing... After few hours of investigation, I found this "answer" from StackOverflow, which is very useful and inspiring.


Writing a spider for the whole WWW is quite a task --- you have to take care about many "little details" such as:
  • Each spider computer needs should receive data from a few thousand servers in parallel in order to make efficient use of the connection bandwidth. (asynchronous socket i/o).
  • You need several computers that spider in parallel in order to cover the vast amount of information on the WWW (clustering; partitioning the work)
  • You need to be polite to the spidered web sites:
    • Respect the robots.txt files.
    • Don't fetch a lot of information too quickly: this overloads the servers.
    • Don't fetch files that you really don't need (e.g. iso disk images; tgz packages for software download...).
  • You have to deal with cookies/session ids: Many sites attach unique session ids to URLs to identify client sessions. Each time you arrive at the site, you get a new session id and a new virtual world of pages (with the same content). Because of such problems, early search engines ignored dynamic content. Modern search engines have learned what the problems are and how to deal with them.
  • You have to detect and ignore troublesome data: connections that provide a seemingly infinite amount of data or connections that are too slow to finish.
  • Besides following links, you may want to parse sitemaps to get URLs of pages.
  • You may want to evaluate which information is important for you and changes frequently to be refreshed more frequently than other pages. Note: A spider for the whole WWW receives a lot of data --- you pay for that bandwidth. You may want to use HTTP HEAD requests to guess whether a page has changed or not.
  • Besides receiving, you want to process the information and store it. Google builds indices that list for each word the pages that contain it. You may need separate storage computers and an infrastructure to connect them. Traditional relational data bases don't keep up with the data volume and performance requirements of storing/indexing the whole WWW.
This is a lot of work. But if your target is more modest than reading the whole WWW, you may skip some of the parts. If you just want to download a copy of a wiki etc. you get down to the specs of wget.
Note: If you don't believe that it's so much work, you may want to read up on how Google re-invented most of the computing wheels (on top of the basic Linux kernel) to build good spiders. Even if you cut a lot of corners, it's a lot of work.
Let me add a few more technical remarks on three points
Parallel connections / asynchronous socket communication
You could run several spider programs in parallel processes or threads. But you need about 5000-10000 parallel connections in order to make good use of your network connection. And this amount of parallel processes/threads produces too much overhead.
A better solution is asynchronous input/output: process about 1000 parallel connections in one single thread by opening the sockets in non-blocking mode and use epoll or select to process just those connections that have received data. Since Linux kernel 2.4, Linux has excellent support for scalability (I also recommend that you study memory-mapped files) continuously improved in later versions.
Note: Using asynchronous i/o helps much more than using a "fast language": It's better to write an epoll-driven process for 1000 connections written in Perl than to run 1000 processes written in C. If you do it right, you can saturate a 100Mb connection with processes written in perl.
The down side of this approach is that you will have to implement the HTTP specification yourself in an asynchronous form (I am not aware of a re-usable library that does this). It's much easier to do this with the simpler HTTP/1.0 protocol than the modern HTTP/1.1 protocol. You probably would not benefit from the advantages of HTTP/1.1 for normal browsers anyhow, so this may be a good place to save some development costs.
Partitioning the work over several servers
One computer can't keep up with spidering the whole WWW. You need to distribute your work over several servers and exchange information between them. I suggest to assign certain "ranges of domain names" to each server: keep a central data base of domain names with a reference to a spider computer.
Extract URLs from received web pages in batches: sort them according to their domain names; remove duplicates and send them to the responsible spider computer. On that computer, keep an index of URLs that already are fetched and fetch the remaining URLs.
If you keep a queue of URLs waiting to be fetched on each spider computer, you will have no performance bottlenecks. But it's quite a lot of programming to implement this.
Read the standards
I mentioned several standards (HTTP/1.x, Robots.txt, Cookies). Take your time to read them and implement them. If you just follow examples of sites that you know, you will make mistakes (forget parts of the standard that are not relevant to your samples) and cause trouble for those sites that use these additional features.
It's a pain to read the HTTP/1.1 standard document. But all the little details got added to it because somebody really needs that little detail and now uses it. 


The original link is attached here.