Ijumaa, 15 Agosti 2014

Intersection_Posting_lists

"utility.h"    file should contain the following lines

#include<iostream>
#include<fstream>
#include<vector>
#include<algorithm>
#include<string>
#include<map>
---------------------------------------------------------------------------------------------
"yangu.txt" should contain documents such as the following, each one terminated by a stand-alone "#" (this sample is only an illustration; any text will do)::

MY MOTHER love me too much #
i love my mother too #
do you love me too #
or only you care about your mother #
-----------------------------------------------------------------------------------------------





#include"utility.h"
using namespace std;
//Forward declarations of the functions defined after main().
//Lower, removepunc and digitcheck each take a single term by reference.
string Lower(string& lowercase_term);
string removepunc(string& s);
int digitcheck(string& s);
//dictionery fills the term list and the per-document word lists from the input file.
void dictionery(vector<string> &terms,vector<vector<string> > &documents);
//post1/post2 collect document IDs into take/take1 and write them to the output file.
void post1(vector<string> &terms,vector<vector<string> > &documents, vector<int> &take,int
&docID);
void post2(vector<string> &terms,vector<vector<string> > &documents, vector<int> &take1,int
&docID);
//inter2_post writes the intersection of the two posting lists to the output file.
void inter2_post(vector<int> &take,vector<int> &take1,vector<vector<string> > &documents);
/*
 * Program entry point: builds the dictionary from the documents file, creates
 * the two posting lists and writes their intersection. The results are
 * written to the text file "posting.txt" by post1, post2 and inter2_post.
 */
int main()
{
    vector<string> v;            //list of terms filled by dictionery
    vector<int> tk;              //posting list filled by post1
    vector<int> tk1;             //posting list filled by post2
    vector<vector<string> > vv;  //one vector of words per document
    //ID is handed to post1/post2 by reference and may be read there before it
    //is written, so it must start with a defined value.
    int ID=0;
    cout<<"THE OUTPUT WILL BE WRITTEN IN TEXT FILE NAMED posting "<<endl;
    dictionery(v,vv);       //read the documents and build the dictionary
    post1(v,vv,tk,ID);      //first posting list
    post2(v,vv,tk1,ID);     //second posting list
    inter2_post(tk,tk1,vv); //intersection of both lists
    system("pause");//keeps the console window open (Windows shell command)
    return 0;
}
void dictionery(vector<string> &terms,vector<vector<string> > &documents)//function for
creating dictionery
{
ifstream mem("yangu.txt");
if(!mem)
{
cout<<"The file for documents not created"<<endl;
}
else
{
while(!mem.eof())
{
string word;
vector<string> words;
while(mem>>word && word!="#")
{
Lower(word);
digitcheck(word);
removepunc(word);
words.push_back(word);
terms.push_back(word);
sort(terms.begin(),terms.end());//sorting the terms
terms.erase(unique(terms.begin(),terms.end()),terms.end());//remove term
repeatation
}
if(!words.empty())
{
documents.push_back(words);
}
}
}
}
/*
 * Builds the posting list for the term "mother" and writes it to
 * "posting.txt" (the file is created or truncated).
 *   terms     - not used by this function
 *   documents - one vector of words per document
 *   take      - the 1-based IDs of the documents containing the term are
 *               appended in ascending order
 *   docID     - set to the ID of the last matching document; left untouched
 *               when no document matches
 */
void post1(vector<string> &terms,vector<vector<string> > &documents,vector<int> &take,int
&docID)
{
    for(vector<vector<string> >::size_type i=0;i<documents.size();i++)
    {
        //record a document only when it really contains the term
        if(find(documents[i].begin(),documents[i].end(),"mother")!=documents[i].end())
        {
            docID=static_cast<int>(i)+1;//document IDs start at 1
            take.push_back(docID);
        }
    }
    ofstream outputFile("posting.txt");
    outputFile<<"The first posting list[mother]:"<<endl;
    outputFile<<"==============================="<<endl;
    for(vector<int>::size_type i=0;i<take.size();i++)
        outputFile<<take[i]<<" ";
    outputFile<<endl;
}
/*
 * Builds the posting list for the term "love" and appends it to
 * "posting.txt".
 *   terms     - not used by this function
 *   documents - one vector of words per document
 *   take1     - the 1-based IDs of the documents containing the term are
 *               appended in ascending order
 *   docID     - set to the ID of the last matching document; left untouched
 *               when no document matches
 */
void post2(vector<string> &terms,vector<vector<string> > &documents,vector<int> &take1,int
&docID)
{
    for(vector<vector<string> >::size_type i=0;i<documents.size();i++)
    {
        //record a document only when it really contains the term
        if(find(documents[i].begin(),documents[i].end(),"love")!=documents[i].end())
        {
            docID=static_cast<int>(i)+1;//document IDs start at 1
            take1.push_back(docID);
        }
    }
    ofstream outputFile("posting.txt",ios::app);
    outputFile<<"The second posting list[love]:"<<endl;
    outputFile<<"==============================="<<endl;
    for(vector<int>::size_type i=0;i<take1.size();i++)
        outputFile<<take1[i]<<" ";
    outputFile<<endl;
}
/*
 * Writes the intersection of two posting lists ("mother AND love") to
 * "posting.txt", appending to whatever the file already contains.
 *   take, take1 - posting lists; the caller must pass both sorted in
 *                 ascending order. They are only read, never modified.
 *   documents   - not used by this function
 */
void inter2_post(vector<int> &take,vector<int> &take1,vector<vector<string> > &documents)
{
    ofstream outputFile("posting.txt",ios::app);
    outputFile<<"The intersection of the two posting lists"<<"[mother AND love]:"<<endl;
    outputFile<<"==============================================================="<<endl;
    //Merge walk over both lists: advance the cursor that points at the smaller
    //ID; equal IDs belong to the intersection and advance both cursors.
    vector<int>::size_type i=0;
    vector<int>::size_type j=0;
    while(i<take.size() && j<take1.size())
    {
        if(take[i]==take1[j])
        {
            outputFile<<take[i]<<" ";//output the intersected posting
            ++i;
            ++j;
        }
        else if(take[i]<take1[j])
        {
            ++i;
        }
        else
        {
            ++j;
        }
    }
    outputFile<<endl;
}
/*
 * Converts a term to lower case in place.
 *   lowercase_term - term to convert; modified by the call
 * Returns a copy of the converted term.
 */
string Lower(string& lowercase_term)
{
    for(string::size_type i=0;i<lowercase_term.length();i++)
    {
        //tolower() needs a value representable as unsigned char (or EOF);
        //a plain char can be negative for non-ASCII bytes.
        lowercase_term[i]=static_cast<char>(tolower(static_cast<unsigned char>(lowercase_term[i])));
    }
    return lowercase_term;
}
/*
 * Removes every character that is not a letter from a term, in place.
 * Because only alphabetic characters are kept, digits are removed together
 * with the punctuation.
 *   s - term to clean; modified by the call
 * Returns a copy of the cleaned term.
 */
string removepunc(string& s)
{
    string holder;//collects the characters that are kept
    for(string::size_type i=0;i<s.length();i++)
    {
        //isalpha() needs a value representable as unsigned char (or EOF);
        //a plain char can be negative for non-ASCII bytes.
        if(isalpha(static_cast<unsigned char>(s[i])))
            holder.push_back(s[i]);
    }
    s=holder;
    return s;
}
/*
 * Counts the characters that precede the first digit, '&' or '-' in a term.
 *   s - term to inspect; not modified
 * Returns that count. It equals s.length() when the term contains none of
 * these characters, and 0 when the term is empty or starts with one of them.
 */
int digitcheck(string& s)
{
    int c=0;
    for(string::size_type i=0;i<s.length();i++)
    {
        //each character is tested on its own; isdigit() needs a value
        //representable as unsigned char (or EOF)
        if(isdigit(static_cast<unsigned char>(s[i])) || s[i]=='&' || s[i]=='-')
            break;
        ++c;
    }
    return c;
}




================================================
output should look like::

The first posting list[mother]:
===============================
1          2          4        
The second posting list[love]:
===============================
1          2          3        
The intersection of the two posting lists[mother AND love]:
===============================================================
1     2    


Jumapili, 1 Desemba 2013

How to install a LAMP server on fedora 19 - Mysql(MariaDB) , PHP and Apache Webmin STEPS BY STEP

How to install a LAMP server on fedora 19 - Mysql(MariaDB) , PHP and Apache   Webmin
===============================================================
STEPS BY STEP
=============
<1> ::  sudo yum install httpd -y
<2>::   sudo systemctl enable httpd.service
<3>::   sudo yum install mysql mysql-server
<4>::   sudo systemctl enable mysqld.service
<5>::   sudo yum install php -y
<6>::   sudo yum install php-mysql -y
<7>::   reboot
<8>::   sudo mysql_secure_installation

webmin installation
ingiza command hiyo

 <9>:: sudo gedit  /etc/yum.repos.d/webmin.repo


<10>:: Andika maneno haya

[Webmin]
name=Webmin Distribution Neutral
#baseurl=http://download.webmin.com/download/yum
mirrorlist=http://download.webmin.com/download/yum/mirrorlist
enabled=1

SAVE/CLOSE

<11>::  wget http://www.webmin.com/jcameron-key.asc
<12>::  sudo rpm --import jcameron-key.asc
<13>:: sudo yum install webmin

ifconfig =======> uangalie ip address inayokuhusu na port # yake na uziweke
kwenye url kama ifuatavyo
http://<address>:<port_number> ENTER
for more info look in:http://www.webmin.com/index.html
*********LET THE PARTY BEGIN********

Jumamosi, 30 Novemba 2013

Cosine Similarity Using C++

/*C++ program to show cosine similarities*/
#include<iostream>//include the library of input output stream
#include<fstream>//Header file for reading the word in the file
#include<vector>//Header file for storing the word in the documents
#include<map>//for relating the terms,frequencies,TF&TF_IDF and cosine smilarities
#include<cmath>//for  doing mathematical calculation
#include<algorithm>//for sorting
using namespace std;//allows us to use the standard library

//string Lower(string& lowercase_term);//prototype for changing words to lowercase (disabled)
//Prototype: reads the documents file, fills the three containers and prints one line of statistics per term.
void tf_idf_compute(map<string,int> &frequency,vector<vector<string> > &documents, vector<string> &terms);
int main()//where a c++ program starts/execution starts.
{//openining curled brac i.e main()opens
cout<<"Terms\t\t""Term frequency\t\t\t""TF_IDF\t\t""CosineSimilarity";
    cout<<"\n";
    vector<vector<string> > dc;// dictionary
    vector<string> tokens;//container for documents which represent refered vector  in prototype
    map<string,int>S;//A vector "s"that stores int type values.

    tf_idf_compute(S,dc,tokens);//call function for tf_idf
}
/*
 * Reads the documents stored in "regs.txt", collects term statistics and
 * prints one line per term: term, term frequency, TF-IDF weight and the
 * cosine value.
 * The file is read word by word; a stand-alone "#" ends the current document.
 *   frequency - receives, per term, the number of occurrences over all documents
 *   documents - receives one vector of words per non-empty document
 *   terms     - receives every distinct term, sorted ascending
 * If the file cannot be opened "file not found" is printed and nothing else
 * happens. Otherwise the function waits for one character on standard input
 * before it returns.
 */
void tf_idf_compute(map<string,int> &frequency,vector<vector<string> > &documents,vector<string> &terms)
{
    ifstream file("regs.txt");//the file is only read, so an input stream is enough
    if(!file)
    {
        cout<<"file not found"<<endl;
        return;
    }

    string hb;            //holds the word that was just read
    vector<string> words; //words of the document currently being read
    //Loop on the extraction itself: it fails exactly when the input is
    //exhausted, so no separate eof() test is needed.
    while(file>>hb)
    {
        if(hb=="#")//document delimiter
        {
            if(!words.empty())
            {
                documents.push_back(words);
                words.clear();
            }
            continue;
        }
        words.push_back(hb);
        terms.push_back(hb);
        frequency[hb]++;
    }
    if(!words.empty())//last document was not terminated by "#"
    {
        documents.push_back(words);
    }
    //Sort and remove repeated terms once, after all terms have been collected.
    sort(terms.begin(),terms.end());
    terms.erase(unique(terms.begin(),terms.end()),terms.end());

    const double docCount=static_cast<double>(documents.size());
    for(map<string,int>::const_iterator iter=frequency.begin();iter!=frequency.end();++iter)
    {
        //document frequency: number of documents that contain this term
        int df=0;
        for(vector<vector<string> >::size_type i=0;i<documents.size();i++)
        {
            if(find(documents[i].begin(),documents[i].end(),iter->first)!=documents[i].end())
                ++df;
        }
        //tf_idf = (1 + log10(tf)) * log10(N / df). The division is done in
        //floating point so that a ratio such as 3/2 is not truncated to 1.
        float tf_idf=0;
        if(df>0 && iter->second>0)
            tf_idf=static_cast<float>((1+log10(static_cast<double>(iter->second)))*log10(docCount/df));
        //(tf_idf*tf)/(|tf_idf|*|tf|) is undefined when tf_idf is 0 (the term occurs
        //in every document); 0 is reported instead of dividing by zero.
        const float denominator=fabs(tf_idf)*fabs(static_cast<float>(iter->second));
        float cosine=0;
        if(denominator!=0)
            cosine=(tf_idf*iter->second)/denominator;
        cout<<iter->first<<"                  "<<iter->second<<"                       "<<tf_idf<<"              "<<cosine<<endl;
    }

    cin.get();//holds the screen
}


====================================================================
Your .txt file should look like the one below
(mine was named "regs.txt"):

Information retrieval #
Information retrieval it is a discipline #
organization and storage should provide easy access #

Jumanne, 26 Novemba 2013

Compute Recall Precision and F-Measure using C++


Copy and paste the data below into a .txt file:
True-Positives= 20
False-Positives= 40
False-Negatives= 60
True-Negatives= 100
Save the .txt file and the program in the same folder, then run the program.


Alhamisi, 26 Septemba 2013

Jumatatu, 6 Mei 2013


cygwin

What is it?

Cygwin is a Linux-like environment for Windows. It consists of a DLL (cygwin1.dll), which acts as an emulation layer providing substantial POSIX (Portable Operating System Interface) system call functionality, and a collection of tools, which provide a Linux look and feel. The Cygwin DLL works with all x86 and AMD64 versions of Windows NT since Windows 2000. The API follows the Single Unix Specification as much as possible, and then Linux practice. The major difference between Cygwin and Linux is the C library (newlib instead of glibc).
With Cygwin installed, users have access to many standard UNIX utilities. They can be used from one of the provided shells such as bash or from the Windows Command Prompt. Additionally, programmers may write Win32 console or GUI applications that make use of the standard Microsoft Win32 API and/or the Cygwin API. As a result, it is possible to easily port many significant UNIX programs without the need for extensive changes to the source code. This includes configuring and building most of the available GNU software (including the development tools included with the Cygwin distribution).

Quick Start Guide for those more experienced with Windows

If you are new to the world of UNIX, you may find it difficult to understand at first. This guide is not meant to be comprehensive, so we recommend that you use the many available Internet resources to become acquainted with UNIX basics (search for "UNIX basics" or "UNIX tutorial").
To install a basic Cygwin environment, run the setup.exe program and click Next at each page. The default settings are correct for most users. If you want to know more about what each option means, see the section called “Internet Setup”. Use setup.exe any time you want to update or install a Cygwin package. If you are installing Cygwin for a specific purpose, use it to install the tools that you need. For example, if you want to compile C++ programs, you need the gcc-g++ package and probably a text editor like nano. When running setup.exe, clicking on categories and packages in the package installation screen will provide you with the ability to control what is installed or updated.
Another option is to install everything by clicking on the Default field next to the All category. However, be advised that this will download and install several hundreds of megabytes of software to your computer. The best plan is probably to click on individual categories and install either entire categories or packages from the categories themselves. After installation, you can find Cygwin-specific documentation in the /usr/share/doc/Cygwin/ directory.
Developers coming from a Windows background will be able to write console or GUI executables that rely on the Microsoft Win32 API instead of Cygwin using the mingw32 or mingw64 cross-compiler toolchains. The -shared option to GCC allows to write Windows Dynamically Linked Libraries (DLLs). The resource compiler windres is also provided.

Quick Start Guide for those more experienced with UNIX

If you are an experienced UNIX user who misses a powerful command-line environment, you will enjoy Cygwin. Developers coming from a UNIX background will find a set of utilities they are already comfortable using, including a working UNIX shell. The compiler tools are the standard GNU compilers most people will have previously used under UNIX, only ported to the Windows host. Programmers wishing to port UNIX software to Windows NT will find that the Cygwin library provides an easy way to port many UNIX packages, with only minimal source code changes.
Note that there are some workarounds that cause Cygwin to behave differently than most UNIX-like operating systems; these are described in more detail in the section called “Using Cygwin effectively with Windows”.
Use the graphical command setup.exe any time you want to update or install a Cygwin package. This program must be run manually every time you want to check for updated packages since Cygwin does not currently include a mechanism for automatically detecting package updates.
By default, setup.exe only installs a minimal subset of packages. Add any other packages by clicking on the + next to the Category name and selecting the package from the displayed list. You may search for specific tools by using the Setup Package Search at the Cygwin web site.
Another option is to install everything by clicking on the Default field next to the All category. However, be advised that this will download and install several hundreds of megabytes of software to your computer. The best plan is probably to click on individual categories and install either entire categories or packages from the categories themselves. After installation, you can find Cygwin-specific documentation in the /usr/share/doc/Cygwin/ directory.
For more information about what each option in setup.exe means, see the section called “Internet Setup”.

Are the Cygwin tools free software?

Yes. Parts are GNU software (gcc, gas, ld, etc.), parts are covered by the standard X11 license, some of it is public domain, some of it was written by Red Hat and placed under the GNU General Public License (GPL). None of it is shareware. You don't have to pay anyone to use it but you should be sure to read the copyright section of the FAQ for more information on how the GNU GPL may affect your use of these tools. If you intend to port a proprietary application using the Cygwin library, you may want the Cygwin proprietary-use license. For more information about the proprietary-use license, please go to http://www.redhat.com/services/custom/cygwin/. Customers of the native Win32 GNUPro should feel free to submit bug reports and ask questions through Red Hat channels. All other questions should be sent to the project mailing list <cygwin@cygwin.com>.

 

A brief history of the Cygwin project

Note

A historical look into the first years of Cygwin development is Geoffrey J. Noer's 1998 paper, "Cygwin32: A Free Win32 Porting Layer for UNIX® Applications" which can be found at the 2nd USENIX Windows NT Symposium Online Proceedings.
Cygwin began development in 1995 at Cygnus Solutions (now part of Red Hat, Inc.). The first thing done was to enhance the development tools (gcc, gdb, gas, etc.) so that they could generate and interpret Win32 native object files. The next task was to port the tools to Win NT/9x. We could have done this by rewriting large portions of the source to work within the context of the Win32 API. But this would have meant spending a huge amount of time on each and every tool. Instead, we took a substantially different approach by writing a shared library (the Cygwin DLL) that adds the necessary UNIX-like functionality missing from the Win32 API (fork, spawn, signals, select, sockets, etc.). We call this new interface the Cygwin API. Once written, it was possible to build working Win32 tools using UNIX-hosted cross-compilers, linking against this library.
From this point, we pursued the goal of producing Windows-hosted tools capable of rebuilding themselves under Windows 9x and NT (this is often called self-hosting). Since neither OS ships with standard UNIX user tools (fileutils, textutils, bash, etc...), we had to get the GNU equivalents working with the Cygwin API. Many of these tools were previously only built natively so we had to modify their configure scripts to be compatible with cross-compilation. Other than the configuration changes, very few source-level changes had to be made since Cygwin provided a UNIX-like API. Running bash with the development tools and user tools in place, Windows 9x and NT looked like a flavor of UNIX from the perspective of the GNU configure mechanism. Self hosting was achieved as of the beta 17.1 release in October 1996.
The entire Cygwin toolset was available as a monolithic install. In April 2000, the project announced a New Cygwin Net Release which provided the native non-Cygwin Win32 program setup.exe to install and upgrade each package separately. Since then, the Cygwin DLL and setup.exe have seen continuous development.
The latest major improvement in this development is the 1.7 release in 2009, which dropped Windows 95/98/Me support in favor of using Windows NT features more extensively. It adds a lot of new features like case-sensitive filenames, NFS interoperability, IPv6 support and much more.

Highlights of Cygwin Functionality

Introduction

When a binary linked against the library is executed, the Cygwin DLL is loaded into the application's text segment. Because we are trying to emulate a UNIX kernel which needs access to all processes running under it, the first Cygwin DLL to run creates shared memory areas and global synchronization objects that other processes using separate instances of the DLL can access. This is used to keep track of open file descriptors and to assist fork and exec, among other purposes. Every process also has a per_process structure that contains information such as process id, user id, signal masks, and other similar process-specific information.
The DLL is implemented as a standard DLL in the Win32 subsystem. Under the hood it's using the Win32 API, as well as the native NT API, where appropriate.

Note

Some restrictions apply for calls to the Win32 API. For details, see the section called “Restricted Win32 environment”, as well as the section called “Using the Win32 file API in Cygwin applications”.
The native NT API is used mainly for speed, as well as to access NT capabilities which are useful to implement certain POSIX features, but are hidden to the Win32 API.
Due to some restrictions in Windows, it's not always possible to strictly adhere to existing UNIX standards like POSIX.1. Fortunately these are mostly corner cases.
Note that many of the things that Cygwin does to provide POSIX compatibility do not mesh well with the native Windows API. If you mix POSIX calls with Windows calls in your program it is possible that you will see uneven results. In particular, Cygwin signals will not work with Windows functions which block and Windows functions which accept filenames may be confused by Cygwin's support for long filenames.

Permissions and Security

Windows NT includes a sophisticated security model based on Access Control Lists (ACLs). Cygwin maps Win32 file ownership and permissions to ACLs by default, on file systems supporting them (usually NTFS). Solaris style ACLs and accompanying function calls are also supported. The chmod call maps UNIX-style permissions back to the Win32 equivalents. Because many programs expect to be able to find the /etc/passwd and /etc/group files, we provide utilities that can be used to construct them from the user and group information provided by the operating system.
Users with Administrator rights are permitted to chown files. With version 1.1.3 Cygwin introduced a mechanism for setting real and effective UIDs. This is described in the section called “Using Windows security in Cygwin”. As of version 1.5.13, the Cygwin developers are not aware of any feature in the Cygwin DLL that would allow users to gain privileges or to access objects to which they have no rights under Windows. However there is no guarantee that Cygwin is as secure as the Windows it runs on. Cygwin processes share some variables and are thus easier targets of denial of service type of attacks.

 

 

File Access

Cygwin supports both POSIX- and Win32-style paths, using either forward or back slashes as the directory delimiter. Paths coming into the DLL are translated from POSIX to native NT as needed. From the application perspective, the file system is a POSIX-compliant one. The implementation details are safely hidden in the Cygwin DLL. UNC pathnames (starting with two slashes) are supported for network paths.
Since version 1.7.0, the layout of this POSIX view of the Windows file system space is stored in the /etc/fstab file. Actually, there is a system-wide /etc/fstab file as well as a user-specific fstab file /etc/fstab.d/${USER}.
At startup the DLL has to find out where it can find the /etc/fstab file. The mechanism used for this is simple. First it retrieves its own path, for instance C:\Cygwin\bin\cygwin1.dll. From there it deduces that the root path is C:\Cygwin. So it looks for the fstab file in C:\Cygwin\etc\fstab. The layout of this file is very similar to the layout of the fstab file on Linux. Just instead of block devices, the mount points point to Win32 paths. An installation with setup.exe installs a fstab file by default, which can easily be changed using the editor of your choice.
The fstab file allows mounting arbitrary Win32 paths into the POSIX file system space. A special case is the so-called cygdrive prefix. It's the path under which every available drive in the system is mounted under its drive letter. The default value is /cygdrive, so you can access the drives as /cygdrive/c, /cygdrive/d, etc... The cygdrive prefix can be set to some other value (/mnt for instance) in the fstab file(s).
The library exports several Cygwin-specific functions that can be used by external programs to convert a path or path list from Win32 to POSIX or vice versa. Shell scripts and Makefiles cannot call these functions directly. Instead, they can do the same path translations by executing the cygpath utility program that we provide with Cygwin.
Win32 applications handle filenames in a case preserving, but case insensitive manner. Cygwin supports case sensitivity on file systems supporting that. Since Windows XP, the OS only supports case sensitivity when a specific registry value is changed. Therefore, case sensitivity is not usually the default.
Symbolic links are not present and supported on Windows up to and including Windows Server 2003 R2. Native symlinks are available starting with Windows Vista. Due to their strange implementation, however, they are not useful in a POSIX emulation layer. Cygwin recognizes native symlinks, but does not create them.
Symbolic links are potentially created in two different ways. The file style symlinks are files containing a magic cookie followed by the path to which the link points. They are marked with the System DOS attribute so that only files with that attribute have to be read to determine whether or not the file is a symbolic link. The shortcut style symlinks are Windows shortcut files with a special header and the Readonly DOS attribute set. The advantage of file symlinks is speed, the advantage of shortcut symlinks is the fact that they can be utilized by non-Cygwin Win32 tools as well.
Starting with Cygwin 1.7, symbolic links are using UTF-16 to encode the filename of the target file, to better support internationalization. Symlinks created by older Cygwin releases can be read just fine. However, you could run into problems with them if you're now using another character set than the one you used when creating these symlinks (see the section called “Potential Problems when using Locales”). Please note that this new UTF-16 style of symlinks is not compatible with older Cygwin releases, which can't read the target filename correctly.
Hard links are fully supported on NTFS and NFS file systems. On FAT and other file systems which don't support hardlinks, the call returns with an error, just like on other POSIX systems.
On file systems which don't support unique persistent file IDs (FAT, older Samba shares) the inode number for a file is calculated by hashing its full Win32 path. The inode number generated by the stat call always matches the one returned in d_ino of the dirent structure. It is worth noting that the number produced by this method is not guaranteed to be unique. However, we have not found this to be a significant problem because of the low probability of generating a duplicate inode number.
Cygwin 1.7 and later supports Extended Attributes (EAs) via the linux-specific function calls getxattr, setxattr, listxattr, and removexattr. All EAs on Samba or NTFS are treated as user EAs, so, if the name of an EA is "foo" from the Windows perspective, it's transformed into "user.foo" within Cygwin. This allows Linux-compatible EA operations and keeps tools like attr, or setfattr happy.
chroot is supported since Cygwin 1.1.3. However, chroot is not a concept known by Windows. This implies some serious restrictions. First of all, the chroot call isn't a privileged call. Any user may call it. Second, the chroot environment isn't safe against native windows processes. Given that, chroot in Cygwin is only a hack which pretends security where there is none. For that reason the usage of chroot is discouraged.

Text Mode vs. Binary Mode

It is often important that files created by native Windows applications be interoperable with Cygwin applications. For example, a file created by a native Windows text editor should be readable by a Cygwin application, and vice versa.
Unfortunately, UNIX and Win32 have different end-of-line conventions in text files. A UNIX text file will have a single newline character (LF) whereas a Win32 text file will instead use a two character sequence (CR+LF). Consequently, the two character sequence must be translated on the fly by Cygwin into a single character newline when reading in text mode.
This solution addresses the newline interoperability concern at the expense of violating the POSIX requirement that text and binary mode be identical. Consequently, processes that attempt to lseek through text files can no longer rely on the number of bytes read to be an accurate indicator of position within the file. For this reason, Cygwin allows you to choose the mode in which a file is read in several ways.

ANSI C Library

We chose to include Red Hat's own existing ANSI C library "newlib" as part of the library, rather than write all of the lib C and math calls from scratch. Newlib is a BSD-derived ANSI C library, previously only used by cross-compilers for embedded systems development. Other functions, which are not supported by newlib have been added to the Cygwin sources using BSD implementations as much as possible.
The reuse of existing free implementations of such things as the glob, regexp, and getopt libraries saved us considerable effort. In addition, Cygwin uses Doug Lea's free malloc implementation that successfully balances speed and compactness. The library accesses the malloc calls via an exported function pointer. This makes it possible for a Cygwin process to provide its own malloc if it so desires.

Process Creation

The fork call in Cygwin is particularly interesting because it does not map well on top of the Win32 API. This makes it very difficult to implement correctly. Currently, the Cygwin fork is a non-copy-on-write implementation similar to what was present in early flavors of UNIX.
The first thing that happens when a parent process forks a child process is that the parent initializes a space in the Cygwin process table for the child. It then creates a suspended child process using the Win32 CreateProcess call. Next, the parent process calls setjmp to save its own context and sets a pointer to this in a Cygwin shared memory area (shared among all Cygwin tasks). It then fills in the child's .data and .bss sections by copying from its own address space into the suspended child's address space. After the child's address space is initialized, the child is run while the parent waits on a mutex. The child discovers it has been forked and longjumps using the saved jump buffer. The child then sets the mutex the parent is waiting on and blocks on another mutex. This is the signal for the parent to copy its stack and heap into the child, after which it releases the mutex the child is waiting on and returns from the fork call. Finally, the child wakes from blocking on the last mutex, recreates any memory-mapped areas passed to it via the shared area, and returns from fork itself.
While we have some ideas as to how to speed up our fork implementation by reducing the number of context switches between the parent and child process, fork will almost certainly always be inefficient under Win32. Fortunately, in most circumstances the spawn family of calls provided by Cygwin can be substituted for a fork/exec pair with only a little effort. These calls map cleanly on top of the Win32 API. As a result, they are much more efficient. Changing the compiler's driver program to call spawn instead of fork was a trivial change and increased compilation speeds by twenty to thirty percent in our tests.
However, spawn and exec present their own set of difficulties. Because there is no way to do an actual exec under Win32, Cygwin has to invent its own Process IDs (PIDs). As a result, when a process performs multiple exec calls, there will be multiple Windows PIDs associated with a single Cygwin PID. In some cases, stubs of each of these Win32 processes may linger, waiting for their exec'd Cygwin process to exit.

Problems with process creation

The semantics of fork require that a forked child process have exactly the same address space layout as its parent. However, Windows provides no native support for cloning address space between processes and several features actively undermine a reliable fork implementation. Three issues are especially prevalent:
  • DLL base address collisions. Unlike *nix shared libraries, which use "position-independent code", Windows shared libraries assume a fixed base address. Whenever the hard-wired address ranges of two DLLs collide (which occurs quite often), the Windows loader must "rebase" one of them to a different address. However, it may not resolve collisions consistently, and may rebase a different dll and/or move it to a different address every time. Cygwin can usually compensate for this effect when it involves libraries opened dynamically, but collisions among statically-linked dlls (dependencies known at compile time) are resolved before cygwin1.dll initializes and cannot be fixed afterward. This problem can only be solved by removing the base address conflicts which cause the problem, usually using the rebaseall tool.
  • Address space layout randomization (ASLR). Starting with Vista, Windows implements ASLR, which means that thread stacks, heap, memory-mapped files, and statically-linked dlls are placed at different (random) locations in each process. This behaviour interferes with a proper fork, and if an unmovable object (process heap or system dll) ends up at the wrong location, Cygwin can do nothing to compensate (though it will retry a few times automatically).
  • DLL injection by BLODA. Badly-behaved applications which inject dlls into other processes often manage to clobber important sections of the child's address space, leading to base address collisions which rebasing cannot fix. The only way to resolve this problem is to remove (usually uninstall) the offending app. See the section called “Implemented options” for the detect_bloda option, which may be able to identify the BLODA.
In summary, current Windows implementations make it impossible to implement a perfectly reliable fork, and occasional fork failures are inevitable.

Signals

When a Cygwin process starts, the library starts a secondary thread for use in signal handling. This thread waits for Windows events used to pass signals to the process. When a process notices it has a signal, it scans its signal bitmask and handles the signal in the appropriate fashion.
Several complications in the implementation arise from the fact that the signal handler operates in the same address space as the executing program. The immediate consequence is that Cygwin system functions are interruptible unless special care is taken to avoid this. We go to some lengths to prevent the sig_send function that sends signals from being interrupted. In the case of a process sending a signal to another process, we place a mutex around sig_send such that sig_send will not be interrupted until it has completely finished sending the signal.
In the case of a process sending itself a signal, we use a separate semaphore/event pair instead of the mutex. sig_send starts by resetting the event and incrementing the semaphore that flags the signal handler to process the signal. After the signal is processed, the signal handler signals the event that it is done. This process keeps intraprocess signals synchronous, as required by POSIX.
Most standard UNIX signals are provided. Job control works as expected in shells that support it.

Sockets

Socket-related calls in Cygwin basically call the functions by the same name in Winsock, Microsoft's implementation of Berkeley sockets, but with lots of tweaks. All sockets are non-blocking under the hood so that blocking calls can be interrupted by POSIX signals. Additional bookkeeping is necessary to implement correct socket sharing POSIX semantics and especially for the select call. Some socket-related functions are not implemented at all in Winsock, as, for example, socketpair. Starting with Windows Vista, Microsoft removed the legacy calls rcmd(3), rexec(3) and rresvport(3). Recent versions of Cygwin now implement all these calls internally.
An especially troublesome feature of Winsock is that it must be initialized before the first socket function is called. As a result, Cygwin has to perform this initialization on the fly, as soon as the first socket-related function is called by the application. In order to support sockets across fork calls, child processes initialize Winsock if any inherited file descriptor is a socket.
AF_UNIX (AF_LOCAL) sockets are not available in Winsock. They are implemented in Cygwin by using local AF_INET sockets instead. This is completely transparent to the application. Cygwin's implementation also supports the getpeereid BSD extension. However, Cygwin does not yet support descriptor passing.
IPv6 is supported beginning with Cygwin release 1.7.0. This support is dependent, however, on the availability of the Windows IPv6 stack. The IPv6 stack was "experimental", i.e. not feature complete in Windows 2003 and earlier. Full IPv6 support became available starting with Windows Vista and Windows Server 2008. Cygwin does not depend on the underlying OS for the (newly implemented) getaddrinfo and getnameinfo functions. Cygwin 1.7.0 adds replacement functions which implement the full functionality for IPv4.

Select

The UNIX select function is another call that does not map cleanly on top of the Win32 API. Much to our dismay, we discovered that the Win32 select in Winsock only worked on socket handles. Our implementation allows select to function normally when given different types of file descriptors (sockets, pipes, handles, and a custom /dev/windows Windows messages pseudo-device).
Upon entry into the select function, the first operation is to sort the file descriptors into the different types. There are then two cases to consider. The simple case is when at least one file descriptor is a type that is always known to be ready (such as a disk file). In that case, select returns immediately as soon as it has polled each of the other types to see if they are ready. The more complex case involves waiting for socket or pipe file descriptors to be ready. This is accomplished by the main thread suspending itself, after starting one thread for each type of file descriptor present. Each thread polls the file descriptors of its respective type with the appropriate Win32 API call. As soon as a thread identifies a ready descriptor, that thread signals the main thread to wake up. This case is now the same as the first one since we know at least one descriptor is ready. So select returns, after polling all of the file descriptors one last time.

What's new and what changed in Cygwin 1.7

What's new and what changed from 1.7.16 to 1.7.17

  • Support the "e" flag to fopen(3). This is a Glibc extension which allows to fopen the file with the O_CLOEXEC flag set.
  • Support the "x" flag to fopen(3). This is a Glibc/C11 extension which allows to open the file with the O_EXCL flag set.

What's new and what changed from 1.7.15 to 1.7.16

  • New API: getmntent_r, memrchr.
  • Recognize ReFS filesystem.

What's new and what changed from 1.7.14 to 1.7.15

  • CYGWIN=pipe_byte option now forces the opening of pipes in byte mode rather than message mode.

What's new and what changed from 1.7.13 to 1.7.14

  • Add mouse reporting modes 1005, 1006 and 1015 to console window.

What's new and what changed from 1.7.12 to 1.7.13

  • mkpasswd and mkgroup now try to print an entry for the TrustedInstaller account existing since Windows Vista/Server 2008.
  • Terminal typeahead when switching from canonical to non-canonical mode is now properly flushed.

What's new and what changed from 1.7.11 to 1.7.12

  • Cygwin now automatically populates the /dev directory with all existing POSIX devices.
  • Add virtual /proc/PID/mountinfo file.
  • flock now additionally supports the following scenario, which requires to propagate locks to the parent process:
  •     (
  •       flock -n 9 || exit 1
  •       # ... commands executed under lock ...
  •     ) 9>/var/lock/mylockfile
  
Only propagation to the direct parent process is supported so far, not to grand parents or sibling processes.
  • Add a "detect_bloda" setting for the CYGWIN environment variable to help finding potential BLODAs.

What's new and what changed from 1.7.10 to 1.7.11

  • New pldd command for listing DLLs loaded by a process.
  • New API: scandirat.
  • Change the way remote shares mapped to drive letters are recognized when creating the cygdrive directory. If Windows claims the drive is unavailable, don't show it in the cygdrive directory listing.
  • Raise default stacksize of pthreads from 512K to 1 Meg. It can still be changed using the pthread_attr_setstacksize call.

What's new and what changed from 1.7.9 to 1.7.10

  • Drop support for Windows NT4.
  • The CYGWIN environment variable options "envcache", "strip_title", "title", "tty", and "upcaseenv" have been removed.
  • If the executable (and the system) is large address aware, the application heap will be placed in the large memory area. The peflags tool from the rebase package can be used to set the large address awareness flag in the executable file header.
  • The registry setting "heap_chunk_in_mb" has been removed, in favor of a new per-executable setting in the executable file header which can be set using the peflags tool. See the section called “Changing Cygwin's Maximum Memory” for more information.
  • The CYGWIN=tty mode using pipes to communicate with the console in a pseudo tty-like mode has been removed. Either just use the normal Windows console as is, or use a terminal application like mintty.
  • New getconf command for querying confstr(3), pathconf(3), sysconf(3), and limits.h configuration.
  • New tzset utility to generate a POSIX-compatible TZ environment variable from the Windows timezone settings.
  • The passwd command now allows an administrator to use the -R command for other user accounts: passwd -R username.
  • Pthread spinlocks. New APIs: pthread_spin_destroy, pthread_spin_init, pthread_spin_lock, pthread_spin_trylock, pthread_spin_unlock.
  • Pthread stack address management. New APIs: pthread_attr_getstack, pthread_attr_getstackaddr, pthread_attr_getguardsize, pthread_attr_setstack, pthread_attr_setstackaddr, pthread_attr_setguardsize, pthread_getattr_np.
  • POSIX Clock Selection option. New APIs: clock_nanosleep, pthread_condattr_getclock, pthread_condattr_setclock.
  • clock_gettime(3) and clock_getres(3) accept per-process and per-thread CPU-time clocks, including CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID. New APIs: clock_getcpuclockid, pthread_getcpuclockid.
  • GNU/glibc error.h error reporting functions. New APIs: error, error_at_line. New exports: error_message_count, error_one_per_line, error_print_progname. Also, perror and strerror_r no longer clobber strerror storage.
  • C99 <tgmath.h> type-generic macros.
  • /proc/loadavg now shows the number of currently running processes and the total number of processes.
  • Added /proc/devices and /proc/misc, which lists supported device types and their device numbers.
  • Added /proc/swaps, which shows the location and size of Windows paging file(s).
  • Added /proc/sysvipc/msg, /proc/sysvipc/sem, and /proc/sysvipc/shm which provide information about System V IPC message queues, semaphores, and shared memory.
  • /proc/version now shows the username of whoever compiled the Cygwin DLL as well as the version of GCC used when compiling.
  • dlopen now supports the Glibc-specific RTLD_NODELETE and RTLD_NOOPEN flags.
  • The printf(3) and wprintf(3) families of functions now handle the %m conversion flag.
  • Other new API: clock_settime, __fpurge, getgrouplist, get_current_dir_name, getpt, ppoll, psiginfo, psignal, ptsname_r, sys_siglist, pthread_setschedprio, pthread_sigqueue, sysinfo.

What's new and what changed from 1.7.8 to 1.7.9

  • New API: strchrnul.

What's new and what changed from 1.7.7 to 1.7.8

  • Drop support for Windows NT4 prior to Service Pack 4.
  • Reinstantiate Cygwin's ability to delete an empty directory which is the current working directory of the same or another process. Same for any other empty directory which has been opened by the same or another process.
  • Cygwin now ships the C standard library fenv.h header file, and implements the related APIs (including GNU/glibc extensions): feclearexcept, fedisableexcept, feenableexcept, fegetenv, fegetexcept, fegetexceptflag, fegetprec, fegetround, feholdexcept, feraiseexcept, fesetenv, fesetexceptflag, fesetprec, fesetround, fetestexcept, feupdateenv, and predefines both default and no-mask FP environments. See the GNU C Library manual for full details of this functionality.
  • Support for the C99 complex functions, except for the "long double" implementations. New APIs: cacos, cacosf, cacosh, cacoshf, carg, cargf, casin, casinf, casinh, casinhf, catan, catanf, catanh, catanhf, ccos, ccosf, ccosh, ccoshf, cexp, cexpf, cimag, cimagf, clog, clogf, conj, conjf, cpow, cpowf, cproj, cprojf, creal, crealf, csin, csinf, csinh, csinhf, csqrt, csqrtf, ctan, ctanf, ctanh, ctanhf.
  • Fix the width of "CJK Ambiguous Width" characters to 1 for singlebyte charsets and 2 for East Asian multibyte charsets. (For UTF-8, it remains dependent on the specified language, and the "@cjknarrow" locale modifier can still be used to force width 1.)
  • The strerror_r interface now has two flavors; if _GNU_SOURCE is defined, it retains the previous behavior of returning char * (but the result is now guaranteed to be NUL-terminated); otherwise it now obeys POSIX semantics of returning int.
  • /proc/sys now allows unfiltered access to the native NT namespace. Access restrictions still apply. Direct device access via /proc/sys is not yet supported. File system access via block devices works. For instance (note the trailing slash!)
  • bash$ cd /proc/sys/Device/HarddiskVolumeShadowCopy1/
  • Other new APIs: llround, llroundf, madvise, pthread_yield. Export program_invocation_name, program_invocation_short_name. Support TIOCGPGRP, TIOCSPGRP ioctls.

What's new and what changed from 1.7.6 to 1.7.7

What's new and what changed from 1.7.5 to 1.7.6

  • Add new mount options "dos" and "ihash" to allow overriding Cygwin default behaviour on broken filesystems not recognized by Cygwin.
  • Add new mount option "bind" to allow remounting parts of the POSIX file hierarchy somewhere else.
  • Ttys and ptys are handled as securable objects using file-like permissions and owner/group information. chmod and chown now work on ttys/ptys. A new mechanism is used to propagate pty handles safely to other processes, which does not require to use Cygserver.
  • Pass on coresize settings made with setrlimit(2). This allows shells to disable creating stackdump files in child processes via
ulimit -c 0
in bash or
limit coredumpsize 0
in tcsh.
  • Locale categories contain all localization strings additionally as wide-char strings. locale(1) prints these values just as on Linux. nl_langinfo(3) allows to fetch them.
  • New interfaces mkostemp(3) and mkostemps(3) are added.
  • New virtual file /proc/filesystems.
  • clock_gettime(3) and clock_getres(3) accept CLOCK_MONOTONIC.
  • DEPRECATED with 1.7.7: Cygwin handles the current working directory entirely on its own. The Win32 current working directory is set to an invalid path to be out of the way. [...]

What's new and what changed from 1.7.3 to 1.7.5

  • Support for DEC Backarrow Key Mode escape sequences (ESC [ ? 67 h, ESC [ ? 67 l) in Windows console.

What's new and what changed from 1.7.2 to 1.7.3

  • Support for GB2312/EUC-CN. These charsets are implemented as aliases to GBK. GB2312 is now the default charset name for the locales zh_CN and zh_SG, just as on Linux.
  • Modification and access timestamps of devices reflect the current time.

What's new and what changed from 1.7.1 to 1.7.2

  • Localization support has been much improved.
    • Cygwin now handles locales using the underlying Windows locale support. The locale must exist in Windows to be recognized. Locale aliases from the file /usr/share/locale/locale.alias are also allowed, as long as their replacement is supported by the underlying Windows.
    • New tool "locale" to fetch locale information and default locales based on the Windows default settings as well as lists of all supported locales and character sets.
    • Default charset for locales without explicit charset is now chosen from a list of Linux-compatible charsets.
For instance: en_US -> ISO-8859-1, ja_JP -> EUC-JP, zh_TW -> Big5.
    • Added support for the charsets GEORGIAN-PS, PT154, and TIS-620.
    • Support for the various locale modifiers to switch charsets as on Linux.
    • Default charset in the "C" or "POSIX" locale has been changed back from UTF-8 to ASCII, to avoid problems with applications expecting a singlebyte charset in the "C"/"POSIX" locale. Still use UTF-8 internally for filename conversion in this case.
    • LC_COLLATE, LC_MONETARY, LC_NUMERIC, and LC_TIME localization is enabled via Windows locale support. LC_MESSAGES is enabled via a big table with localized strings.
    • fnmatch(3), regcomp(3), regexec(3) calls are now multibyte-aware.
    • printf(3), wprintf(3) families of functions now handle the grouping flag, the apostrophe ', per POSIX-1.2008. The integer portion of the result of a decimal conversion (%i, %d, %u, %f, %F, %g, %G) will be formatted with thousands' grouping characters.
    • strftime(3), wcsftime(3), and strptime(3) now handle the E and O format modifiers to print/scan alternative date and time representations or to use alternative digits in locales which support this. Additionally these functions now also support the padding modifiers '0' and '+', as well as a field width per POSIX-1.2008.
    • New strfmon(3) call.
  • Support open(2) flags O_CLOEXEC and O_TTY_INIT flags. Support fcntl flag F_DUPFD_CLOEXEC. Support socket flags SOCK_CLOEXEC and SOCK_NONBLOCK. Add new Linux-compatible API calls accept4(2), dup3(2), and pipe2(2). Support the signal SIGPWR.
  • Enhanced Windows console support.
    • The console's backspace keycode can be changed using 'stty erase'.
    • Function keys send distinguished escape sequences compatible with rxvt. Keypad keys send distinguished escape sequences, xterm-style.
    • Support of combining Alt and AltGr modifiers in console window (compatible with xterm and mintty), so that e.g. Alt-@ sends ESC @ also on keyboards where @ is mapped to an AltGr combination.
    • Report mouse wheel scroll events in mouse reporting mode 1000 (note: this doesn't seem to work on all systems, assumedly due to driver interworking issues). Add mouse reporting mode 1002 to report mouse drag movement. Add mouse reporting mode 1003 to report any mouse movement. Add focus event reporting (mode 1004), compatible with xterm and mintty.
    • Add escape sequences for not bold (22), not invisible (28), not blinking (25) (compatible with xterm and mintty).
    • Support VT100 line drawing graphics mode in console window (compatible with xterm and mintty).
  • Handle native DOS paths always as if mounted with "posix=0,noacl".
  • Handle UNC paths starting with slashes identical to /cygdrive paths. In other words, use the /cygdrive mount flags for these paths as well.
  • Recognize NWFS filesystem and workaround broken OS call.
  • New support for eXtensible Data Record (XDR) encoding and decoding, as defined by RFCs 1014, 1832, and 4506. The XDR protocol and functions are useful for cross-platform data exchange, and are commonly used as the core data interchange format for Remote Procedure Call (RPC) and NFS.

OS related changes

  • Windows 95, 98 and Me are not supported anymore. The new Cygwin 1.7 DLL will not run on any of these systems.
  • Add support for Windows 7 and Windows Server 2008 R2.

File Access related changes

  • Mount points are no longer stored in the registry. Use /etc/fstab and /etc/fstab.d/$USER instead. Mount points created with mount(1) are only local to the current session and disappear when the last Cygwin process in the session exits.
  • Cygwin creates the mount points for /, /usr/bin, and /usr/lib automatically from its own position on the disk. They don't have to be specified in /etc/fstab.
  • If a filename cannot be represented in the current character set, the character will be converted to a sequence Ctrl-X + UTF-8 representation of the character. This allows to access all files, even those not having a valid representation of their filename in the current character set. To always have a valid string, use the UTF-8 charset by setting the environment variable $LANG, $LC_ALL, or $LC_CTYPE to a valid POSIX value, such as "en_US.UTF-8".
  • PATH_MAX is now 4096. Internally, path names can be as long as the underlying OS can handle (32K).
  • struct dirent now supports d_type, filled out with DT_REG or DT_DIR. All other file types return as DT_UNKNOWN for performance reasons.
  • The CYGWIN environment variable options "ntsec" and "smbntsec" have been replaced by the per-mount option "acl"/"noacl".
  • The CYGWIN environment variable option "ntea" has been removed without substitute.
  • The CYGWIN environment variable option "check_case" has been removed in favor of real case-sensitivity on file systems supporting it.
  • Creating filenames with special DOS characters '"', '*', ':', '<', '>', '|' is supported.
  • Creating files with special DOS device filename components ("aux", "nul", "prn") is supported.
  • File names are case sensitive if the OS and the underlying file system supports it. Works on NTFS and NFS. Does not work on FAT and Samba shares. Requires to change a registry key (see the User's Guide). Can be switched off on a per-mount basis.
  • Due to the above changes, managed mounts have been removed.
  • Incoming DOS paths are always handled case-insensitive and get no POSIX permission, as if they are mounted with noacl,posix=0 mount flags.
  • unlink(2) and rmdir(2) try very hard to remove files/directories even if they are currently accessed or locked. This is done by utilizing the hidden recycle bin directories and marking the files for deletion.
  • rename(2) rewritten to be more POSIX conformant.
  • access(2) now performs checks using the real user ID, as required by POSIX; the old behavior of querying based on effective user ID is available through the new faccessat(2) and euidaccess(2) APIs.
  • Add st_birthtim member to struct stat.
  • File locking is now advisory, not mandatory anymore. The fcntl(2) and the new lockf(2) APIs create and maintain locks with POSIX semantics, the flock(2) API creates and maintains locks with BSD semantics. POSIX and BSD locks are independent of each other.
  • Implement atomic O_APPEND mode.
  • New open(2) flags O_DIRECTORY, O_EXEC and O_SEARCH.
  • Make the "plain file with SYSTEM attribute set" style symlink default again when creating symlinks. Only create Windows shortcut style symlinks if CYGWIN=winsymlinks is set in the environment.
  • Symlinks now use UTF-16 encoding for the target filename for better internationalization support. Cygwin 1.7 can read all old style symlinks, but the new style is not compatible with older Cygwin releases.
  • Handle NTFS native symlinks available since Vista/2008 as symlinks (but don't create Vista/2008 symlinks due to unfortunate OS restrictions).
  • Recognize NFS shares and handle them using native mechanisms. Recognize and create real symlinks on NFS shares. Get correct stat(2) information and set real mode bits on open(2), mkdir(2) and chmod(2).
  • Recognize MVFS and workaround problems manipulating metadata and handling DOS attributes.
  • Recognize Netapp DataOnTap drives and fix inode number handling.
  • Recognize Samba version beginning with Samba 3.0.28a using the new extended version information negotiated with the Samba developers.
  • Stop faking hardlinks by copying the file on filesystems which don't support hardlinks natively (FAT, FAT32, etc.). Just return an error instead, just like Linux.
  • List servers of all accessible domains and workgroups in // instead of just the servers in the own domain/workgroup.
  • Support Linux-like extended attributes ([fl]getxattr, [fl]listxattr, [fl]setxattr, [fl]removexattr).
  • New file conversion API for conversion from Win32 to POSIX path and vice versa (cygwin_conv_path, cygwin_create_path, cygwin_conv_path_list).
  • New openat family of functions: openat, faccessat, fchmodat, fchownat, fstatat, futimesat, linkat, mkdirat, mkfifoat, mknodat, readlinkat, renameat, symlinkat, unlinkat.
  • Other new APIs: posix_fadvise, posix_fallocate, funopen, fopencookie, open_memstream, open_wmemstream, fmemopen, fdopendir, fpurge, mkstemps, eaccess, euidaccess, canonicalize_file_name, fexecve, execvpe.

Network related changes

  • New implementation for blocking sockets and select on sockets which is supposed to allow POSIX-compatible sharing of sockets between threads and processes.
  • send/sendto/sendmsg now send data in 64K chunks to circumvent an internal buffer problem in WinSock (KB 201213).
  • New send/recv option MSG_DONTWAIT.
  • IPv6 support. New APIs getaddrinfo, getnameinfo, freeaddrinfo, gai_strerror, in6addr_any, in6addr_loopback. On IPv6-less systems, replacement functions are available for IPv4. On systems with IPv6 enabled, the underlying WinSock functions are used. While I tried hard to get the functionality as POSIXy as possible, keep in mind that a *fully* conformant implementation of getaddrinfo and other stuff is only available starting with Windows Vista/2008.
  • Resolver functions (res_init, res_query, res_search, res_querydomain, res_mkquery, res_send, dn_comp, dn_expand) are now part of Cygwin. Applications don't have to link against minires anymore. Actually, this *is* the former libminires.a.
  • rcmd is now implemented inside of Cygwin, instead of calling the WinSock function. This allows rsh(1) usage on Vista/2008 and later, which dropped this function from WinSock.
  • Define multicast structures in netinet/in.h. Note that fully conformant multicast support is only available beginning with Vista/2008.
  • Improve get_ifconf. Redefine struct ifreq and subsequent datastructures to be able to keep more information. Support SIOCGIFINDEX, SIOCGIFDSTADDR and the Cygwin specific SIOCGIFFRNDLYNAM. Support real interface flags on systems supporting them.
  • Other new APIs: bindresvport, bindresvport_sa, gethostbyname2, iruserok_sa, rcmd_af, rresvport_af. getifaddrs, freeifaddrs, if_nametoindex, if_indextoname, if_nameindex, if_freenameindex.
  • Add /proc/net/if_inet6.

Device related changes

  • Reworked pipe implementation which uses overlapped IO to create more reliable interruptible pipes and fifos.
  • The CYGWIN environment variable option "binmode" has been removed.
  • Improved fifo handling by using native Windows named pipes.
  • Detect when a stdin/stdout which looks like a pipe is really a tty. Among other things, this allows a debugged application to recognize that it is using the same tty as the debugger.
  • Support UTF-8 in console window.
  • In the console window the backspace key now emits DEL (0x7f) instead of BS (0x08), Alt-Backspace emits ESC-DEL (0x1b,0x7f) instead of DEL (0x7f), same as the Linux console and xterm. Control-Space now emits an ASCII NUL (0x0) character.
  • Support up to 64 serial interfaces using /dev/ttyS0 - /dev/ttyS63.
  • Support up to 128 raw disk drives /dev/sda - /dev/sddx.
  • New API: cfmakeraw, get_avphys_pages, get_nprocs, get_nprocs_conf, get_phys_pages, posix_openpt.

 

Other POSIX related changes

  • A lot of character sets are supported now via a call to setlocale(). The setting of the environment variables $LANG, $LC_ALL or $LC_CTYPE will be used. For instance, setting $LANG to "de_DE.ISO-8859-15" before starting a Cygwin session will use the ISO-8859-15 character set in the entire session. The default locale in the absence of one of the aforementioned environment variables is "C.UTF-8".
The full list of supported character sets: "ASCII", "ISO-8859-x" with x in 1-16, except 12, "UTF-8", Windows codepages "CPxxx", with xxx in (437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258), "KOI8-R", "KOI8-U", "SJIS", "GBK", "eucJP", "eucKR", and "Big5".
  • Allow multiple concurrent read locks per thread for pthread_rwlock_t.
  • Implement pthread_kill(thread, 0) as per POSIX.
  • New API for POSIX IPC: Named semaphores: sem_open, sem_close, sem_unlink. Message queues: mq_open, mq_getattr, mq_setattr, mq_notify, mq_send, mq_timedsend, mq_receive, mq_timedreceive, mq_close, mq_unlink. Shared memory: shm_open, shm_unlink.
  • Only declare expected functions in <strings.h>, don't include <string.h> from here.
  • Support for WCONTINUED, WIFCONTINUED() added to waitpid and wait4.
  • New APIs: _Exit, confstr, insque, remque, sys_sigabbrev, posix_madvise, posix_memalign, reallocf, exp10, exp10f, pow10, pow10f, lrint, lrintf, rint, rintf, llrint, llrintf, llrintl, lrintl, rintl, mbsnrtowcs, strcasestr, stpcpy, stpncpy, wcpcpy, wcpncpy, wcsnlen, wcsnrtombs, wcsftime, wcstod, wcstof, wcstoimax, wcstok, wcstol, wcstoll, wcstoul, wcstoull, wcstoumax, wcsxfrm, wcscasecmp, wcsncasecmp, fgetwc, fgetws, fputwc, fputws, fwide, getwc, getwchar, putwc, putwchar, ungetwc, asnprintf, dprintf, vasnprintf, vdprintf, wprintf, fwprintf, swprintf, vwprintf, vfwprintf, vswprintf, wscanf, fwscanf, swscanf, vwscanf, vfwscanf, vswscanf.

Security related changes

  • Getting a domain user's groups is hopefully more bulletproof now.
  • Cygwin now comes with a real LSA authentication package. This must be manually installed by a privileged user using the /bin/cyglsa-config script. The advantages and disadvantages are noted in http://cygwin.com/ml/cygwin-developers/2006-11/msg00000.html
  • Cygwin now allows storage and use of user passwords in a hidden area of the registry. This is tried first when Cygwin is called by privileged processes to switch the user context. This allows, for instance, ssh public key sessions with full network credentials to access shares on other machines.
  • New options have been added to the mkpasswd and mkgroup tools to ease use in multi-machine and multi-domain environments. The existing options have a slightly changed behaviour.

Miscellaneous

  • New ldd utility, similar to Linux.
  • New link libraries libdl.a, libresolv.a, librt.a.
  • Fallout from the long path names: If the current working directory is longer than 260 bytes, or if the current working directory is a virtual path (like /proc, /cygdrive, //server), don't call native Win32 programs since they don't understand these paths.
  • On the first usage of a DOS path (C:\foo, \\foo\bar), the Cygwin DLL emits a scary warning that DOS paths shouldn't be used. This warning may be disabled via the new CYGWIN=nodosfilewarning setting.
  • The CYGWIN environment variable option "server" has been removed. Cygwin automatically uses cygserver if it's available.
  • Allow environment of arbitrary size instead of a maximum of 32K.
  • Don't force uppercase environment when started from a non-Cygwin process. Except for certain Windows and POSIX variables which are always uppercased, preserve environment case. Switch back to old behaviour with the new CYGWIN=upcaseenv setting.
  • Detect and report a missing DLL on process startup.
  • Add /proc/registry32 and /proc/registry64 paths to access 32 bit and 64 bit registry on 64 bit systems.
  • Add the ability to distinguish registry keys and registry values with the same name in the same registry subtree. The key is called "foo" and the value will be called "foo%val" in this case.
  • Align /proc/cpuinfo more closely to Linux content.
  • Add /proc/$PID/mounts entries and a symlink /proc/mounts pointing to /proc/self/mounts as on Linux.
  • Optimized strstr and memmem implementation.
  • Remove backwards compatibility with old signal masks. (Some *very* old programs which use signal masks may no longer work correctly).
  • Cygwin now exports wrapper functions for libstdc++ operators new and delete, to support the toolchain in implementing full C++ standards conformance when working with shared libraries.
  • Different Cygwin installations in different paths can be run in parallel without knowing of each other. The path of the Cygwin DLL used in a process is a key used when creating IPC objects. So different Cygwin DLLs are running in different namespaces.
  • Each Cygwin DLL stores its path and installation key in the registry. This allows troubleshooting of problems which could be a result of having multiple concurrent Cygwin installations. 
HOW DO YOU COMPILE AND RUN A PROGRAM WITH IT