"utility.h" file should contain the following lines
#include<iostream>
#include<fstream>
#include<vector>
#include<algorithm>
#include<string>
#include<map>
---------------------------------------------------------------------------------------------
"yangu.txt" should contain the following but not necessarily for the purpose of this program::
MY MOTHER love me too much #
i love my mother too #
do you love me too #
or only you care about your mother #
-----------------------------------------------------------------------------------------------
#include"utility.h"
using namespace std;
// Forward declarations of the helpers that are defined after main().

// Converts lowercase_term to lower case in place and returns the result.
string Lower(string& lowercase_term);
// Rebuilds s from the characters that pass isalpha() and returns the result.
string removepunc(string& s);
// Scans s looking for a digit, '&' or '-' and returns a character count.
int digitcheck(string& s);
// Reads "yangu.txt" and fills the term dictionary and the per-document word lists.
void dictionery(vector<string> &terms,vector<vector<string> > &documents);
// Searches every document for "mother", stores document IDs in take and
// writes the first posting list to posting.txt.
void post1(vector<string> &terms,vector<vector<string> > &documents, vector<int> &take,int
&docID);
// Searches every document for "love", stores document IDs in take1 and
// appends the second posting list to posting.txt.
void post2(vector<string> &terms,vector<vector<string> > &documents, vector<int> &take1,int
&docID);
// Appends the "[mother AND love]" intersection section to posting.txt.
void inter2_post(vector<int> &take,vector<int> &take1,vector<vector<string> > &documents);
// Program entry point: builds the dictionary from yangu.txt, creates the two
// posting lists and their intersection, and writes everything to posting.txt.
int main()
{
    std::vector<std::string> v;                 // dictionary of distinct terms
    std::vector<int> tk;                        // posting list filled by post1
    std::vector<int> tk1;                       // posting list filled by post2
    std::vector<std::vector<std::string> > vv;  // words of each document
    // Initialised because post1/post2 receive it by reference and may read it
    // before any document has matched.
    int ID = 0;

    std::cout<<"THE OUTPUT WILL BE WRITTEN IN TEXT FILE NAMED posting "<<std::endl;
    dictionery(v,vv);        // read the documents and build the dictionary
    post1(v,vv,tk,ID);       // first posting list
    post2(v,vv,tk1,ID);      // second posting list
    inter2_post(tk,tk1,vv);  // intersection of the two lists
    system("pause");         // hold the screen (runs the shell's "pause" command)
    return 0;
}
void dictionery(vector<string> &terms,vector<vector<string> > &documents)//function for
creating dictionery
{
ifstream mem("yangu.txt");
if(!mem)
{
cout<<"The file for documents not created"<<endl;
}
else
{
while(!mem.eof())
{
string word;
vector<string> words;
while(mem>>word && word!="#")
{
Lower(word);
digitcheck(word);
removepunc(word);
words.push_back(word);
terms.push_back(word);
sort(terms.begin(),terms.end());//sorting the terms
terms.erase(unique(terms.begin(),terms.end()),terms.end());//remove term
repeatation
}
if(!words.empty())
{
documents.push_back(words);
}
}
}
}
// Creates the posting list for the term "mother" and writes it to posting.txt
// (the file is truncated first).
//
//   terms     - dictionary; not used by this function
//   documents - word list of each document; document i has ID i+1
//   take      - receives the IDs of the documents that contain "mother"
//   docID     - set to the ID of the last matching document; left unchanged
//               when no document matches
void post1(std::vector<std::string> &terms,std::vector<std::vector<std::string> > &documents,std::vector<int> &take,int
&docID)
{
    (void)terms;
    for(std::size_t i=0;i<documents.size();i++)
    {
        // Record an ID only when the term is really present; pushing docID for
        // every document would repeat a stale (or uninitialised) ID.
        if(std::find(documents[i].begin(),documents[i].end(),"mother")!=documents[i].end())
        {
            docID=static_cast<int>(i)+1;
            take.push_back(docID);
        }
    }
    take.erase(std::unique(take.begin(),take.end()),take.end());//remove repeated IDs

    std::ofstream outputFile("posting.txt");
    outputFile<<"The first posting list[mother]:"<<std::endl;
    outputFile<<"==============================="<<std::endl;
    for(std::size_t i=0;i<take.size();i++)
        outputFile<<take[i]<<" ";
    outputFile<<std::endl;
}
// Creates the posting list for the term "love" and appends it to posting.txt.
//
//   terms     - dictionary; not used by this function
//   documents - word list of each document; document i has ID i+1
//   take1     - receives the IDs of the documents that contain "love"
//   docID     - set to the ID of the last matching document; left unchanged
//               when no document matches
void post2(std::vector<std::string> &terms,std::vector<std::vector<std::string> > &documents,std::vector<int> &take1,int
&docID)
{
    (void)terms;
    for(std::size_t i=0;i<documents.size();i++)
    {
        // Record an ID only when the term is really present; pushing docID for
        // every document would repeat a stale (or uninitialised) ID.
        if(std::find(documents[i].begin(),documents[i].end(),"love")!=documents[i].end())
        {
            docID=static_cast<int>(i)+1;
            take1.push_back(docID);
        }
    }
    take1.erase(std::unique(take1.begin(),take1.end()),take1.end());//remove repeated IDs

    std::ofstream outputFile("posting.txt",std::ios::app);
    outputFile<<"The second posting list[love]:"<<std::endl;
    outputFile<<"==============================="<<std::endl;
    for(std::size_t i=0;i<take1.size();i++)
        outputFile<<take1[i]<<" ";
    outputFile<<std::endl;
}
// Appends the intersection of two posting lists to posting.txt.
//
//   take, take1 - posting lists to intersect; both must be in ascending order
//                 (post1/post2 produce them that way). They are only read.
//   documents   - not used by this function
//
// The lists are walked in step with one cursor each: equal IDs are written
// out and both cursors advance, otherwise the cursor on the smaller ID
// advances. Empty lists simply give an empty result line.
void inter2_post(std::vector<int> &take,std::vector<int> &take1,std::vector<std::vector<std::string> > &documents)
{
    (void)documents;
    std::ofstream outputFile("posting.txt",std::ios::app);
    outputFile<<"The intersection of the two posting lists"<<"[mother AND love]:"<<std::endl;
    outputFile<<"==============================================================="<<std::endl;

    std::size_t i=0;
    std::size_t j=0;
    while(i<take.size() && j<take1.size())
    {
        if(take[i]==take1[j])
        {
            outputFile<<take[i]<<" ";//output the common document ID
            ++i;
            ++j;
        }
        else if(take[i]<take1[j])
        {
            ++i;//advance the list that is behind
        }
        else
        {
            ++j;
        }
    }
    outputFile<<std::endl;
}
// Converts every character of lowercase_term to lower case, in place.
// Returns a copy of the converted string.
std::string Lower(std::string& lowercase_term)
{
    for(std::string::size_type i=0;i<lowercase_term.length();i++)
    {
        // tolower() requires a value representable as unsigned char (or EOF);
        // a plain char may be negative for non-ASCII bytes.
        const unsigned char ch=static_cast<unsigned char>(lowercase_term[i]);
        lowercase_term[i]=static_cast<char>(std::tolower(ch));
    }
    return lowercase_term;
}
// Removes every character of s that is not alphabetic (punctuation, digits,
// spaces, ...), in place. Returns a copy of the cleaned string.
std::string removepunc(std::string& s)
{
    std::string holder;
    for(std::string::size_type i=0;i<s.length();i++)
    {
        // isalpha() requires a value representable as unsigned char (or EOF);
        // a plain char may be negative for non-ASCII bytes.
        if(std::isalpha(static_cast<unsigned char>(s[i]))!=0)
        {
            holder.push_back(s[i]);//keep alphabetic characters only
        }
    }
    s=holder;
    return s;
}
// Counts the characters of s that come before the first digit, '&' or '-'.
// Returns that count; it equals s.length() when s contains none of them and
// 0 when s is empty or starts with one. s is not modified.
int digitcheck(std::string& s)
{
    int c=0;
    for(std::string::size_type i=0;i<s.length();i++)
    {
        const unsigned char ch=static_cast<unsigned char>(s[i]);
        // The original wrapped the whole "||" chain inside isdigit(), so the
        // test was applied to a 0/1 value and could never succeed.
        if(std::isdigit(ch)||ch=='&'||ch=='-')
            break;
        ++c;
    }
    return c;
}
================================================
output should look like::
The first posting list[mother]:
===============================
1 2 4
The second posting list[love]:
===============================
1 2 3
The intersection of the two posting lists[mother AND love]:
===============================================================
1 2
LEARNING BY PRACTICAL
Ijumaa, 15 Agosti 2014
Jumapili, 1 Desemba 2013
How to install a LAMP server on fedora 19 - Mysql(MariaDB) , PHP and Apache Webmin STEPS BY STEP
How to install a LAMP server on fedora 19 - Mysql(MariaDB) , PHP and Apache Webmin
===============================================================
STEPS BY STEP
=============
<1> :: sudo yum install httpd -y
<2>:: sudo systemctl enable httpd.service
<3>:: sudo yum install mysql mysql-server
<4>:: sudo systemctl enable httpd.service
<5>:: sudo yum install php -y
<6>:: sudo yum install php-mysql -y
<7>:: reboot
<8>:: sudo mysql_secure_installation
webmin installation
ingiza command hiyo
<9>:: sudo gedit /etc/yum.repos.d/webmin.repo
<10>:: Andika maneno haya
[Webmin]
name=Webmin Distribution Neutral
#baseurl=http://download.webmin.com/download/yum
mirrorlist=http://download.webmin.com/download/yum/mirrorlist
enabled=1
SAVE/CLOSE
<11>:: wget http://www.webmin.com/jcameron-key.asc
<12>:: sudo rpm --import jcameron-key.asc
<13>:: sudo yum install webmin
ifconfig =======> uangalie ip address inayokuhusu na port # yake na uziweke
kwenye url kama ifutavyo
http://<address>:<port_number> then press ENTER
for more info look in:http://www.webmin.com/index.html
===============================================================
STEPS BY STEP
=============
<1> :: sudo yum install httpd -y
<2>:: sudo systemctl enable httpd.service
<3>:: sudo yum install mysql mysql-server
<4>:: sudo systemctl enable httpd.service
<5>:: sudo yum install php -y
<6>:: sudo yum install php-mysql -y
<7>:: reboot
<8>:: sudo mysql_secure_installation
webmin installation
ingiza command hiyo
<9>:: sudo gedit /etc/yum.repos.d/webmin.repo
<10>:: Andika maneno haya
[Webmin]
name=Webmin Distribution Neutral
#baseurl=http://download.webmin.com/download/yum
mirrorlist=http://download.webmin.com/download/yum/mirrorlist
enabled=1
SAVE/CLOSE
<11>:: wget http://www.webmin.com/jcameron-key.asc
<12>:: sudo rpm --import jcameron-key.asc
<13>:: sudo yum install webmin
ifconfig =======> uangalie ip address inayokuhusu na port # yake na uziweke
kwenye url kama ifutavyo
http://<address>:<port_number> then press ENTER
for more info look in:http://www.webmin.com/index.html
*********LET THE PARTY BEGIN********
Jumamosi, 30 Novemba 2013
Cosine Similarity Using C++
/*C++ program to show cosine similarities*/
#include<iostream>//include the library of input output stream
#include<fstream>//Header file for reading the word in the file
#include<vector>//Header file for storing the word in the documents
#include<map>//for relating the terms,frequencies,TF&TF_IDF and cosine smilarities
#include<cmath>//for doing mathematical calculation
#include<algorithm>//for sorting
using namespace std;//allows us to use the standard library
//string Lower(string& lowercase_term);//prototype for changing words to lowercase
void tf_idf_compute(map<string,int> &frequency,vector<vector<string> > &documents, vector<string> &terms);//prototype for finding a frequency of terms
// Entry point: prints the column headings and hands three empty containers
// to tf_idf_compute(), which fills them and prints one row per term.
int main()
{
    std::map<std::string,int> termCounts;            // term -> occurrence count
    std::vector<std::string> vocabulary;             // distinct terms
    std::vector<std::vector<std::string> > corpus;   // words of each document

    std::cout<<"Terms\t\t""Term frequency\t\t\t""TF_IDF\t\t""CosineSimilarity"<<"\n";
    tf_idf_compute(termCounts,corpus,vocabulary);
    return 0;
}
// Reads the '#'-delimited documents stored in "regs.txt" and prints one line
// per term: "<term> <frequency> <tf_idf> <cosine>".
//
//   frequency - filled with term -> number of occurrences over the whole file
//   documents - filled with one word list per non-empty document
//   terms     - filled with the distinct terms, sorted
//
// tf_idf is (1 + log10(frequency)) * log10(N / df), where N is the number of
// documents and df the number of documents that contain the term.
// The cosine column keeps the original formula
// (tf_idf * frequency) / (|tf_idf| * |frequency|); because it compares two
// single numbers it can only be 1 or -1, and it is reported as 0 when tf_idf
// is 0 (the division would otherwise be 0/0).
//
// Prints "file not found" and returns when regs.txt cannot be opened.
// After printing, waits for one character on standard input.
void tf_idf_compute(std::map<std::string,int> &frequency,std::vector<std::vector<std::string> > &documents,std::vector<std::string> &terms)
{
    std::ifstream file("regs.txt");//read-only access is all that is needed
    if(!file)
    {
        std::cout<<"file not found"<<std::endl;
        return;
    }

    // Loop on the stream state instead of eof(): a failed read that does not
    // set eofbit would otherwise spin forever.
    while(file)
    {
        std::string token;
        std::vector<std::string> words;
        while(file>>token && token!="#")//"#" marks the end of a document
        {
            words.push_back(token);
            terms.push_back(token);
            frequency[token]++;
        }
        if(!words.empty())
        {
            documents.push_back(words);
        }
    }
    // Sort and de-duplicate once, after reading, instead of after every word.
    std::sort(terms.begin(),terms.end());
    terms.erase(std::unique(terms.begin(),terms.end()),terms.end());

    const double total_docs=static_cast<double>(documents.size());
    for(std::map<std::string,int>::iterator iter=frequency.begin();iter!=frequency.end();++iter)
    {
        // Document frequency of THIS term (the original reused the count of
        // the last term for every row).
        int doc_freq=0;
        for(std::size_t d=0;d<documents.size();++d)
        {
            if(std::find(documents[d].begin(),documents[d].end(),iter->first)!=documents[d].end())
            {
                ++doc_freq;
            }
        }

        float tf_idf=0.0f;
        if(doc_freq>0 && iter->second>0)
        {
            // Floating-point division: integer N/df would truncate the ratio.
            tf_idf=static_cast<float>((1.0+std::log10(static_cast<double>(iter->second)))
                                      *std::log10(total_docs/doc_freq));
        }

        const float count=static_cast<float>(iter->second);
        const float denom=std::fabs(tf_idf)*std::fabs(count);
        const float cosine=(denom!=0.0f)?(tf_idf*count)/denom:0.0f;

        std::cout<<iter->first<<" "<<iter->second<<" "<<tf_idf<<" "<<cosine<<std::endl;
    }
    std::cin.get();//holds the screen
}
====================================================================
Your input .txt file should look like the sample below
(mine was named "regs.txt"):
Information retrieval #
Information retrieval it is a discipline #
organization and storage should provide easy access #
#include<iostream>//include the library of input output stream
#include<fstream>//Header file for reading the word in the file
#include<vector>//Header file for storing the word in the documents
#include<map>//for relating the terms,frequencies,TF&TF_IDF and cosine smilarities
#include<cmath>//for doing mathematical calculation
#include<algorithm>//for sorting
using namespace std;//allows us to use the standard library
//string Lower(string& lowercase_term);//prototype for changing words to lowercase
void tf_idf_compute(map<string,int> &frequency,vector<vector<string> > &documents, vector<string> &terms);//prototype for finding a frequency of terms
// Entry point: prints the column headings and hands three empty containers
// to tf_idf_compute(), which fills them and prints one row per term.
int main()
{
    std::map<std::string,int> termCounts;            // term -> occurrence count
    std::vector<std::string> vocabulary;             // distinct terms
    std::vector<std::vector<std::string> > corpus;   // words of each document

    std::cout<<"Terms\t\t""Term frequency\t\t\t""TF_IDF\t\t""CosineSimilarity"<<"\n";
    tf_idf_compute(termCounts,corpus,vocabulary);
    return 0;
}
// Reads the '#'-delimited documents stored in "regs.txt" and prints one line
// per term: "<term> <frequency> <tf_idf> <cosine>".
//
//   frequency - filled with term -> number of occurrences over the whole file
//   documents - filled with one word list per non-empty document
//   terms     - filled with the distinct terms, sorted
//
// tf_idf is (1 + log10(frequency)) * log10(N / df), where N is the number of
// documents and df the number of documents that contain the term.
// The cosine column keeps the original formula
// (tf_idf * frequency) / (|tf_idf| * |frequency|); because it compares two
// single numbers it can only be 1 or -1, and it is reported as 0 when tf_idf
// is 0 (the division would otherwise be 0/0).
//
// Prints "file not found" and returns when regs.txt cannot be opened.
// After printing, waits for one character on standard input.
void tf_idf_compute(std::map<std::string,int> &frequency,std::vector<std::vector<std::string> > &documents,std::vector<std::string> &terms)
{
    std::ifstream file("regs.txt");//read-only access is all that is needed
    if(!file)
    {
        std::cout<<"file not found"<<std::endl;
        return;
    }

    // Loop on the stream state instead of eof(): a failed read that does not
    // set eofbit would otherwise spin forever.
    while(file)
    {
        std::string token;
        std::vector<std::string> words;
        while(file>>token && token!="#")//"#" marks the end of a document
        {
            words.push_back(token);
            terms.push_back(token);
            frequency[token]++;
        }
        if(!words.empty())
        {
            documents.push_back(words);
        }
    }
    // Sort and de-duplicate once, after reading, instead of after every word.
    std::sort(terms.begin(),terms.end());
    terms.erase(std::unique(terms.begin(),terms.end()),terms.end());

    const double total_docs=static_cast<double>(documents.size());
    for(std::map<std::string,int>::iterator iter=frequency.begin();iter!=frequency.end();++iter)
    {
        // Document frequency of THIS term (the original reused the count of
        // the last term for every row).
        int doc_freq=0;
        for(std::size_t d=0;d<documents.size();++d)
        {
            if(std::find(documents[d].begin(),documents[d].end(),iter->first)!=documents[d].end())
            {
                ++doc_freq;
            }
        }

        float tf_idf=0.0f;
        if(doc_freq>0 && iter->second>0)
        {
            // Floating-point division: integer N/df would truncate the ratio.
            tf_idf=static_cast<float>((1.0+std::log10(static_cast<double>(iter->second)))
                                      *std::log10(total_docs/doc_freq));
        }

        const float count=static_cast<float>(iter->second);
        const float denom=std::fabs(tf_idf)*std::fabs(count);
        const float cosine=(denom!=0.0f)?(tf_idf*count)/denom:0.0f;

        std::cout<<iter->first<<" "<<iter->second<<" "<<tf_idf<<" "<<cosine<<std::endl;
    }
    std::cin.get();//holds the screen
}
====================================================================
Your input .txt file should look like the sample below
(mine was named "regs.txt"):
Information retrieval #
Information retrieval it is a discipline #
organization and storage should provide easy access #
Jumanne, 26 Novemba 2013
Compute Recall Precision and F-Measure using C++
in a .txt file copy and paste the below data
True-Positives= 20
False-Positives= 40
False-Negatives= 60
True-Negatives= 100
save both in the same folder and run it
Alhamisi, 26 Septemba 2013
BINARY MATRIX OR INCIDENCE DOCUMENT MATRIX source code for C++ AN ASSIGNMENT FOR INFORMATION RETRIEVAL
- code hii ina run vema kabisa lakini nataka inioneshe
- command prompt ambayo inafanania na hiyo ya chini
Jumatatu, 6 Mei 2013
cygwin
What is it?
Cygwin is a Linux-like
environment for Windows. It consists of a DLL (cygwin1.dll), which acts as an
emulation layer providing substantial POSIX (Portable Operating System Interface) system call functionality,
and a collection of tools, which provide a Linux look and feel. The Cygwin DLL
works with all x86 and AMD64 versions of Windows NT since Windows 2000. The API
follows the Single Unix Specification as much as possible, and then Linux
practice. The major difference between Cygwin and Linux is the C library
(newlib instead of glibc).
With Cygwin installed, users have
access to many standard UNIX utilities. They can be used from one of the
provided shells such as bash or from the Windows Command Prompt.
Additionally, programmers may write Win32 console or GUI applications that make
use of the standard Microsoft Win32 API and/or the Cygwin API. As a result, it
is possible to easily port many significant UNIX programs without the need for
extensive changes to the source code. This includes configuring and building
most of the available GNU software (including the development tools included
with the Cygwin distribution).
Quick Start Guide for those more experienced
with Windows
If you are new to the world of
UNIX, you may find it difficult to understand at first. This guide is not meant
to be comprehensive, so we recommend that you use the many available Internet
resources to become acquainted with UNIX basics (search for "UNIX
basics" or "UNIX tutorial").
To install a basic Cygwin
environment, run the setup.exe program and click Next at each page. The
default settings are correct for most users. If you want to know more about
what each option means, see the section called “Internet Setup”. Use setup.exe
any time you want to update or install a Cygwin package. If you are installing
Cygwin for a specific purpose, use it to install the tools that you need. For
example, if you want to compile C++ programs, you need the gcc-g++ package and
probably a text editor like nano. When running setup.exe, clicking on
categories and packages in the package installation screen will provide you
with the ability to control what is installed or updated.
Another option is to install
everything by clicking on the Default field next to the All category. However,
be advised that this will download and install several hundreds of megabytes of
software to your computer. The best plan is probably to click on individual
categories and install either entire categories or packages from the categories
themselves. After installation, you can find Cygwin-specific documentation in
the /usr/share/doc/Cygwin/ directory.
Developers coming from a Windows
background will be able to write console or GUI executables that rely on the
Microsoft Win32 API instead of Cygwin using the mingw32 or mingw64
cross-compiler toolchains. The -shared option to GCC allows to write
Windows Dynamically Linked Libraries (DLLs). The resource compiler windres
is also provided.
Quick Start Guide for those more experienced
with UNIX
If you are an experienced UNIX
user who misses a powerful command-line environment, you will enjoy Cygwin.
Developers coming from a UNIX background will find a set of utilities they are
already comfortable using, including a working UNIX shell. The compiler tools
are the standard GNU compilers most people will have previously used under
UNIX, only ported to the Windows host. Programmers wishing to port UNIX
software to Windows NT will find that the Cygwin library provides an easy way
to port many UNIX packages, with only minimal source code changes.
Note that there are some
workarounds that cause Cygwin to behave differently than most UNIX-like
operating systems; these are described in more detail in the section called “Using Cygwin
effectively with Windows”.
Use the graphical command setup.exe
any time you want to update or install a Cygwin package. This program must be
run manually every time you want to check for updated packages since Cygwin
does not currently include a mechanism for automatically detecting package
updates.
By default, setup.exe only
installs a minimal subset of packages. Add any other packages by clicking on
the + next to the Category name and selecting the package from the displayed
list. You may search for specific tools by using the Setup Package Search at
the Cygwin web site.
Another option is to install
everything by clicking on the Default field next to the All category. However,
be advised that this will download and install several hundreds of megabytes of
software to your computer. The best plan is probably to click on individual
categories and install either entire categories or packages from the categories
themselves. After installation, you can find Cygwin-specific documentation in
the /usr/share/doc/Cygwin/ directory.
For more information about what each
option in setup.exe means, see the section called “Internet Setup”.
Are the Cygwin tools free software?
Yes. Parts are GNU software (gcc, gas,
ld, etc.), parts are covered by the standard X11 license,
some of it is public domain, some of it was written by Red Hat and placed under
the GNU General
Public License (GPL). None of it is shareware. You don't have to pay anyone
to use it but you should be sure to read the copyright section of the FAQ for
more information on how the GNU GPL may affect your use of these tools. If you
intend to port a proprietary application using the Cygwin library, you may want
the Cygwin proprietary-use license. For more information about the
proprietary-use license, please go to http://www.redhat.com/services/custom/cygwin/.
Customers of the native Win32 GNUPro should feel free to submit bug reports and
ask questions through Red Hat channels. All other questions should be sent to
the project mailing list <cygwin@cygwin.com>.
A brief history of the
Cygwin project
Note
A historical look into the first
years of Cygwin development is Geoffrey J. Noer's 1998 paper, "Cygwin32: A
Free Win32 Porting Layer for UNIX® Applications" which can be found at the
2nd USENIX Windows NT Symposium Online Proceedings.
Cygwin began development in 1995
at Cygnus Solutions (now part of Red Hat, Inc.). The first thing done was to
enhance the development tools (gcc, gdb, gas, etc.) so
that they could generate and interpret Win32 native object files. The next task
was to port the tools to Win NT/9x. We could have done this by rewriting large
portions of the source to work within the context of the Win32 API. But this
would have meant spending a huge amount of time on each and every tool.
Instead, we took a substantially different approach by writing a shared library
(the Cygwin DLL) that adds the necessary UNIX-like functionality missing from
the Win32 API (fork, spawn, signals, select, sockets, etc.). We call this new
interface the Cygwin API. Once written, it was possible to build working Win32
tools using UNIX-hosted cross-compilers, linking against this library.
From this point, we pursued the
goal of producing Windows-hosted tools capable of rebuilding themselves under
Windows 9x and NT (this is often called self-hosting). Since neither OS ships
with standard UNIX user tools (fileutils, textutils, bash, etc...), we had to
get the GNU equivalents working with the Cygwin API. Many of these tools were
previously only built natively so we had to modify their configure scripts to
be compatible with cross-compilation. Other than the configuration changes,
very few source-level changes had to be made since Cygwin provided a UNIX-like
API. Running bash with the development tools and user tools in place, Windows
9x and NT looked like a flavor of UNIX from the perspective of the GNU
configure mechanism. Self hosting was achieved as of the beta 17.1 release in
October 1996.
The entire Cygwin toolset was
available as a monolithic install. In April 2000, the project announced a New
Cygwin Net Release which provided the native non-Cygwin Win32 program setup.exe
to install and upgrade each package separately. Since then, the Cygwin DLL and setup.exe
have seen continuous development.
The latest major improvement in
this development is the 1.7 release in 2009, which dropped Windows 95/98/Me
support in favor of using Windows NT features more extensively. It adds a lot
of new features like case-sensitive filenames, NFS interoperability, IPv6
support and much more.
Highlights of Cygwin Functionality
Introduction
When a binary linked against the
library is executed, the Cygwin DLL is loaded into the application's text
segment. Because we are trying to emulate a UNIX kernel which needs access to
all processes running under it, the first Cygwin DLL to run creates shared
memory areas and global synchronization objects that other processes using
separate instances of the DLL can access. This is used to keep track of open
file descriptors and to assist fork and exec, among other purposes. Every
process also has a per_process structure that contains information such as
process id, user id, signal masks, and other similar process-specific
information.
The DLL is implemented as a
standard DLL in the Win32 subsystem. Under the hood it's using the Win32 API,
as well as the native NT API, where appropriate.
Note
Some restrictions apply for calls
to the Win32 API. For details, see the section called “Restricted Win32
environment”, as well as the section called
“Using the Win32 file API in Cygwin applications”.
The native NT API is used mainly
for speed, as well as to access NT capabilities which are useful to implement
certain POSIX features, but are hidden to the Win32 API.
Due to some restrictions in
Windows, it's not always possible to strictly adhere to existing UNIX standards
like POSIX.1. Fortunately these are mostly corner cases.
Note that many of the things that
Cygwin does to provide POSIX compatibility do not mesh well with the native
Windows API. If you mix POSIX calls with Windows calls in your program it is
possible that you will see uneven results. In particular, Cygwin signals will
not work with Windows functions which block and Windows functions which accept
filenames may be confused by Cygwin's support for long filenames.
Permissions and Security
Windows NT includes a
sophisticated security model based on Access Control Lists (ACLs). Cygwin maps
Win32 file ownership and permissions to ACLs by default, on file systems supporting
them (usually NTFS). Solaris style ACLs and accompanying function calls are
also supported. The chmod call maps UNIX-style permissions back to the Win32
equivalents. Because many programs expect to be able to find the /etc/passwd
and /etc/group files, we provide utilities
that can be used to construct them from the user and group information provided
by the operating system.
Users with Administrator rights
are permitted to chown files. With version 1.1.3 Cygwin introduced a mechanism
for setting real and effective UIDs. This is described in the section called “Using Windows
security in Cygwin”. As of version 1.5.13, the Cygwin developers are not
aware of any feature in the Cygwin DLL that would allow users to gain
privileges or to access objects to which they have no rights under Windows.
However there is no guarantee that Cygwin is as secure as the Windows it runs
on. Cygwin processes share some variables and are thus easier targets of denial
of service type of attacks.
File Access
Cygwin supports both POSIX- and
Win32-style paths, using either forward or back slashes as the directory
delimiter. Paths coming into the DLL are translated from POSIX to native NT as
needed. From the application perspective, the file system is a POSIX-compliant
one. The implementation details are safely hidden in the Cygwin DLL. UNC
pathnames (starting with two slashes) are supported for network paths.
Since version 1.7.0, the layout
of this POSIX view of the Windows file system space is stored in the /etc/fstab
file. Actually, there is a system-wide /etc/fstab file as well as a
user-specific fstab file /etc/fstab.d/${USER}.
At startup the DLL has to find
out where it can find the /etc/fstab file. The mechanism used for this is
simple. First it retrieves its own path, for instance C:\Cygwin\bin\cygwin1.dll.
From there it deduces that the root path is C:\Cygwin. So it looks for the
fstab file in C:\Cygwin\etc\fstab. The layout of this file is very similar to
the layout of the fstab file on Linux. Just instead of block devices, the mount
points point to Win32 paths. An installation with setup.exe installs a
fstab file by default, which can easily be changed using the editor of your
choice.
The fstab file allows mounting
arbitrary Win32 paths into the POSIX file system space. A special case is the
so-called cygdrive prefix. It's the path under which every available drive in
the system is mounted under its drive letter. The default value is /cygdrive,
so you can access the drives as /cygdrive/c, /cygdrive/d, etc... The cygdrive
prefix can be set to some other value (/mnt for instance) in the fstab file(s).
The library exports several
Cygwin-specific functions that can be used by external programs to convert a
path or path list from Win32 to POSIX or vice versa. Shell scripts and
Makefiles cannot call these functions directly. Instead, they can do the same
path translations by executing the cygpath utility program that we
provide with Cygwin.
Win32 applications handle
filenames in a case preserving, but case insensitive manner. Cygwin supports
case sensitivity on file systems supporting that. Since Windows XP, the OS only
supports case sensitivity when a specific registry value is changed. Therefore,
case sensitivity is not usually the default.
Symbolic links are not present
and supported on Windows up to and including Windows Server 2003 R2. Native
symlinks are available starting with Windows Vista. Due to their strange
implementation, however, they are not useful in a POSIX emulation layer. Cygwin
recognizes native symlinks, but does not create them.
Symbolic links are potentially
created in two different ways. The file style symlinks are files containing a
magic cookie followed by the path to which the link points. They are marked
with the System DOS attribute so that only files with that attribute have to be
read to determine whether or not the file is a symbolic link. The shortcut
style symlinks are Windows shortcut files with a special header and the
Readonly DOS attribute set. The advantage of file symlinks is speed, the
advantage of shortcut symlinks is the fact that they can be utilized by
non-Cygwin Win32 tools as well.
Starting with Cygwin 1.7,
symbolic links are using UTF-16 to encode the filename of the target file, to
better support internationalization. Symlinks created by older Cygwin releases
can be read just fine. However, you could run into problems with them if you're
now using another character set than the one you used when creating these
symlinks (see the section called “Potential
Problems when using Locales”). Please note that this new UTF-16 style of
symlinks is not compatible with older Cygwin releases, which can't read the
target filename correctly.
Hard links are fully supported on
NTFS and NFS file systems. On FAT and other file systems which don't support
hardlinks, the call returns with an error, just like on other POSIX systems.
On file systems which don't support
unique persistent file IDs (FAT, older Samba shares) the inode number for a
file is calculated by hashing its full Win32 path. The inode number generated
by the stat call always matches the one returned in d_ino of the dirent
structure. It is worth noting that the number produced by this method is not
guaranteed to be unique. However, we have not found this to be a significant
problem because of the low probability of generating a duplicate inode number.
Cygwin 1.7 and later supports
Extended Attributes (EAs) via the linux-specific function calls getxattr,
setxattr, listxattr, and removexattr. All EAs on Samba or NTFS are treated as
user EAs, so, if the name of an EA is "foo" from the Windows
perspective, it's transformed into "user.foo" within Cygwin. This
allows Linux-compatible EA operations and keeps tools like attr, or setfattr
happy.
chroot is supported since Cygwin
1.1.3. However, chroot is not a concept known by Windows. This implies some
serious restrictions. First of all, the chroot call isn't a privileged call.
Any user may call it. Second, the chroot environment isn't safe against native
windows processes. Given that, chroot in Cygwin is only a hack which pretends
security where there is none. For that reason the usage of chroot is discouraged.
Text Mode
vs. Binary Mode
It is often important that files
created by native Windows applications be interoperable with Cygwin
applications. For example, a file created by a native Windows text editor
should be readable by a Cygwin application, and vice versa.
Unfortunately, UNIX and Win32
have different end-of-line conventions in text files. A UNIX text file will
have a single newline character (LF) whereas a Win32 text file will instead use
a two character sequence (CR+LF). Consequently, the two character sequence must
be translated on the fly by Cygwin into a single character newline when reading
in text mode.
This solution addresses the
newline interoperability concern at the expense of violating the POSIX
requirement that text and binary mode be identical. Consequently, processes
that attempt to lseek through text files can no longer rely on the number of
bytes read to be an accurate indicator of position within the file. For this
reason, Cygwin allows you to choose the mode in which a file is read in several
ways.
ANSI C Library
We chose to include Red Hat's own
existing ANSI C library "newlib" as part of the library, rather than
write all of the lib C and math calls from scratch. Newlib is a BSD-derived
ANSI C library, previously only used by cross-compilers for embedded systems
development. Other functions, which are not supported by newlib have been added
to the Cygwin sources using BSD implementations as much as possible.
The reuse of existing free
implementations of such things as the glob, regexp, and getopt libraries saved
us considerable effort. In addition, Cygwin uses Doug Lea's free malloc
implementation that successfully balances speed and compactness. The library
accesses the malloc calls via an exported function pointer. This makes it possible
for a Cygwin process to provide its own malloc if it so desires.
Process
Creation
The fork call in Cygwin is
particularly interesting because it does not map well on top of the Win32 API.
This makes it very difficult to implement correctly. Currently, the Cygwin fork
is a non-copy-on-write implementation similar to what was present in early
flavors of UNIX.
The first thing that happens when
a parent process forks a child process is that the parent initializes a space
in the Cygwin process table for the child. It then creates a suspended child
process using the Win32 CreateProcess call. Next, the parent process calls
setjmp to save its own context and sets a pointer to this in a Cygwin shared
memory area (shared among all Cygwin tasks). It then fills in the child's .data
and .bss sections by copying from its own address space into the suspended
child's address space. After the child's address space is initialized, the
child is run while the parent waits on a mutex. The child discovers it has been
forked and longjumps using the saved jump buffer. The child then sets the mutex
the parent is waiting on and blocks on another mutex. This is the signal for
the parent to copy its stack and heap into the child, after which it releases
the mutex the child is waiting on and returns from the fork call. Finally, the
child wakes from blocking on the last mutex, recreates any memory-mapped areas
passed to it via the shared area, and returns from fork itself.
While we have some ideas as to
how to speed up our fork implementation by reducing the number of context
switches between the parent and child process, fork will almost certainly
always be inefficient under Win32. Fortunately, in most circumstances the spawn
family of calls provided by Cygwin can be substituted for a fork/exec pair with
only a little effort. These calls map cleanly on top of the Win32 API. As a
result, they are much more efficient. Changing the compiler's driver program to
call spawn instead of fork was a trivial change and increased compilation speeds
by twenty to thirty percent in our tests.
However, spawn and exec present
their own set of difficulties. Because there is no way to do an actual exec
under Win32, Cygwin has to invent its own Process IDs (PIDs). As a result, when
a process performs multiple exec calls, there will be multiple Windows PIDs
associated with a single Cygwin PID. In some cases, stubs of each of these
Win32 processes may linger, waiting for their exec'd Cygwin process to exit.
Problems
with process creation
The semantics of fork require
that a forked child process have exactly the same address space layout
as its parent. However, Windows provides no native support for cloning address
space between processes and several features actively undermine a reliable fork
implementation. Three issues are especially prevalent:
- DLL base address collisions. Unlike *nix shared
libraries, which use "position-independent code", Windows shared
libraries assume a fixed base address. Whenever the hard-wired address
ranges of two DLLs collide (which occurs quite often), the Windows loader
must "rebase" one of them to a different address. However, it
may not resolve collisions consistently, and may rebase a different dll
and/or move it to a different address every time. Cygwin can usually
compensate for this effect when it involves libraries opened dynamically,
but collisions among statically-linked dlls (dependencies known at compile
time) are resolved before cygwin1.dll initializes and cannot be fixed
afterward. This problem can only be solved by removing the base address
conflicts which cause the problem, usually using the rebaseall tool.
- Address space layout randomization (ASLR). Starting
with Vista, Windows implements ASLR, which means that thread stacks, heap,
memory-mapped files, and statically-linked dlls are placed at different
(random) locations in each process. This behaviour interferes with a
proper fork, and if an unmovable object (process heap or system dll) ends
up at the wrong location, Cygwin can do nothing to compensate (though it will
retry a few times automatically).
- DLL injection by BLODA.
Badly-behaved applications which inject dlls into other processes often
manage to clobber important sections of the child's address space, leading
to base address collisions which rebasing cannot fix. The only way to
resolve this problem is to remove (usually uninstall) the offending app.
See the section called “Implemented options”
for the detect_bloda option, which may be able to identify the BLODA.
In summary, current Windows
implementations make it impossible to implement a perfectly reliable fork, and
occasional fork failures are inevitable.
Signals
When a Cygwin process starts, the
library starts a secondary thread for use in signal handling. This thread waits
for Windows events used to pass signals to the process. When a process notices
it has a signal, it scans its signal bitmask and handles the signal in the
appropriate fashion.
Several complications in the
implementation arise from the fact that the signal handler operates in the same
address space as the executing program. The immediate consequence is that
Cygwin system functions are interruptible unless special care is taken to avoid
this. We go to some lengths to prevent the sig_send function that sends signals
from being interrupted. In the case of a process sending a signal to another
process, we place a mutex around sig_send such that sig_send will not be
interrupted until it has completely finished sending the signal.
In the case of a process sending
itself a signal, we use a separate semaphore/event pair instead of the mutex.
sig_send starts by resetting the event and incrementing the semaphore that
flags the signal handler to process the signal. After the signal is processed,
the signal handler signals the event that it is done. This process keeps
intraprocess signals synchronous, as required by POSIX.
Most standard UNIX signals are
provided. Job control works as expected in shells that support it.
Sockets
Socket-related calls in Cygwin
basically call the functions by the same name in Winsock, Microsoft's
implementation of Berkeley sockets, but with lots of tweaks. All sockets are
non-blocking under the hood so that blocking calls can be interrupted by POSIX
signals. Additional bookkeeping is necessary to implement correct socket
sharing POSIX semantics and especially for the select call. Some socket-related
functions are not implemented at all in Winsock, as, for example, socketpair.
Starting with Windows Vista, Microsoft removed the legacy calls rcmd(3),
rexec(3) and rresvport(3). Recent versions of Cygwin now implement all these
calls internally.
An especially troublesome feature
of Winsock is that it must be initialized before the first socket function is
called. As a result, Cygwin has to perform this initialization on the fly, as
soon as the first socket-related function is called by the application. In
order to support sockets across fork calls, child processes initialize Winsock
if any inherited file descriptor is a socket.
AF_UNIX (AF_LOCAL) sockets are
not available in Winsock. They are implemented in Cygwin by using local AF_INET
sockets instead. This is completely transparent to the application. Cygwin's
implementation also supports the getpeereid BSD extension. However, Cygwin does
not yet support descriptor passing.
IPv6 is supported beginning with
Cygwin release 1.7.0. This support is dependent, however, on the availability
of the Windows IPv6 stack. The IPv6 stack was "experimental", i.e.
not feature complete in Windows 2003 and earlier. Full IPv6 support became
available starting with Windows Vista and Windows Server 2008. Cygwin does not
depend on the underlying OS for the (newly implemented) getaddrinfo and
getnameinfo functions. Cygwin 1.7.0 adds replacement functions which implement
the full functionality for IPv4.
Select
The UNIX select function is
another call that does not map cleanly on top of the Win32 API. Much to our
dismay, we discovered that the Win32 select in Winsock only worked on socket
handles. Our implementation allows select to function normally when given
different types of file descriptors (sockets, pipes, handles, and a custom
/dev/windows Windows messages pseudo-device).
Upon entry into the select
function, the first operation is to sort the file descriptors into the
different types. There are then two cases to consider. The simple case is when
at least one file descriptor is a type that is always known to be ready (such
as a disk file). In that case, select returns immediately as soon as it has
polled each of the other types to see if they are ready. The more complex case
involves waiting for socket or pipe file descriptors to be ready. This is
accomplished by the main thread suspending itself, after starting one thread
for each type of file descriptor present. Each thread polls the file
descriptors of its respective type with the appropriate Win32 API call. As soon
as a thread identifies a ready descriptor, that thread signals the main thread
to wake up. This case is now the same as the first one since we know at least
one descriptor is ready. So select returns, after polling all of the file
descriptors one last time.
What's new and what changed in Cygwin 1.7
What's new and
what changed from 1.7.16 to 1.7.17
- Support the "e" flag to fopen(3). This is a
Glibc extension which allows to fopen the file with the O_CLOEXEC flag
set.
- Support the "x" flag to fopen(3). This is a
Glibc/C11 extension which allows to open the file with the O_EXCL flag
set.
What's new and
what changed from 1.7.15 to 1.7.16
- New API: getmntent_r, memrchr.
- Recognize ReFS filesystem.
What's new and
what changed from 1.7.14 to 1.7.15
- CYGWIN=pipe_byte option now forces the opening of
pipes in byte mode rather than message mode.
What's new and
what changed from 1.7.13 to 1.7.14
- Add mouse reporting modes 1005, 1006 and 1015 to
console window.
What's new and
what changed from 1.7.12 to 1.7.13
- mkpasswd and mkgroup now try to print an entry for
the TrustedInstaller account existing since Windows Vista/Server 2008.
- Terminal typeahead when switching from canonical to
non-canonical mode is now properly flushed.
What's new and
what changed from 1.7.11 to 1.7.12
- Cygwin now automatically populates the /dev directory
with all existing POSIX devices.
- Add virtual /proc/PID/mountinfo file.
- flock now additionally supports the following
scenario, which requires propagating locks to the parent process:
- (
- flock -n 9 || exit 1
- # ... commands executed under lock ...
- ) 9>/var/lock/mylockfile
Only propagation to the direct parent
process is supported so far, not to grand parents or sibling processes.
- Add a "detect_bloda" setting for the CYGWIN
environment variable to help finding potential BLODAs.
What's new and
what changed from 1.7.10 to 1.7.11
- New pldd command for listing DLLs loaded by a
process.
- New API: scandirat.
- Change the way remote shares mapped to drive letters
are recognized when creating the cygdrive directory. If Windows claims the
drive is unavailable, don't show it in the cygdrive directory listing.
- Raise default stacksize of pthreads from 512K to 1
Meg. It can still be changed using the pthread_attr_setstacksize call.
What's new and
what changed from 1.7.9 to 1.7.10
- Drop support for Windows NT4.
- The CYGWIN environment variable options
"envcache", "strip_title", "title",
"tty", and "upcaseenv" have been removed.
- If the executable (and the system) is large address
aware, the application heap will be placed in the large memory area. The peflags
tool from the rebase package can be used to set the large address
awareness flag in the executable file header.
- The registry setting "heap_chunk_in_mb" has
been removed, in favor of a new per-executable setting in the executable
file header which can be set using the peflags tool. See the section called “Changing
Cygwin's Maximum Memory” for more information.
- The CYGWIN=tty mode using pipes to communicate with
the console in a pseudo tty-like mode has been removed. Either just use
the normal Windows console as is, or use a terminal application like mintty.
- New getconf command for querying confstr(3),
pathconf(3), sysconf(3), and limits.h configuration.
- New tzset utility to generate a
POSIX-compatible TZ environment variable from the Windows timezone
settings.
- The passwd command now allows an administrator to use
the -R command for other user accounts: passwd -R username.
- Pthread spinlocks. New APIs: pthread_spin_destroy,
pthread_spin_init, pthread_spin_lock, pthread_spin_trylock,
pthread_spin_unlock.
- Pthread stack address management. New APIs:
pthread_attr_getstack, pthread_attr_getstackaddr,
pthread_attr_getguardsize, pthread_attr_setstack,
pthread_attr_setstackaddr, pthread_attr_setguardsize, pthread_getattr_np.
- POSIX Clock Selection option. New APIs:
clock_nanosleep, pthread_condattr_getclock, pthread_condattr_setclock.
- clock_gettime(3) and clock_getres(3) accept
per-process and per-thread CPU-time clocks, including
CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID. New APIs:
clock_getcpuclockid, pthread_getcpuclockid.
- GNU/glibc error.h error reporting functions. New
APIs: error, error_at_line. New exports: error_message_count,
error_one_per_line, error_print_progname. Also, perror and strerror_r no
longer clobber strerror storage.
- C99 <tgmath.h> type-generic macros.
- /proc/loadavg now shows the number of currently
running processes and the total number of processes.
- Added /proc/devices and /proc/misc, which lists
supported device types and their device numbers.
- Added /proc/swaps, which shows the location and size
of Windows paging file(s).
- Added /proc/sysvipc/msg, /proc/sysvipc/sem, and
/proc/sysvipc/shm which provide information about System V IPC message
queues, semaphores, and shared memory.
- /proc/version now shows the username of whomever
compiled the Cygwin DLL as well as the version of GCC used when compiling.
- dlopen now supports the Glibc-specific RTLD_NODELETE
and RTLD_NOOPEN flags.
- The printf(3) and wprintf(3) families of functions
now handle the %m conversion flag.
- Other new API: clock_settime, __fpurge, getgrouplist,
get_current_dir_name, getpt, ppoll, psiginfo, psignal, ptsname_r,
sys_siglist, pthread_setschedprio, pthread_sigqueue, sysinfo.
What's new and what changed from 1.7.8 to
1.7.9
- New API: strchrnul.
What's new and what changed from 1.7.7 to
1.7.8
- Drop support for Windows NT4 prior to Service Pack 4.
- Reinstantiate Cygwin's ability to delete an empty
directory which is the current working directory of the same or another
process. Same for any other empty directory which has been opened by the
same or another process.
- Cygwin now ships the C standard library fenv.h header
file, and implements the related APIs (including GNU/glibc extensions):
feclearexcept, fedisableexcept, feenableexcept, fegetenv, fegetexcept,
fegetexceptflag, fegetprec, fegetround, feholdexcept, feraiseexcept,
fesetenv, fesetexceptflag, fesetprec, fesetround, fetestexcept,
feupdateenv, and predefines both default and no-mask FP environments. See
the GNU C Library manual for full details of this
functionality.
- Support for the C99 complex functions, except for the
"long double" implementations. New APIs: cacos, cacosf, cacosh,
cacoshf, carg, cargf, casin, casinf, casinh, casinhf, catan, catanf,
catanh, catanhf, ccos, ccosf, ccosh, ccoshf, cexp, cexpf, cimag, cimagf,
clog, clogf, conj, conjf, cpow, cpowf, cproj, cprojf, creal, crealf, csin,
csinf, csinh, csinhf, csqrt, csqrtf, ctan, ctanf, ctanh, ctanhf.
- Fix the width of "CJK Ambiguous Width"
characters to 1 for singlebyte charsets and 2 for East Asian multibyte
charsets. (For UTF-8, it remains dependent on the specified language, and
the "@cjknarrow" locale modifier can still be used to force width
1.)
- The strerror_r interface now has two flavors; if
_GNU_SOURCE is defined, it retains the previous behavior of returning char
* (but the result is now guaranteed to be NUL-terminated); otherwise it
now obeys POSIX semantics of returning int.
- /proc/sys now allows unfiltered access to the native
NT namespace. Access restrictions still apply. Direct device access via
/proc/sys is not yet supported. File system access via block devices
works. For instance (note the trailing slash!)
- bash$ cd /proc/sys/Device/HarddiskVolumeShadowCopy1/
- Other new APIs: llround, llroundf, madvise,
pthread_yield. Export program_invocation_name,
program_invocation_short_name. Support TIOCGPGRP, TIOCSPGRP ioctls.
What's new and what changed from 1.7.6 to
1.7.7
- Partially revert the 1.7.6 change to set the Win32
current working directory (CWD) always to an invalid directory, since it
breaks backward compatibility too much. The Cygwin CWD and the Win32 CWD
are now kept in sync again, unless the Cygwin CWD is not usable as Win32
CWD. See the reworked section called
“Using the Win32 file API in Cygwin applications” for details.
- Make sure to follow the Microsoft security advisory
concerning DLL hijacking. See the Microsoft Security Advisory (2269637) "Insecure Library
Loading Could Allow Remote Code Execution" for details.
- Allow to link against -lbinmode instead of
/lib/binmode.o. Same for -ltextmode, -ltextreadmode and -lautomode. See the section called “Programming” for details.
What's new and what changed from 1.7.5 to
1.7.6
- Add new mount options "dos" and
"ihash" to allow overriding Cygwin default behaviour on broken
filesystems not recognized by Cygwin.
- Add new mount option "bind" to allow
remounting parts of the POSIX file hierarchy somewhere else.
- Ttys and ptys are handled as securable objects using
file-like permissions and owner/group information. chmod and chown
now work on ttys/ptys. A new mechanism is used to propagate pty handles
safely to other processes, which does not require to use Cygserver.
- Pass on coresize settings made with setrlimit(2).
This allows shells to disable creating stackdump files in child processes
via
ulimit -c 0
in bash or
limit coredumpsize 0
in tcsh.
- Locale categories contain all localization strings
additionally as wide-char strings. locale(1) prints these values just as
on Linux. nl_langinfo(3) allows to fetch them.
- New interfaces mkostemp(3) and mkostemps(3) are
added.
- New virtual file /proc/filesystems.
- clock_gettime(3) and clock_getres(3) accept
CLOCK_MONOTONIC.
- DEPRECATED with 1.7.7: Cygwin handles the current
working directory entirely on its own. The Win32 current working directory
is set to an invalid path to be out of the way. [...]
What's new and what changed from 1.7.3 to
1.7.5
- Support for DEC Backarrow Key Mode escape sequences
(ESC [ ? 67 h, ESC [ ? 67 l) in Windows console.
What's new and what changed from 1.7.2 to
1.7.3
- Support for GB2312/EUC-CN. These charsets are
implemented as aliases to GBK. GB2312 is now the default charset name for
the locales zh_CN and zh_SG, just as on Linux.
- Modification and access timestamps of devices reflect
the current time.
What's new and what changed from 1.7.1 to
1.7.2
- Localization support has been much improved.
- Cygwin now handles locales using the underlying
Windows locale support. The locale must exist in Windows to be
recognized. Locale aliases from the file /usr/share/locale/locale.alias
are also allowed, as long as their replacement is supported by the underlying
Windows.
- New tool "locale" to fetch locale
information and default locales based on the Windows default settings as
well as lists of all supported locales and character sets.
- Default charset for locales without explicit
charset is now chosen from a list of Linux-compatible charsets.
For instance: en_US ->
ISO-8859-1, ja_JP -> EUC-JP, zh_TW -> Big5.
- Added support for the charsets GEORGIAN-PS, PT154,
and TIS-620.
- Support for the various locale modifiers to switch
charsets as on Linux.
- Default charset in the "C" or
"POSIX" locale has been changed back from UTF-8 to ASCII, to
avoid problems with applications expecting a singlebyte charset in the
"C"/"POSIX" locale. Still use UTF-8 internally for
filename conversion in this case.
- LC_COLLATE, LC_MONETARY, LC_NUMERIC, and LC_TIME
localization is enabled via Windows locale support. LC_MESSAGES is
enabled via a big table with localized strings.
- fnmatch(3), regcomp(3), regexec(3) calls are now
multibyte-aware.
- printf(3), wprintf(3) families of functions now
handle the grouping flag, the apostrophe ', per POSIX-1.2008. The integer
portion of the result of a decimal conversion (%i, %d, %u, %f, %F, %g,
%G) will be formatted with thousands' grouping characters.
- strftime(3), wcsftime(3), and strptime(3) now handle
the E and O format modifiers to print/scan alternative date and time
representations or to use alternative digits in locales which support
this. Additionally these functions now also support the padding modifiers
'0' and '+', as well as a field width per POSIX-1.2008.
- New strfmon(3) call.
- Support open(2) flags O_CLOEXEC and O_TTY_INIT.
Support fcntl flag F_DUPFD_CLOEXEC. Support socket flags SOCK_CLOEXEC and
SOCK_NONBLOCK. Add new Linux-compatible API calls accept4(2), dup3(2), and
pipe2(2). Support the signal SIGPWR.
- Enhanced Windows console support.
- The console's backspace keycode can be changed
using 'stty erase'.
- Function keys send distinguished escape sequences
compatible with rxvt. Keypad keys send distinguished escape sequences,
xterm-style.
- Support of combining Alt and AltGr modifiers in
console window (compatible with xterm and mintty), so that e.g. Alt-@
sends ESC @ also on keyboards where @ is mapped to an AltGr combination.
- Report mouse wheel scroll events in mouse reporting
mode 1000 (note: this doesn't seem to work on all systems, assumedly due
to driver interworking issues). Add mouse reporting mode 1002 to report
mouse drag movement. Add mouse reporting mode 1003 to report any mouse
movement. Add focus event reporting (mode 1004), compatible with xterm
and mintty.
- Add escape sequences for not bold (22), not
invisible (28), not blinking (25) (compatible with xterm and mintty).
- Support VT100 line drawing graphics mode in console
window (compatible with xterm and mintty).
- Handle native DOS paths always as if mounted with
"posix=0,noacl".
- Handle UNC paths starting with slashes identical to
/cygdrive paths. In other words, use the /cygdrive mount flags for these
paths as well.
- Recognize NWFS filesystem and work around broken OS
call.
- New support for eXtensible Data Record (XDR) encoding
and decoding, as defined by RFCs 1014, 1832, and 4506. The XDR protocol
and functions are useful for cross-platform data exchange, and are commonly
used as the core data interchange format for Remote Procedure Call (RPC)
and NFS.
OS related
changes
- Windows 95, 98 and Me are not supported anymore. The
new Cygwin 1.7 DLL will not run on any of these systems.
- Add support for Windows 7 and Windows Server 2008 R2.
File Access
related changes
- Mount points are no longer stored in the registry.
Use /etc/fstab and /etc/fstab.d/$USER instead. Mount points created with
mount(1) are only local to the current session and disappear when the last
Cygwin process in the session exits.
- Cygwin creates the mount points for /, /usr/bin, and
/usr/lib automatically from its own position on the disk. They don't have
to be specified in /etc/fstab.
- If a filename cannot be represented in the current
character set, the character will be converted to a sequence Ctrl-X +
UTF-8 representation of the character. This allows to access all files,
even those not having a valid representation of their filename in the
current character set. To always have a valid string, use the UTF-8
charset by setting the environment variable $LANG, $LC_ALL, or $LC_CTYPE
to a valid POSIX value, such as "en_US.UTF-8".
- PATH_MAX is now 4096. Internally, path names can be
as long as the underlying OS can handle (32K).
- struct dirent now supports d_type, filled out with
DT_REG or DT_DIR. All other file types return as DT_UNKNOWN for
performance reasons.
- The CYGWIN environment variable options
"ntsec" and "smbntsec" have been replaced by the
per-mount option "acl"/"noacl".
- The CYGWIN environment variable option
"ntea" has been removed without substitute.
- The CYGWIN environment variable option
"check_case" has been removed in favor of real case-sensitivity
on file systems supporting it.
- Creating filenames with special DOS characters
'"', '*', ':', '<', '>', '|' is supported.
- Creating files with special DOS device filename
components ("aux", "nul", "prn") is
supported.
- File names are case sensitive if the OS and the
underlying file system support it. Works on NTFS and NFS. Does not work
on FAT and Samba shares. Requires to change a registry key (see the User's
Guide). Can be switched off on a per-mount basis.
- Due to the above changes, managed mounts have been
removed.
- Incoming DOS paths are always handled
case-insensitively and get no POSIX permissions, as if they were mounted with
noacl,posix=0 mount flags.
- unlink(2) and rmdir(2) try very hard to remove
files/directories even if they are currently accessed or locked. This is
done by utilizing the hidden recycle bin directories and marking the files
for deletion.
- rename(2) rewritten to be more POSIX conformant.
- access(2) now performs checks using the real user ID,
as required by POSIX; the old behavior of querying based on effective user
ID is available through the new faccessat(2) and euidaccess(2) APIs.
- Add st_birthtim member to struct stat.
- File locking is now advisory, not mandatory anymore.
The fcntl(2) and the new lockf(2) APIs create and maintain locks with
POSIX semantics, the flock(2) API creates and maintains locks with BSD
semantics. POSIX and BSD locks are independent of each other.
- Implement atomic O_APPEND mode.
- New open(2) flags O_DIRECTORY, O_EXEC and O_SEARCH.
- Make the "plain file with SYSTEM attribute
set" style symlink default again when creating symlinks. Only create
Windows shortcut style symlinks if CYGWIN=winsymlinks is set in the
environment.
- Symlinks now use UTF-16 encoding for the target
filename for better internationalization support. Cygwin 1.7 can read all
old style symlinks, but the new style is not compatible with older Cygwin
releases.
- Handle NTFS native symlinks available since
Vista/2008 as symlinks (but don't create Vista/2008 symlinks due to
unfortunate OS restrictions).
- Recognize NFS shares and handle them using native
mechanisms. Recognize and create real symlinks on NFS shares. Get correct
stat(2) information and set real mode bits on open(2), mkdir(2) and
chmod(2).
- Recognize MVFS and work around problems manipulating
metadata and handling DOS attributes.
- Recognize Netapp DataOnTap drives and fix inode
number handling.
- Recognize Samba version beginning with Samba 3.0.28a
using the new extended version information negotiated with the Samba
developers.
- Stop faking hardlinks by copying the file on
filesystems which don't support hardlinks natively (FAT, FAT32, etc.).
Just return an error instead, just like Linux.
- List servers of all accessible domains and workgroups
in // instead of just the servers in the own domain/workgroup.
- Support Linux-like extended attributes ([fl]getxattr,
[fl]listxattr, [fl]setxattr, [fl]removexattr).
- New file conversion API for conversion from Win32 to
POSIX path and vice versa (cygwin_conv_path, cygwin_create_path,
cygwin_conv_path_list).
- New openat family of functions: openat, faccessat,
fchmodat, fchownat, fstatat, futimesat, linkat, mkdirat, mkfifoat,
mknodat, readlinkat, renameat, symlinkat, unlinkat.
- Other new APIs: posix_fadvise, posix_fallocate,
funopen, fopencookie, open_memstream, open_wmemstream, fmemopen,
fdopendir, fpurge, mkstemps, eaccess, euidaccess, canonicalize_file_name,
fexecve, execvpe.
Network related
changes
- New implementation for blocking sockets and select on
sockets which is supposed to allow POSIX-compatible sharing of sockets
between threads and processes.
- send/sendto/sendmsg now send data in 64K chunks to
circumvent an internal buffer problem in WinSock (KB 201213).
- New send/recv option MSG_DONTWAIT.
- IPv6 support. New APIs getaddrinfo, getnameinfo,
freeaddrinfo, gai_strerror, in6addr_any, in6addr_loopback. On IPv6-less
systems, replacement functions are available for IPv4. On systems with
IPv6 enabled, the underlying WinSock functions are used. While I tried
hard to get the functionality as POSIXy as possible, keep in mind that a
*fully* conformant implementation of getaddrinfo and other stuff is only
available starting with Windows Vista/2008.
- Resolver functions (res_init, res_query, res_search,
res_querydomain, res_mkquery, res_send, dn_comp, dn_expand) are now part
of Cygwin. Applications don't have to link against minires anymore.
Actually, this *is* the former libminires.a.
- rcmd is now implemented inside of Cygwin, instead of
calling the WinSock function. This allows rsh(1) usage on Vista/2008 and
later, which dropped this function from WinSock.
- Define multicast structures in netinet/in.h. Note
that fully conformant multicast support is only available beginning with
Vista/2008.
- Improve get_ifconf. Redefine struct ifreq and subsequent
datastructures to be able to keep more information. Support SIOCGIFINDEX,
SIOCGIFDSTADDR and the Cygwin specific SIOCGIFFRNDLYNAM. Support real
interface flags on systems supporting them.
- Other new APIs: bindresvport, bindresvport_sa,
gethostbyname2, iruserok_sa, rcmd_af, rresvport_af, getifaddrs,
freeifaddrs, if_nametoindex, if_indextoname, if_nameindex,
if_freenameindex.
- Add /proc/net/if_inet6.
Device
related changes
- Reworked pipe implementation which uses overlapped IO
to create more reliable interruptible pipes and fifos.
- The CYGWIN environment variable option
"binmode" has been removed.
- Improved fifo handling by using native Windows named
pipes.
- Detect when a stdin/stdout which looks like a pipe is
really a tty. Among other things, this allows a debugged application to
recognize that it is using the same tty as the debugger.
- Support UTF-8 in console window.
- In the console window the backspace key now emits DEL
(0x7f) instead of BS (0x08), Alt-Backspace emits ESC-DEL (0x1b,0x7f)
instead of DEL (0x7f), same as the Linux console and xterm. Control-Space
now emits an ASCII NUL (0x0) character.
- Support up to 64 serial interfaces using /dev/ttyS0 -
/dev/ttyS63.
- Support up to 128 raw disk drives /dev/sda -
/dev/sddx.
- New API: cfmakeraw, get_avphys_pages, get_nprocs,
get_nprocs_conf, get_phys_pages, posix_openpt.
Other POSIX related
changes
- A lot of character sets are supported now via a call
to setlocale(). The setting of the environment variables $LANG, $LC_ALL or
$LC_CTYPE will be used. For instance, setting $LANG to
"de_DE.ISO-8859-15" before starting a Cygwin session will use
the ISO-8859-15 character set in the entire session. The default locale in
the absence of one of the aforementioned environment variables is
"C.UTF-8".
The full list of supported
character sets: "ASCII", "ISO-8859-x" with x in 1-16,
except 12, "UTF-8", Windows codepages "CPxxx", with xxx in
(437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251,
1252, 1253, 1254, 1255, 1256, 1257, 1258), "KOI8-R",
"KOI8-U", "SJIS", "GBK", "eucJP",
"eucKR", and "Big5".
- Allow multiple concurrent read locks per thread for
pthread_rwlock_t.
- Implement pthread_kill(thread, 0) as per POSIX.
- New API for POSIX IPC: Named semaphores: sem_open,
sem_close, sem_unlink. Message queues: mq_open, mq_getattr, mq_setattr,
mq_notify, mq_send, mq_timedsend, mq_receive, mq_timedreceive, mq_close,
mq_unlink. Shared memory: shm_open, shm_unlink.
- Only declare expected functions in <strings.h>,
don't include <string.h> from here.
- Support for WCONTINUED, WIFCONTINUED() added to
waitpid and wait4.
- New APIs: _Exit, confstr, insque, remque,
sys_sigabbrev, posix_madvise, posix_memalign, reallocf, exp10, exp10f, pow10,
pow10f, lrint, lrintf, rint, rintf, llrint, llrintf, llrintl, lrintl,
rintl, mbsnrtowcs, strcasestr, stpcpy, stpncpy, wcpcpy, wcpncpy, wcsnlen,
wcsnrtombs, wcsftime, wcstod, wcstof, wcstoimax, wcstok, wcstol, wcstoll,
wcstoul, wcstoull, wcstoumax, wcsxfrm, wcscasecmp, wcsncasecmp, fgetwc,
fgetws, fputwc, fputws, fwide, getwc, getwchar, putwc, putwchar, ungetwc,
asnprintf, dprintf, vasnprintf, vdprintf, wprintf, fwprintf, swprintf,
vwprintf, vfwprintf, vswprintf, wscanf, fwscanf, swscanf, vwscanf, vfwscanf,
vswscanf.
Security
related changes
- Getting a domain user's groups is hopefully more
bulletproof now.
- Cygwin now comes with a real LSA authentication
package. This must be manually installed by a privileged user using the
/bin/cyglsa-config script. The advantages and disadvantages are noted in
http://cygwin.com/ml/cygwin-developers/2006-11/msg00000.html
- Cygwin now allows storage and use of user passwords
in a hidden area of the registry. This is tried first when Cygwin is
called by privileged processes to switch the user context. This allows,
for instance, ssh public key sessions with full network credentials to
access shares on other machines.
- New options have been added to the mkpasswd and
mkgroup tools to ease use in multi-machine and multi-domain environments.
The existing options have a slightly changed behaviour.
Miscellaneous
- New ldd utility, similar to Linux.
- New link libraries libdl.a, libresolv.a, librt.a.
- Fallout from the long path names: If the current
working directory is longer than 260 bytes, or if the current working
directory is a virtual path (like /proc, /cygdrive, //server), don't call
native Win32 programs since they don't understand these paths.
- On the first usage of a DOS path (C:\foo, \\foo\bar),
the Cygwin DLL emits a scary warning that DOS paths shouldn't be used.
This warning may be disabled via the new CYGWIN=nodosfilewarning setting.
- The CYGWIN environment variable option
"server" has been removed. Cygwin automatically uses cygserver
if it's available.
- Allow environment of arbitrary size instead of a
maximum of 32K.
- Don't force uppercase environment when started from a
non-Cygwin process. Except for certain Windows and POSIX variables which
are always uppercased, preserve environment case. Switch back to old
behaviour with the new CYGWIN=upcaseenv setting.
- Detect and report a missing DLL on process startup.
- Add /proc/registry32 and /proc/registry64 paths to
access 32 bit and 64 bit registry on 64 bit systems.
- Add the ability to distinguish registry keys and
registry values with the same name in the same registry subtree. The key
is called "foo" and the value will be called "foo%val"
in this case.
- Align /proc/cpuinfo more closely to Linux content.
- Add /proc/$PID/mounts entries and a symlink
/proc/mounts pointing to /proc/self/mounts as on Linux.
- Optimized strstr and memmem implementation.
- Remove backwards compatibility with old signal masks.
(Some *very* old programs which use signal masks may no longer work
correctly).
- Cygwin now exports wrapper functions for libstdc++
operators new and delete, to support the toolchain in implementing full
C++ standards conformance when working with shared libraries.
- Different Cygwin installations in different paths can
be run in parallel without knowing of each other. The path of the Cygwin
DLL used in a process is a key used when creating IPC objects. So
different Cygwin DLLs are running in different namespaces.
- Each Cygwin DLL stores its path and installation key
in the registry. This allows troubleshooting of problems which could be a
result of having multiple concurrent Cygwin installations.
HOW DO YOU COMPILE AND RUN A PROGRAM WITH IT?
Jisajili kwenye:
Machapisho (Atom)