Get the content charset from the HTTP response headers and the HTML content-type meta tag.
$ npm install charset --save
var charset = require('charset');
var http = require('http');
// Request the page and log the charset detected from the response headers
// together with a received body chunk.
http.get('http://nodejs.org', function onResponse(res) {
  res.on('data', function onChunk(chunk) {
    // `charset(res, chunk)` is an accepted alternative call form.
    var detected = charset(res.headers, chunk);
    console.log(detected);
    // Tear the response down once a chunk has been inspected.
    res.destroy();
  });
});
Stdout should log: utf8.
// Pass only the Content-Type header value to charset().
charset(res.headers['content-type']);
Combine detection with jschardet
As you know, charset
only detects from HTTP response headers and the HTML content-type meta tag.
You can combine it with jschardet to help you detect the final charset.
This example code comes from stackoverflow#12326688:
var request = require('request');
var charset = require('charset');
var jschardet = require('jschardet');
// Fetch the page and log its charset, preferring the declared charset and
// falling back to jschardet's guess.
// `encoding: null` is passed so that request does not decode the body
// (per request's documentation the body is then delivered as a Buffer).
request({
  url: 'http://www.example.com',
  encoding: null
}, function (err, res, body) {
  if (err) {
    throw err;
  }
  // Declared locally: assigning to an undeclared `enc` would create an
  // implicit global (and throw a ReferenceError in strict mode).
  var enc = charset(res.headers, body);
  if (!enc) {
    // jschardet can report `encoding: null` when it cannot decide, so guard
    // before lower-casing instead of throwing a TypeError.
    var guess = jschardet.detect(body);
    enc = guess && guess.encoding ? guess.encoding.toLowerCase() : null;
  }
  console.log(enc);
});