index.js 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. /* eslint no-control-regex:0 */
  2. 'use strict';
  3. const base64 = require('../base64');
  4. const qp = require('../qp');
  5. const mimeTypes = require('./mime-types');
  6. module.exports = {
  7. /**
  8. * Checks if a value is plaintext string (uses only printable 7bit chars)
  9. *
  10. * @param {String} value String to be tested
  11. * @returns {Boolean} true if it is a plaintext string
  12. */
  13. isPlainText(value) {
  14. if (typeof value !== 'string' || /[\x00-\x08\x0b\x0c\x0e-\x1f\u0080-\uFFFF]/.test(value)) {
  15. return false;
  16. } else {
  17. return true;
  18. }
  19. },
  20. /**
  21. * Checks if a multi line string containes lines longer than the selected value.
  22. *
  23. * Useful when detecting if a mail message needs any processing at all –
  24. * if only plaintext characters are used and lines are short, then there is
  25. * no need to encode the values in any way. If the value is plaintext but has
  26. * longer lines then allowed, then use format=flowed
  27. *
  28. * @param {Number} lineLength Max line length to check for
  29. * @returns {Boolean} Returns true if there is at least one line longer than lineLength chars
  30. */
  31. hasLongerLines(str, lineLength) {
  32. if (str.length > 128 * 1024) {
  33. // do not test strings longer than 128kB
  34. return true;
  35. }
  36. return new RegExp('^.{' + (lineLength + 1) + ',}', 'm').test(str);
  37. },
  38. /**
  39. * Encodes a string or an Buffer to an UTF-8 MIME Word (rfc2047)
  40. *
  41. * @param {String|Buffer} data String to be encoded
  42. * @param {String} mimeWordEncoding='Q' Encoding for the mime word, either Q or B
  43. * @param {Number} [maxLength=0] If set, split mime words into several chunks if needed
  44. * @return {String} Single or several mime words joined together
  45. */
  46. encodeWord(data, mimeWordEncoding, maxLength) {
  47. mimeWordEncoding = (mimeWordEncoding || 'Q')
  48. .toString()
  49. .toUpperCase()
  50. .trim()
  51. .charAt(0);
  52. maxLength = maxLength || 0;
  53. let encodedStr;
  54. let toCharset = 'UTF-8';
  55. if (maxLength && maxLength > 7 + toCharset.length) {
  56. maxLength -= 7 + toCharset.length;
  57. }
  58. if (mimeWordEncoding === 'Q') {
  59. // https://tools.ietf.org/html/rfc2047#section-5 rule (3)
  60. encodedStr = qp.encode(data).replace(/[^a-z0-9!*+\-/=]/gi, chr => {
  61. let ord = chr
  62. .charCodeAt(0)
  63. .toString(16)
  64. .toUpperCase();
  65. if (chr === ' ') {
  66. return '_';
  67. } else {
  68. return '=' + (ord.length === 1 ? '0' + ord : ord);
  69. }
  70. });
  71. } else if (mimeWordEncoding === 'B') {
  72. encodedStr = typeof data === 'string' ? data : base64.encode(data);
  73. maxLength = maxLength ? Math.max(3, ((maxLength - (maxLength % 4)) / 4) * 3) : 0;
  74. }
  75. if (maxLength && (mimeWordEncoding !== 'B' ? encodedStr : base64.encode(data)).length > maxLength) {
  76. if (mimeWordEncoding === 'Q') {
  77. encodedStr = this.splitMimeEncodedString(encodedStr, maxLength).join('?= =?' + toCharset + '?' + mimeWordEncoding + '?');
  78. } else {
  79. // RFC2047 6.3 (2) states that encoded-word must include an integral number of characters, so no chopping unicode sequences
  80. let parts = [];
  81. let lpart = '';
  82. for (let i = 0, len = encodedStr.length; i < len; i++) {
  83. let chr = encodedStr.charAt(i);
  84. // check if we can add this character to the existing string
  85. // without breaking byte length limit
  86. if (Buffer.byteLength(lpart + chr) <= maxLength || i === 0) {
  87. lpart += chr;
  88. } else {
  89. // we hit the length limit, so push the existing string and start over
  90. parts.push(base64.encode(lpart));
  91. lpart = chr;
  92. }
  93. }
  94. if (lpart) {
  95. parts.push(base64.encode(lpart));
  96. }
  97. if (parts.length > 1) {
  98. encodedStr = parts.join('?= =?' + toCharset + '?' + mimeWordEncoding + '?');
  99. } else {
  100. encodedStr = parts.join('');
  101. }
  102. }
  103. } else if (mimeWordEncoding === 'B') {
  104. encodedStr = base64.encode(data);
  105. }
  106. return '=?' + toCharset + '?' + mimeWordEncoding + '?' + encodedStr + (encodedStr.substr(-2) === '?=' ? '' : '?=');
  107. },
  108. /**
  109. * Finds word sequences with non ascii text and converts these to mime words
  110. *
  111. * @param {String} value String to be encoded
  112. * @param {String} mimeWordEncoding='Q' Encoding for the mime word, either Q or B
  113. * @param {Number} [maxLength=0] If set, split mime words into several chunks if needed
  114. * @param {Boolean} [encodeAll=false] If true and the value needs encoding then encodes entire string, not just the smallest match
  115. * @return {String} String with possible mime words
  116. */
  117. encodeWords(value, mimeWordEncoding, maxLength, encodeAll) {
  118. maxLength = maxLength || 0;
  119. let encodedValue;
  120. // find first word with a non-printable ascii in it
  121. let firstMatch = value.match(/(?:^|\s)([^\s]*[\u0080-\uFFFF])/);
  122. if (!firstMatch) {
  123. return value;
  124. }
  125. if (encodeAll) {
  126. // if it is requested to encode everything or the string contains something that resebles encoded word, then encode everything
  127. return this.encodeWord(value, mimeWordEncoding, maxLength);
  128. }
  129. // find the last word with a non-printable ascii in it
  130. let lastMatch = value.match(/([\u0080-\uFFFF][^\s]*)[^\u0080-\uFFFF]*$/);
  131. if (!lastMatch) {
  132. // should not happen
  133. return value;
  134. }
  135. let startIndex =
  136. firstMatch.index +
  137. (
  138. firstMatch[0].match(/[^\s]/) || {
  139. index: 0
  140. }
  141. ).index;
  142. let endIndex = lastMatch.index + (lastMatch[1] || '').length;
  143. encodedValue =
  144. (startIndex ? value.substr(0, startIndex) : '') +
  145. this.encodeWord(value.substring(startIndex, endIndex), mimeWordEncoding || 'Q', maxLength) +
  146. (endIndex < value.length ? value.substr(endIndex) : '');
  147. return encodedValue;
  148. },
  149. /**
  150. * Joins parsed header value together as 'value; param1=value1; param2=value2'
  151. * PS: We are following RFC 822 for the list of special characters that we need to keep in quotes.
  152. * Refer: https://www.w3.org/Protocols/rfc1341/4_Content-Type.html
  153. * @param {Object} structured Parsed header value
  154. * @return {String} joined header value
  155. */
  156. buildHeaderValue(structured) {
  157. let paramsArray = [];
  158. Object.keys(structured.params || {}).forEach(param => {
  159. // filename might include unicode characters so it is a special case
  160. // other values probably do not
  161. let value = structured.params[param];
  162. if (!this.isPlainText(value) || value.length >= 75) {
  163. this.buildHeaderParam(param, value, 50).forEach(encodedParam => {
  164. if (!/[\s"\\;:/=(),<>@[\]?]|^[-']|'$/.test(encodedParam.value) || encodedParam.key.substr(-1) === '*') {
  165. paramsArray.push(encodedParam.key + '=' + encodedParam.value);
  166. } else {
  167. paramsArray.push(encodedParam.key + '=' + JSON.stringify(encodedParam.value));
  168. }
  169. });
  170. } else if (/[\s'"\\;:/=(),<>@[\]?]|^-/.test(value)) {
  171. paramsArray.push(param + '=' + JSON.stringify(value));
  172. } else {
  173. paramsArray.push(param + '=' + value);
  174. }
  175. });
  176. return structured.value + (paramsArray.length ? '; ' + paramsArray.join('; ') : '');
  177. },
  178. /**
  179. * Encodes a string or an Buffer to an UTF-8 Parameter Value Continuation encoding (rfc2231)
  180. * Useful for splitting long parameter values.
  181. *
  182. * For example
  183. * title="unicode string"
  184. * becomes
  185. * title*0*=utf-8''unicode
  186. * title*1*=%20string
  187. *
  188. * @param {String|Buffer} data String to be encoded
  189. * @param {Number} [maxLength=50] Max length for generated chunks
  190. * @param {String} [fromCharset='UTF-8'] Source sharacter set
  191. * @return {Array} A list of encoded keys and headers
  192. */
  193. buildHeaderParam(key, data, maxLength) {
  194. let list = [];
  195. let encodedStr = typeof data === 'string' ? data : (data || '').toString();
  196. let encodedStrArr;
  197. let chr, ord;
  198. let line;
  199. let startPos = 0;
  200. let i, len;
  201. maxLength = maxLength || 50;
  202. // process ascii only text
  203. if (this.isPlainText(data)) {
  204. // check if conversion is even needed
  205. if (encodedStr.length <= maxLength) {
  206. return [
  207. {
  208. key,
  209. value: encodedStr
  210. }
  211. ];
  212. }
  213. encodedStr = encodedStr.replace(new RegExp('.{' + maxLength + '}', 'g'), str => {
  214. list.push({
  215. line: str
  216. });
  217. return '';
  218. });
  219. if (encodedStr) {
  220. list.push({
  221. line: encodedStr
  222. });
  223. }
  224. } else {
  225. if (/[\uD800-\uDBFF]/.test(encodedStr)) {
  226. // string containts surrogate pairs, so normalize it to an array of bytes
  227. encodedStrArr = [];
  228. for (i = 0, len = encodedStr.length; i < len; i++) {
  229. chr = encodedStr.charAt(i);
  230. ord = chr.charCodeAt(0);
  231. if (ord >= 0xd800 && ord <= 0xdbff && i < len - 1) {
  232. chr += encodedStr.charAt(i + 1);
  233. encodedStrArr.push(chr);
  234. i++;
  235. } else {
  236. encodedStrArr.push(chr);
  237. }
  238. }
  239. encodedStr = encodedStrArr;
  240. }
  241. // first line includes the charset and language info and needs to be encoded
  242. // even if it does not contain any unicode characters
  243. line = 'utf-8\x27\x27';
  244. let encoded = true;
  245. startPos = 0;
  246. // process text with unicode or special chars
  247. for (i = 0, len = encodedStr.length; i < len; i++) {
  248. chr = encodedStr[i];
  249. if (encoded) {
  250. chr = this.safeEncodeURIComponent(chr);
  251. } else {
  252. // try to urlencode current char
  253. chr = chr === ' ' ? chr : this.safeEncodeURIComponent(chr);
  254. // By default it is not required to encode a line, the need
  255. // only appears when the string contains unicode or special chars
  256. // in this case we start processing the line over and encode all chars
  257. if (chr !== encodedStr[i]) {
  258. // Check if it is even possible to add the encoded char to the line
  259. // If not, there is no reason to use this line, just push it to the list
  260. // and start a new line with the char that needs encoding
  261. if ((this.safeEncodeURIComponent(line) + chr).length >= maxLength) {
  262. list.push({
  263. line,
  264. encoded
  265. });
  266. line = '';
  267. startPos = i - 1;
  268. } else {
  269. encoded = true;
  270. i = startPos;
  271. line = '';
  272. continue;
  273. }
  274. }
  275. }
  276. // if the line is already too long, push it to the list and start a new one
  277. if ((line + chr).length >= maxLength) {
  278. list.push({
  279. line,
  280. encoded
  281. });
  282. line = chr = encodedStr[i] === ' ' ? ' ' : this.safeEncodeURIComponent(encodedStr[i]);
  283. if (chr === encodedStr[i]) {
  284. encoded = false;
  285. startPos = i - 1;
  286. } else {
  287. encoded = true;
  288. }
  289. } else {
  290. line += chr;
  291. }
  292. }
  293. if (line) {
  294. list.push({
  295. line,
  296. encoded
  297. });
  298. }
  299. }
  300. return list.map((item, i) => ({
  301. // encoded lines: {name}*{part}*
  302. // unencoded lines: {name}*{part}
  303. // if any line needs to be encoded then the first line (part==0) is always encoded
  304. key: key + '*' + i + (item.encoded ? '*' : ''),
  305. value: item.line
  306. }));
  307. },
  308. /**
  309. * Parses a header value with key=value arguments into a structured
  310. * object.
  311. *
  312. * parseHeaderValue('content-type: text/plain; CHARSET='UTF-8'') ->
  313. * {
  314. * 'value': 'text/plain',
  315. * 'params': {
  316. * 'charset': 'UTF-8'
  317. * }
  318. * }
  319. *
  320. * @param {String} str Header value
  321. * @return {Object} Header value as a parsed structure
  322. */
  323. parseHeaderValue(str) {
  324. let response = {
  325. value: false,
  326. params: {}
  327. };
  328. let key = false;
  329. let value = '';
  330. let type = 'value';
  331. let quote = false;
  332. let escaped = false;
  333. let chr;
  334. for (let i = 0, len = str.length; i < len; i++) {
  335. chr = str.charAt(i);
  336. if (type === 'key') {
  337. if (chr === '=') {
  338. key = value.trim().toLowerCase();
  339. type = 'value';
  340. value = '';
  341. continue;
  342. }
  343. value += chr;
  344. } else {
  345. if (escaped) {
  346. value += chr;
  347. } else if (chr === '\\') {
  348. escaped = true;
  349. continue;
  350. } else if (quote && chr === quote) {
  351. quote = false;
  352. } else if (!quote && chr === '"') {
  353. quote = chr;
  354. } else if (!quote && chr === ';') {
  355. if (key === false) {
  356. response.value = value.trim();
  357. } else {
  358. response.params[key] = value.trim();
  359. }
  360. type = 'key';
  361. value = '';
  362. } else {
  363. value += chr;
  364. }
  365. escaped = false;
  366. }
  367. }
  368. if (type === 'value') {
  369. if (key === false) {
  370. response.value = value.trim();
  371. } else {
  372. response.params[key] = value.trim();
  373. }
  374. } else if (value.trim()) {
  375. response.params[value.trim().toLowerCase()] = '';
  376. }
  377. // handle parameter value continuations
  378. // https://tools.ietf.org/html/rfc2231#section-3
  379. // preprocess values
  380. Object.keys(response.params).forEach(key => {
  381. let actualKey, nr, match, value;
  382. if ((match = key.match(/(\*(\d+)|\*(\d+)\*|\*)$/))) {
  383. actualKey = key.substr(0, match.index);
  384. nr = Number(match[2] || match[3]) || 0;
  385. if (!response.params[actualKey] || typeof response.params[actualKey] !== 'object') {
  386. response.params[actualKey] = {
  387. charset: false,
  388. values: []
  389. };
  390. }
  391. value = response.params[key];
  392. if (nr === 0 && match[0].substr(-1) === '*' && (match = value.match(/^([^']*)'[^']*'(.*)$/))) {
  393. response.params[actualKey].charset = match[1] || 'iso-8859-1';
  394. value = match[2];
  395. }
  396. response.params[actualKey].values[nr] = value;
  397. // remove the old reference
  398. delete response.params[key];
  399. }
  400. });
  401. // concatenate split rfc2231 strings and convert encoded strings to mime encoded words
  402. Object.keys(response.params).forEach(key => {
  403. let value;
  404. if (response.params[key] && Array.isArray(response.params[key].values)) {
  405. value = response.params[key].values.map(val => val || '').join('');
  406. if (response.params[key].charset) {
  407. // convert "%AB" to "=?charset?Q?=AB?="
  408. response.params[key] =
  409. '=?' +
  410. response.params[key].charset +
  411. '?Q?' +
  412. value
  413. // fix invalidly encoded chars
  414. .replace(/[=?_\s]/g, s => {
  415. let c = s.charCodeAt(0).toString(16);
  416. if (s === ' ') {
  417. return '_';
  418. } else {
  419. return '%' + (c.length < 2 ? '0' : '') + c;
  420. }
  421. })
  422. // change from urlencoding to percent encoding
  423. .replace(/%/g, '=') +
  424. '?=';
  425. } else {
  426. response.params[key] = value;
  427. }
  428. }
  429. });
  430. return response;
  431. },
  432. /**
  433. * Returns file extension for a content type string. If no suitable extensions
  434. * are found, 'bin' is used as the default extension
  435. *
  436. * @param {String} mimeType Content type to be checked for
  437. * @return {String} File extension
  438. */
  439. detectExtension: mimeType => mimeTypes.detectExtension(mimeType),
  440. /**
  441. * Returns content type for a file extension. If no suitable content types
  442. * are found, 'application/octet-stream' is used as the default content type
  443. *
  444. * @param {String} extension Extension to be checked for
  445. * @return {String} File extension
  446. */
  447. detectMimeType: extension => mimeTypes.detectMimeType(extension),
  448. /**
  449. * Folds long lines, useful for folding header lines (afterSpace=false) and
  450. * flowed text (afterSpace=true)
  451. *
  452. * @param {String} str String to be folded
  453. * @param {Number} [lineLength=76] Maximum length of a line
  454. * @param {Boolean} afterSpace If true, leave a space in th end of a line
  455. * @return {String} String with folded lines
  456. */
  457. foldLines(str, lineLength, afterSpace) {
  458. str = (str || '').toString();
  459. lineLength = lineLength || 76;
  460. let pos = 0,
  461. len = str.length,
  462. result = '',
  463. line,
  464. match;
  465. while (pos < len) {
  466. line = str.substr(pos, lineLength);
  467. if (line.length < lineLength) {
  468. result += line;
  469. break;
  470. }
  471. if ((match = line.match(/^[^\n\r]*(\r?\n|\r)/))) {
  472. line = match[0];
  473. result += line;
  474. pos += line.length;
  475. continue;
  476. } else if ((match = line.match(/(\s+)[^\s]*$/)) && match[0].length - (afterSpace ? (match[1] || '').length : 0) < line.length) {
  477. line = line.substr(0, line.length - (match[0].length - (afterSpace ? (match[1] || '').length : 0)));
  478. } else if ((match = str.substr(pos + line.length).match(/^[^\s]+(\s*)/))) {
  479. line = line + match[0].substr(0, match[0].length - (!afterSpace ? (match[1] || '').length : 0));
  480. }
  481. result += line;
  482. pos += line.length;
  483. if (pos < len) {
  484. result += '\r\n';
  485. }
  486. }
  487. return result;
  488. },
  489. /**
  490. * Splits a mime encoded string. Needed for dividing mime words into smaller chunks
  491. *
  492. * @param {String} str Mime encoded string to be split up
  493. * @param {Number} maxlen Maximum length of characters for one part (minimum 12)
  494. * @return {Array} Split string
  495. */
  496. splitMimeEncodedString: (str, maxlen) => {
  497. let curLine,
  498. match,
  499. chr,
  500. done,
  501. lines = [];
  502. // require at least 12 symbols to fit possible 4 octet UTF-8 sequences
  503. maxlen = Math.max(maxlen || 0, 12);
  504. while (str.length) {
  505. curLine = str.substr(0, maxlen);
  506. // move incomplete escaped char back to main
  507. if ((match = curLine.match(/[=][0-9A-F]?$/i))) {
  508. curLine = curLine.substr(0, match.index);
  509. }
  510. done = false;
  511. while (!done) {
  512. done = true;
  513. // check if not middle of a unicode char sequence
  514. if ((match = str.substr(curLine.length).match(/^[=]([0-9A-F]{2})/i))) {
  515. chr = parseInt(match[1], 16);
  516. // invalid sequence, move one char back anc recheck
  517. if (chr < 0xc2 && chr > 0x7f) {
  518. curLine = curLine.substr(0, curLine.length - 3);
  519. done = false;
  520. }
  521. }
  522. }
  523. if (curLine.length) {
  524. lines.push(curLine);
  525. }
  526. str = str.substr(curLine.length);
  527. }
  528. return lines;
  529. },
  530. encodeURICharComponent: chr => {
  531. let res = '';
  532. let ord = chr
  533. .charCodeAt(0)
  534. .toString(16)
  535. .toUpperCase();
  536. if (ord.length % 2) {
  537. ord = '0' + ord;
  538. }
  539. if (ord.length > 2) {
  540. for (let i = 0, len = ord.length / 2; i < len; i++) {
  541. res += '%' + ord.substr(i, 2);
  542. }
  543. } else {
  544. res += '%' + ord;
  545. }
  546. return res;
  547. },
  548. safeEncodeURIComponent(str) {
  549. str = (str || '').toString();
  550. try {
  551. // might throw if we try to encode invalid sequences, eg. partial emoji
  552. str = encodeURIComponent(str);
  553. } catch (E) {
  554. // should never run
  555. return str.replace(/[^\x00-\x1F *'()<>@,;:\\"[\]?=\u007F-\uFFFF]+/g, '');
  556. }
  557. // ensure chars that are not handled by encodeURICompent are converted as well
  558. return str.replace(/[\x00-\x1F *'()<>@,;:\\"[\]?=\u007F-\uFFFF]/g, chr => this.encodeURICharComponent(chr));
  559. }
  560. };