1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
/**
* cleanup word content
* @param content
*/
function word(content) {
var bull = String.fromCharCode(8226);
var middot = String.fromCharCode(183);
var cb;
content = content.replace(new RegExp('<p class=MsoHeading.*?>(.*?)<\/p>', 'gi'), '<p><b>$1</b></p>');
content = content.replace(new RegExp('tab-stops: list [0-9]+.0pt">', 'gi'), '">' + "--list--");
content = content.replace(new RegExp(bull + "(.*?)<BR>", "gi"), "<p>" + middot + "$1</p>");
content = content.replace(new RegExp('<SPAN style="mso-list: Ignore">', 'gi'), "<span>" + bull); // Covert to bull list
content = content.replace(/<o:p><\/o:p>/gi, "");
content = content.replace(new RegExp('<br style="page-break-before: always;.*>', 'gi'), '-- page break --'); // Replace pagebreaks
content = content.replace(new RegExp('<(!--)([^>]*)(--)>', 'g'), ""); // Word comments
content = content.replace(/<\/?span[^>]*>/gi, "");
content = content.replace(new RegExp('<(\\w[^>]*) style="([^"]*)"([^>]*)', 'gi'), "<$1$3");
content = content.replace(/<\/?font[^>]*>/gi, "");
content = content.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3");
content = content.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");
content = content.replace(/<\\?\?xml[^>]*>/gi, "");
content = content.replace(/<\/?\w+:[^>]*>/gi, "");
content = content.replace(/-- page break --\s*<p> <\/p>/gi, ""); // Remove pagebreaks
content = content.replace(/-- page break --/gi, ""); // Remove pagebreaks
content = content.replace(/<h[1-6]> <\/h[1-6]>/gi, '<p> </p>');
content = content.replace(/<h[1-6]>/gi, '<p><b>');
content = content.replace(/<\/h[1-6]>/gi, '</b></p>');
content = content.replace(/<b> <\/b>/gi, '<b> </b>');
content = content.replace(/^( )*/gi, '');
content = content.replace(/--list--/gi, ""); // Remove --list--
return content;
}
|