1 #!/opt/isis/tcl/bin/tcl
5 # split a message into parts
6 # create a new message from selected headers and textified parts
7 # keep original non-text parts in files
8 # qualify cleaned message by dbacl
10 # in mime.tcl, BE SURE to remove the comment from the line
11 # error "termination string missing in $state(content)"
12 # cf. BUG 631314 "infinite loop on bad data"
14 # unterminated multipart is very frequent in SPAM,
15 # be VERY rare in non-automated mail
16 # when not erroring out, mime.tcl may hang
18 namespace import ::mime::*;
20 proc mail2txt file { exec sed -e {1,/^$/d} <$file >$file.txt }
21 proc html2txt file { exec lynx -dump -force_html -nolog $file >$file.txt }
22 proc winl2txt file { # stupid M$ "Cp1252"
23 exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file \
27 exec tr {\200-\237} {E?,f".+#^%S<Ö?Z??''''""*--"Ts>ö?zY} <$file >$file.txt
29 proc long2txt file { exec fmt -s <$file >$file.txt }
30 proc txt2txt file { file rename $file $file.txt }
31 proc pdf2txt file { exec pdftotext -raw $file - >$file.txt }
33 exec unrtf --nopict --html $file 2>/dev/null \
34 | lynx -dump -force_html -nolist /proc/self/fd/0 >$file.txt
36 proc doc2txt file { exec antiword $file >$file.txt }
39 # - figure out real file type
40 # - figure out an extension so that webserver will deliver with proper ctype
41 # - in any case, rename or convert file to that extension
42 # - for some "text" types, also create additional plain text version
43 proc fixpart {file {ctype ""}} {
44 # figure out file type
46 # shouldn't harm to believe image
47 switch -glob -- $ctype \
48 image/*gif { set ftype "GIF image" } \
49 image/*jpeg { set ftype "JPEG image" } \
50 image/*png { set ftype "PNG image"
53 set ftype [exec file -b $file]
55 # figure out extension and fix from file type
59 switch -glob -- $ftype \
60 "*mail text*" { set fix [set ext mail]2txt } \
61 *HTML* { # maybe ASCII or ISO-8859 HTML document text, so check before ASCII
62 set fix [set ext html]2txt } \
63 "*Non-ISO extended-ASCII*long lines" { # assume windows stuff
65 "*Non-ISO extended-ASCII*" { set fix win2txt } \
66 "ASCII*text*long lines" - "ISO-8859*text*long lines" { set fix long2txt } \
67 ASCII*text* - ISO-8859*text* {} \
68 PDF* { set fix [set ext pdf]2txt } \
69 "Rich Text Format*" { set fix [set ext rtf]2txt } \
70 "Microsoft Office Document*" { set fix [set ext doc]2txt } \
71 "*GIF image*" { set ext gif } \
72 "*JPEG image*" { set ext jpg } \
73 "*PC bitmap*" { set ext bmp } \
74 "*PNG image*" { set ext png } \
75 "*TIFF image*" { set ext tiff } \
76 Zip* { set ext zip } \
77 gzip* { set ext gz } \
78 *image* { set ext img } \
79 default { set ext bin }
80 ::puts stderr "file $file; c=\"$ctype\"; f=\"$ftype;\" e=$ext; fix=$fix"
81 # if we don't have no fix,
82 # or can not successfully apply a fix, we stick with the original
83 if {"" == $fix || [catch {$fix $file}]} {
85 ::puts stderr "FIXPART $file:\t$::errorInfo"
86 catch {file delete $file.txt}
88 file rename $file $file.$ext
91 # if the original is represented by a non-destructive txt fix,
92 # delete it and keep only the fix
97 # we have a successfull non-equivalence fix -- keep both
98 file rename $file $file.$ext
103 # operate on array env
104 # file => base filename
105 # attc => saved attachments (list of extensions and original ctypes)
106 # part => list of plaintext part tokens
107 proc saveparts {envname msg} {
109 if [catch {getproperty $msg content} ctype] {
110 set ctype "text/plain"
113 if ![catch {getproperty $msg parts} parts] {
114 if {"multipart/alternative" == $ctype} { # select one
115 set select [lindex $parts 0]
117 if {![catch {getproperty $msg ctype} pc] && "text/plain" == $pc } {
122 set parts [list $select]
124 # TODO: also care for mp/apple-double
130 # a leave is not plain or too long: save as $file.i, i=0,1..
131 set file $env(file).[expr [llength $env(attc)]/2]
132 set fid [open $file w]
133 fconfigure $fid -translation binary
134 puts -nonewline $fid [getbody $msg]
136 # check and fix the file, get primary and secondary extension
137 foreach {ext sec} [fixpart $file $ctype] break
138 # optionally create new part
139 if {"txt" == $ext} { # create new text/plain part
140 set fid [open $file.txt]
141 fconfigure $fid -translation binary
142 set body [read $fid 8000]
144 set complete [expr 8000 >= [file size $file.txt]]
145 set charset ISO-8859-1
146 if {"text/plain" == $ctype} {
147 foreach {key val} [getproperty $msg params] {
148 if {"charset" == $key} { set charset $val }
151 lappend env(part) [initialize -canonical text/plain \
152 -param [list charset $charset] \
153 -encoding quoted-printable -string $body]
154 if {$complete || "" != $sec} {
155 file delete $file.txt
157 if {"" != $sec} { # use that as attachment
159 } elseif {$complete} { # no attachment
163 lappend env(attc) $ext $ctype
167 proc fixmsg {file dir link} {
168 set msg [initialize -file $dir$file]
169 array set env [list file $dir$file part "" attc ""]
171 ::puts stderr "$dir$file\t$env(attc)\t$env(part)"
172 if {"" != $env(attc)} { # create referring part
174 if ![file isdirectory .attc] { file mkdir .attc }
176 foreach {ext ctype} $env(attc) {
177 set f $file.[incr i].$ext
178 file rename $dir$f .attc
179 append body "$link$f\t$ctype\n"
181 lappend env(part) [initialize -canonical text/plain \
182 -param {charset US-ASCII} -encoding 7bit -string $body]
184 if {1 == [llength $env(part)]} {
185 set nmsg [lindex $env(part) 0]
187 set nmsg [initialize -canonical multipart/mixed -parts $env(part)]
189 foreach {key vallist} [getheader $msg] {
190 foreach val $vallist {
191 # ::puts stderr "$key $val"
192 switch -- [string tolower $key] \
193 subject - date - from - to - cc - delivered-to - \
194 message-id - references - in-reply-to {
195 setheader $nmsg $key $val -mode append
199 if ![file isdirectory .orig] { file mkdir .orig }
200 file rename $dir$file .orig
202 set fid [open $dir$file w]
203 copymessage $nmsg $fid
210 foreach {file dir base link} $argv break
216 } elseif {[file isdirectory tmp]} {
219 if {"-" == $file || "" == $file} {
220 set file [clock format [clock seconds] -format %Y%m%d%H%M%S -gmt 1]
223 while {[catch {open $dir$file {WRONLY CREAT EXCL}} fid]} {
225 error "too many tries on $dir$file"
227 set file $base-[incr try]
229 while {![eof stdin]} {
230 puts -nonewline $fid [read stdin 8192]
235 if [catch {fixmsg $file $dir $link} err] {
236 # some errors have 99.99% SPAM probability
237 switch -glob -- $err \
238 "termination string missing in multipart*" - \
239 "multiple Content-Type fields*" {
240 set spam "FIXMSG $err"
242 ::puts stderr "FIXMSG $file:\t$::errorInfo"
244 if ![file isdirectory .spam] { file mkdir .spam }
246 catch {exec dbacl -c .cat/ok -c .cat/spam $dir$file}
247 if {2 != [scan $::errorCode "CHILDSTATUS %d %d" pid code]} {
248 ::puts stderr "OOPS! could not scan '$::errorCode'"
249 } elseif {2 == $code} {
254 ::puts stderr "FIXSPAM\t$file\t$spam"
255 file rename $dir$file .spam
256 # collect some headers even if message parsing failed
257 # for quick scan, ignore continuation lines
258 set headers "$file\t$spam\n"
259 set fid [open .spam/$file]
260 while {[gets $fid line]} {
261 if [regexp {^([\w\-]+):\s*(.*)} $line - key val] {
262 switch -- [string tolower $key] \
263 from - to - subject - delivered-to {
264 append headers $line "\n"
269 set fid [open .spam/.log {WRONLY APPEND CREAT}]
270 puts $fid $headers ;# let puts append a newline
274 if [file isdirectory new] {
275 file rename $dir$file new