#!/usr/local/bin/dbzperl
#
# Ask an NNTP server (NEWNEWS) which articles have arrived since the time
# recorded in todo.$SERVER/.lasttime, compare each message-id against the
# local history file, and write the ids that still need fetching to
# todo.$SERVER/<start-time>.  On a complete listing, .lasttime is advanced.
#
# This is Perl 4 code (dbmopen with a bareword array name, timelocal.pl,
# &sub calls); the syntax is deliberately kept compatible with that.
#
# NOTE(review): the copy this was restored from had lost its line breaks and
# every <FILEHANDLE> read.  The reads were restored from context; the one
# place where more than a handle name was lost is marked below.

($SERVER) = 'news.utdallas.edu';
$CNEWS = 1;
$TIMEOUT = 15 * 60;
$LIB = '/usr/lib/news';
$INCOMING = '/news/in.coming';
#$NEWSGROUPS = 'news.*,comp.*,sci.*,ba.*,biz.*,ca.*,ddn.*,gnu.*,info.*,la.*,misc.*,news.*,rec.*,soc.*,talk.*,trial.*,alt.*,control'
$NEWSGROUPS = 'news.*';
$CHOKE = 400;			# allow at least 400K in partition

$start = time;
$todo = "todo.$SERVER";

chdir($INCOMING) || die "Can't chdir to $INCOMING: $!\n";

#require "$LIB/available.pl";
#&available(".", $CHOKE) || die "not enough disk space\n";

# Refuse to run when the pid recorded by an earlier run still belongs to a
# process whose ps line mentions "fetch".  The pid must be all digits: it is
# interpolated into a shell command, and an empty value would make ps list
# this very process.
if (open(PID, "$todo/.fetch1pid")) {
    chop($pid = <PID>);
    close PID;
    if ($pid =~ /^\d+$/ && `/bin/ps ww$pid` =~ /fetch/) {
	print STDERR "locked " . `date`;
	exit 1;
    }
}
open(PID, ">$todo/.fetch1pid");
print PID $$,"\n";
close PID;

# From here on everything printed goes to the log file.
open(STDOUT,">>$todo/fetch1.log");
open(STDERR,">&STDOUT");

# .lasttime holds "yymmdd hhmmss" (GMT), the starting point for NEWNEWS.
open(LAST, "$todo/.lasttime")
    || die "Can't open $INCOMING/$todo/.lasttime: $!\n";
$_ = <LAST>;
close LAST;
# Any two-digit year is accepted; insisting on 9x made the script reject
# its own state file from 2000 onwards.
(($date, $time) = /^(\d\d[01]\d[0123]\d)\s+([012]\d[0-5]\d[0-5]\d)$/)
    || die "Format of $INCOMING/$todo/.lasttime isn't yymmdd hhmmss\n";

# Find the highest-numbered (most recent) todo file and remember the last
# message-id in it; got_one uses that to drop ids already listed there.
opendir(TODO, "$todo") || die "Can't opendir $todo: $!\n";
while ($_ = readdir(TODO)) {
    $last = $_ if /^\d+$/ && $_ > $last;
}
closedir(TODO);
if ($last =~ /^\d+$/ && open(LAST, "$todo/$last")) {
    $Lastmessid = $_ while <LAST>;
    chop($Lastmessid);
    close LAST;
}
print STDERR "LAST\t$Lastmessid\t", `date`;

# Connect to the server.

# sockaddr_in layout: short family, network-order port, 4 address bytes,
# 8 bytes of padding.
$pat = 'S n C4 x8';

# NOTE(review): these constants are hard-coded rather than taken from the
# system headers -- confirm they match the target platform.
$af_unix = 1;
$af_inet = 2;

$stream = 1;
$datagram = 2;

($name,$aliases,$proto) = getprotobyname('tcp');
$tcp = $proto;

($name,$aliases,$port,$proto) = getservbyname('nntp','tcp');
$nntp = $port || 119;		# fall back to the standard NNTP port

if ($SERVER =~ /^\d+\./) {
    @bytes = split(/\./,$SERVER);
}
else {
    ($name,$aliases,$addrtype,$length,@addrs) = gethostbyname($SERVER);
    die "Can't lookup $SERVER\n" unless $name;
    @bytes = unpack("C4",$addrs[0]);
}

$this = pack($pat,$af_inet,0,     0,0,0,0);
$that = pack($pat,$af_inet,$nntp,@bytes);

socket(NNTP,$af_inet,$stream,$tcp) || die "socket: $!\n";
bind(NNTP,$this)                   || die "bind: $!\n";
connect(NNTP,$that)                || die "connect: $!\n";

$SIG{'PIPE'} = 'PIPE';
$SIG{'ALRM'} = 'ALRM';

# Unbuffer all three handles so log lines and NNTP commands go out at once.
select(NNTP);   $| = 1;
select(STDERR); $| = 1;
select(STDOUT); $| = 1;

print STDERR "\nConnected to NNTP server at $SERVER (",join('.',@bytes),").\n";

# $cmpdate is the cutoff in the same representation as the date field of
# the history file.
if ($CNEWS) {
    require "timelocal.pl";
    # (sec, min, hour, mday, mon, year) as timegm expects them.
    @tm = reverse unpack('A2A2A2A2A2A2', $date . $time);
    $tm[4]--;				# months are 0-based
    $tm[5] += 100 if $tm[5] < 70;	# 00..69 means 2000..2069
    $cmpdate = &timegm(@tm);
    print "(time = $cmpdate)\n";
}
else {
    $cmpdate = $date;
}

print "Starting: ",`date`;

# Perl 4 form of dbmopen: ties the associative array %DHIST.
dbmopen(DHIST,"$LIB/history",0666) || die "Can't open history dbm file: $!\n";

print STDERR "Loading history...";
open(HIST,"$LIB/history") || die "Can't open history file";
($st_dev,$st_ino,$st_mode,$st_nlink,$st_uid,$st_gid,$st_rdev,$st_size,
    $st_atime,$st_mtime,$st_ctime,$st_blksize,$st_blocks) = stat(HIST);

# Probe backwards from the end in 100000-byte steps until a line older than
# the cutoff turns up, so only the tail of the file has to be read.
for ($offset = $st_size - 100000; $offset > 0; $offset -= 100000) {
    if (seek(HIST,$offset,0)) {
	$_ = <HIST>;		# probably starts in middle of a line
	$_ = <HIST>;
	$stamp = &hist_stamp($_);
	next if $stamp eq '';
	last if $stamp < $cmpdate;
    }
    else {
	$offset = -1;
    }
}
# No line older than the cutoff was found (this includes the loop running
# down to exactly 0): scan from the top.
seek(HIST,0,0) if $offset <= 0;

# Walk forward to the first line at or after the cutoff.  $pos tracks the
# start of the line just read so that line itself is not lost.
$pos = tell(HIST);
while (<HIST>) {
    $stamp = &hist_stamp($_);
    last if $stamp ne '' && $stamp >= $cmpdate;
    $pos = tell(HIST);
}
$pct = $st_size ? int($pos * 100 / $st_size) : 0;
print STDERR "starting at $pct%...";

# %history maps message-id to the byte offset of its history line, for every
# line from the cutoff onwards.
seek(HIST,$pos,0);
while (<HIST>) {
    /^(<[^>]*>)/ && ($history{$1} = $pos + 0);
    $pos = tell(HIST);
}
print STDERR "done\n";

print STDERR "newnews $NEWSGROUPS $date $time GMT\n";
print NNTP "newnews $NEWSGROUPS $date $time GMT\n";

open(TMP,">$todo/fetch.$start") || die "Can't open tmp file";

# Log the server's replies up to the 230 line that precedes the id list.
while (<NNTP>) {
    print STDERR;
    last if /^230/;
}

# Article paths from the history file are tested relative to /news.
chdir("/news") || die "Can't cd to /news: $!\n";

$complete = 0;			# set once the terminating "." is seen
while (<NNTP>) {
    alarm($TIMEOUT);
    chop; chop;			# strip the two-character line terminator
    $Messid = $_;
    y/A-Z/a-z/;			# $_ is now the lower-cased id
    if ($_ eq '.') {
	$complete = 1;
	last;
    }

    # Not in recent history at all: wanted.
    unless ($history{$Messid}) {
	#delete $DHIST{$_ . "\000"};
	print STDERR ">>>h\t",$Messid,"\n";
	&got_one;
	next;
    }

    # Cross-check the dbm index (keys carry a trailing NUL) against the
    # offset found by scanning; the scanned offset wins on disagreement.
    $loc = $DHIST{$_ . "\000"};
    $loc = $DHIST{$Messid . "\000"} if $loc eq '';
    if ($loc eq '') {
	$loc = $history{$Messid};
	print STDERR "???d\t",$Messid,"\n";
    }
    else {
	($loc) = unpack("l",$loc);
	if ($loc != $history{$Messid}) {
	    print STDERR "???\t$loc != $history{$Messid}\n";
	    $loc = $history{$Messid};
	}
    }

    seek(HIST,$loc,0);
    $histline = <HIST>;
    ($messid,$histdate,$artlist) = split(/\t/,$histline);

    # NOTE(review): the two tests below replace a span that was destroyed in
    # the copy this was restored from; only the ">>>m" and ">>>s" messages
    # and the brace structure survived.  Confirm against the original.
    unless ($messid =~ /^[<]/) {
	#delete $DHIST{$_ . "\000"};
	print STDERR ">>>s\t$Messid\t",$_,"\n";
	&got_one;
	next;
    }
    if ($messid ne $Messid) {
	print STDERR ">>>m$messid\t",$Messid,"\n";
	&got_one;
	next;
    }

    # In history: see whether any listed copy is actually on disk.
    # $exists ends > 0 if a non-empty copy was found, < 0 if only
    # zero-length copies (which are removed) were found.
    @artlist = split(' ',$artlist);
    $exists = 0;
    for (@artlist) {
	y|.|/|;
	if (-e $_) {
	    if (-z _) {
		--$exists;
		unlink $_;
		print STDERR "\t\t$Messid $_ zero size\n";
	    }
	    else {
		print STDERR "\t\t$Messid $_ exists\n";
		++$exists;
		last;
	    }
	}
	else {
	    print STDERR "\t\t$Messid $_ doesn't exist\n";
	}
    }
    if ($exists < 1) {
	#delete $DHIST{$_ . "\000"};
	if ($exists < 0) {
	    print STDERR ">>>z\t",$Messid,"\n";
	}
	else {
	    print STDERR ">>>e\t",$Messid,"\n";
	}
	&got_one;
	next;
    }

    # Already have it.
    print STDERR "\t",$Messid,"\n";
}
alarm(0);			# don't let a pending timeout kill the wrap-up
close TMP;

chdir($INCOMING) || die "Can't chdir to $INCOMING: $!\n";

if ($complete) {
    rename("$todo/fetch.$start", "$todo/$start");
    $next_start = $start - 10 * 60;	# allow 10 minutes clock disparity
    ($sec,$min,$hr,$mday,$mo,$yr) = gmtime($next_start);
    open(LAST, ">$todo/.newlasttime") || die "Can't create .newlasttime: $!\n";
    $mo++;
    # gmtime's year counts from 1900; keep it to the two digits the reader
    # of .lasttime expects.
    printf LAST "%02d%02d%02d %02d%02d%02d\n",
	$yr % 100,$mo,$mday, $hr,$min,$sec;
    close LAST;
    # Written under a temporary name and renamed into place.
    rename("$todo/.newlasttime", "$todo/.lasttime");
}
else {
    warn "NNTP closed connection prematurely\n";
}

unlink "$todo/.fetch1pid";

##########################################################################

# Signal handlers.
sub PIPE {
    die "Died on SIGPIPE\n";
}

sub ALRM {
    die "NNTP connection timed out\n";
}

# Return the date field of a history line as a number comparable with
# $cmpdate, or '' when no date is found.  The separator is matched as
# space-or-tab because the literal character could not be recovered from
# the copy this was restored from.
sub hist_stamp {
    local($line) = @_;
    if ($CNEWS) {
	return $1 if $line =~ m|[ \t](\d+)|;
    }
    else {
	return $3 * 10000 + $1 * 100 + $2
	    if $line =~ m|[ \t](\d+)/(\d+)/(\d+)|;
    }
    return '';
}

# Append the current $Messid to the todo file.  When it is the last id of
# the previous todo file, empty the file: everything written so far,
# including this id, is discarded.
sub got_one {
    print TMP $Messid,"\n";
    if ($Messid eq $Lastmessid) {
	seek(TMP,0,0);
	truncate(TMP,0);
	print STDERR "Truncating\n";
    }
}