'; echo count($input);

// De-duplicate the events of an iCalendar (.ics) file and write the result
// to $outfile.
//
// $input is iterated line by line; each line is written back verbatim and
// joined without a separator, so it presumably comes from file() with the
// line endings kept -- TODO confirm against the code above this chunk.
//
// The input is cut into chunks at every line containing 'BEGIN:VEVENT'.
// The first chunk is the calendar header; it is treated like an event,
// which is harmless. Each chunk is fingerprinted with an md5 over the lines
// whose field name is NOT in $excluded, and only the first chunk seen for a
// given fingerprint is kept.

// Fields ignored when fingerprinting an event, i.e. two events that differ
// only in these fields count as duplicates. The match is on the full text
// before the first ':' (name plus parameters).
//
// A lot of events have lost their repeat information or have broken
// timezones, so we largely assume "same name etc." means "same event" and
// clean up any stragglers by hand. The GB timezone defined in the header
// (TZID:GB) can also be removed manually.
//
// DTSTART;VALUE=DATE, DTEND;VALUE=DATE and DURATION are deliberately not
// listed, so they still take part in the fingerprint.
$excluded = array(
    'UID',
    'CREATED',
    'LAST-MODIFIED',
    'DTSTART;TZID=Europe/London',
    'DTSTART;TZID=GB',
    'DTSTART;TZID=EST5EDT',
    'DTEND;TZID=Europe/London',
    'CLASS',    // private/public: not a meaningful difference here
    'SEQUENCE', // looks like the order in which entries appear on the calendar
    'RRULE',
);

$pruned = array();        // fingerprint => lines of the first chunk with that fingerprint
$checkSumCheck = array(); // fingerprint => lines that were hashed (debugging aid)
$output = array();        // flattened chunks, in input order, ready to be written
$count = 0;               // number of 'BEGIN:VEVENT' lines seen
$event = array();         // all lines of the chunk currently being collected
$eventChecksum = array(); // lines of the current chunk that feed the fingerprint

foreach ($input as $line) {
    if (strpos($line, 'BEGIN:VEVENT') !== false) {
        // A new event starts: store the chunk collected so far, unless an
        // identical one (by fingerprint) was already kept.
        $checkSum = md5(serialize($eventChecksum));
        if (!isset($pruned[$checkSum])) {
            $pruned[$checkSum] = $event;
            $checkSumCheck[$checkSum] = $eventChecksum;
            $output[] = implode('', $event);
        }
        $count++;
        $event = array();
        $eventChecksum = array();
    }

    $event[] = $line;

    // Fields may appear in any order and values may span several lines, so
    // decide per line by field name. A line without ':' has an empty field
    // name and therefore always feeds the fingerprint.
    $colon = strpos($line, ':');
    $fieldName = ($colon === false) ? '' : substr($line, 0, $colon);
    if (!in_array($fieldName, $excluded, true)) {
        $eventChecksum[] = $line;
    }
}

// The loop only stores a chunk when the NEXT 'BEGIN:VEVENT' shows up, so the
// last chunk is still pending here. Cut off the calendar trailer (and
// anything after it), because 'END:VCALENDAR' is appended again below.
for ($i = count($event) - 1; $i >= 0; $i--) {
    if (strpos($event[$i], 'END:VCALENDAR') !== false) {
        $event = array_slice($event, 0, $i);
        break;
    }
}
for ($i = count($eventChecksum) - 1; $i >= 0; $i--) {
    if (strpos($eventChecksum[$i], 'END:VCALENDAR') !== false) {
        $eventChecksum = array_slice($eventChecksum, 0, $i);
        break;
    }
}

// Store the pending chunk under the same uniqueness rule as all the others.
if (count($event) > 0) {
    $checkSum = md5(serialize($eventChecksum));
    if (!isset($pruned[$checkSum])) {
        $pruned[$checkSum] = $event;
        $checkSumCheck[$checkSum] = $eventChecksum;
        $output[] = implode('', $event);
    }
}

$output[] = 'END:VCALENDAR';

// file_put_contents() returns false on failure; do not report that as success.
$size = file_put_contents($outfile, $output);
if ($size === false) {
    echo "error: could not write $outfile";
} else {
    echo "done: file size $size";
}
?>