| 
<?php
/**
 Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
 Copyright (C) 2007  Johan Barbier <[email protected]>
 
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
 /**
 * @desc Specialized parser for OpenOffice 2.3 sCalc files saved as HTML
 * @author Johan Barbier <[email protected]>
 * @version 20071101
 *
 */
 class openoffice23htmlparser extends genparser {

     /**
      * @desc Imports the translations contained in an OpenOffice 2.3 sCalc sheet saved as HTML.
      *
      * If the file still contains an upper-case <TBODY> tag it is rewritten in place so that it can be loaded as XML :
      * Tag names are lower-cased.
      * Everything before the first <tbody> is replaced by an XML prolog, an XHTML doctype and opening html/body/table tags.
      * Tag attributes are stripped.
      * <br>, <font> and </font> tags are removed.
      * The contents of every <td> are wrapped in a CDATA section.
      *
      * The file is then loaded with DOMDocument. The cells of the first row are read as language names,
      * the first cell of every following row as a constant name and the remaining cells as the values
      * for the language found in the same column of the first row.
      * For every language a directory is created under $this->subject->LOCALE_PATH if missing, and every
      * constant is passed to addModule() (when the module's default xml file is missing), addNewConstant() and updateValue() on $this->subject.
      *
      * Nothing is imported when the loaded document contains no <tr> element.
      *
      * @param string $sFile : file path
      * @throws fileUploadExceptions when $sFile does not exist
      */
     final protected function parseUploadedTranslation($sFile) {
         if(!file_exists($sFile)) {
             throw new fileUploadExceptions(fileUploadExceptions::_UPLOAD_ERR_NO_FILE_);
         }
         $sFileContents = file_get_contents($sFile);
         // An upper-case <TBODY> means the file has not been rewritten yet.
         if(is_string($sFileContents) && false !== strpos($sFileContents, '<TBODY>')) {
             // The /e modifier previously used here was removed in PHP 7.0; a callback does the same lower-casing.
             $sFileContents = preg_replace_callback('/(<\/?)(\w+)([^>]*>)/', array($this, 'lowercaseTagName'), $sFileContents);
             $sFileContents = substr_replace($sFileContents,'<?xml version="1.0" encoding="'.$this->sEncoding.'"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><body><table>', 0, strpos($sFileContents, '<tbody>'));
             // Drop every attribute : keep only the tag name and the closing '>'.
             $sFileContents = preg_replace('`(<\w+)\s([^>]*)(>)`', '$1$3', $sFileContents);
             $sFileContents = str_replace(array('<br>', '<font>', '</font>'), '', $sFileContents);
             // CDATA keeps the cell contents from being interpreted as markup by the XML loader.
             $sFileContents = str_replace(array('<td>', '</td>'), array('<td><![CDATA[', ']]></td>'), $sFileContents);
             file_put_contents($sFile, $sFileContents);
         }

         $oXml = new DOMDocument();
         $oXml->load($sFile);
         $oNodeList = $oXml->getElementsByTagName('tr');
         $oHeaderRow = $oNodeList->item(0);
         if(null === $oHeaderRow) {
             // No row at all (empty sheet or document that could not be loaded) : nothing to import.
             return;
         }

         // $aStored[column index] = array('LNG' => header cell, 'MOD' => array(constant => value))
         $aStored = array();
         $iCpt = 0;
         foreach($oHeaderRow->getElementsByTagName('td') as $oTd) {
             $aStored[$iCpt]['LNG'] = (string)preg_replace('`\s`', '',$oTd->nodeValue);
             $iCpt ++;
         }
         for($i = 1; $i < $oNodeList->length; $i++) {
             $oRowList = $oNodeList->item($i)->getElementsByTagName('td');
             if($oRowList->length < 2) {
                 // A row without at least one value cell contributes nothing.
                 continue;
             }
             // The first cell is the same for every column of the row : compute it once.
             $sRowKey = (string)preg_replace('`\s`', '',$oRowList->item(0)->nodeValue);
             for($j = 1; $j < $oRowList->length; $j++) {
                 $aStored[$j]['MOD'][$sRowKey] = (string)preg_replace('`\s$`', '',$oRowList->item($j)->nodeValue);
             }
         }

         foreach($aStored as $aV) {
             if(empty($aV['LNG'])) {
                 continue;
             }
             $aV['LNG'] = trim(html_entity_decode(preg_replace('`\s`', '', $aV['LNG'])));
             $aPays = getCodes($aV['LNG']);
             if(!empty($aPays['ALPHA3'])) {
                 $aV['LNG'] = $aPays['ALPHA3'];
             }
             // NOTE(review): the directory name comes straight from the uploaded file - confirm callers only pass trusted uploads.
             if(!is_dir($this->subject->LOCALE_PATH.$aV['LNG'])) {
                 mkdir($this->subject->LOCALE_PATH.$aV['LNG'], 0755);
             }
             if(empty($aV['MOD'])) {
                 // Column 0 and columns without any data row have no constants to iterate over.
                 continue;
             }
             foreach($aV['MOD'] as $sConst => $sVal) {
                 if(empty($sConst)) {
                     continue;
                 }
                 $sConst = trim(preg_replace('`\s`', '', $sConst));
                 $sVal = trim($sVal);
                 // The module name is the part of the constant name that precedes the first underscore.
                 $sMod = substr($sConst, 0, strpos($sConst, '_'));
                 if(!file_exists($this->subject->LOCALE_PATH.'default/'.$sMod.'.xml')) {
                     $this->subject->addModule($sMod, $this->sEncoding);
                 }
                 $this->subject->addNewConstant($sConst, $sMod);
                 $this->subject->updateValue($sConst, $sVal, false, $aV['LNG'], $sMod);
             }
         }
     }

     /**
      * @desc preg_replace_callback() handler : rebuilds a matched tag with its name in lower case.
      *
      * @param array $aMatch : 1 => '<' or '</', 2 => tag name, 3 => rest of the tag up to and including '>'
      * @return string
      */
     private function lowercaseTagName(array $aMatch) {
         return $aMatch[1].strtolower($aMatch[2]).$aMatch[3];
     }
 }
 ?>
 |