diff --git a/configure.php b/configure.php
index 327b39532..1bb9112a4 100755
--- a/configure.php
+++ b/configure.php
@@ -628,6 +628,7 @@ function getFileModificationHistory(): array {
     $ac["TRANSLATION_ONLY_INCL_END"] = "-->";
 }
 checkvalue($ac['LANG']);
+file_put_contents( __DIR__ . "/temp/lang" , $ac['LANG'] );
 
 checking("whether the language is supported");
 $LANGDIR = "{$ac['rootdir']}/{$ac['LANG']}";
@@ -850,8 +851,9 @@ function dom_saveload( DOMDocument $dom , string $filename = "" ) : string
 
 if ( dom_load( $dom , "{$ac['srcdir']}/{$ac["INPUT_FILENAME"]}" ) )
 {
+    echo "1 ";
     dom_saveload( $dom ); // correct file/line/column on error messages
-    echo "done.\n";
+    echo "2 done.\n";
 }
 else
 {
diff --git a/scripts/translation/libqa/OutputBuffer.php b/scripts/translation/libqa/OutputBuffer.php
new file mode 100644
index 000000000..2c8a5b057
--- /dev/null
+++ b/scripts/translation/libqa/OutputBuffer.php
@@ -0,0 +1,172 @@
+ |
++----------------------------------------------------------------------+
+
+# Description
+
+This class caches formatted output, and calculates if this output is not
+previously marked as ignored, before printing it. */
+
+/**
+ * Buffers the report of one checked file (header, matter lines and footer
+ * lines) and prints it only when OutputIgnore does not suppress it.
+ */
+class OutputBuffer
+{
+    private string $filename = "";
+    private string $header = "";
+    private array $matter = [];
+    private array $footer = [];
+
+    private OutputIgnore $ignore;
+    private string $options;
+
+    /**
+     * @param string       $header   Report title, printed before the file name.
+     * @param string       $filename Path of the file the report is about.
+     * @param OutputIgnore $ignore   Decides whether the report is suppressed.
+     */
+    public function __construct( string $header , string $filename , OutputIgnore $ignore )
+    {
+        $filename = str_replace( "/./" , "/" , $filename );
+
+        $this->header = $header . ": " . $filename . "\n\n";
+        $this->filename = $filename;
+        $this->ignore = $ignore;
+
+        // Command line options (minus the script name) take part in the hashes.
+        $copy = $ignore->residualArgv;
+        array_shift( $copy );
+        $this->options = implode( " " , $copy );
+    }
+
+    /** Appends one chunk of text to the report matter. */
+    public function add( string $text ) : void
+    {
+        $this->matter[] = $text;
+    }
+
+    /**
+     * Appends a count mismatch line: "+ " when only the source has $text,
+     * "- " when only the target has it, "* " otherwise. Equal counts add nothing.
+     */
+    public function addDiff( string $text , int $sourceCount , int $targetCount ) : void
+    {
+        if ( $sourceCount == $targetCount )
+            return;
+        $prefix = "* ";
+        $suffix = " -{$targetCount} +{$sourceCount}";
+        if ( $sourceCount == 0 )
+        {
+            $prefix = "- ";
+            $suffix = $targetCount == 1 ? "" : " -{$targetCount}";
+        }
+        if ( $targetCount == 0 )
+        {
+            $prefix = "+ ";
+            $suffix = $sourceCount == 1 ? "" : " +{$sourceCount}";
+        }
+        $this->add( "{$prefix}{$text}{$suffix}\n" );
+    }
+
+    /** Appends one line printed after the matter (not part of the hashes). */
+    public function addFooter( string $text ) : void
+    {
+        $this->footer[] = $text;
+    }
+
+    /** Appends a separator newline, unless the matter is empty or already ends with one. */
+    public function addLine() : void
+    {
+        if ( count( $this->matter ) > 0 && end( $this->matter ) != "\n" )
+            $this->add( "\n" );
+    }
+
+    /**
+     * Prints header, matter and footer, unless there is nothing to report or
+     * the report is marked as ignored.
+     *
+     * @param bool $useAlternatePrinting Interleave "+" and "-" lines, see printMatterAlternate().
+     */
+    public function print( bool $useAlternatePrinting = false ) : void
+    {
+        if ( count( $this->matter ) == 0 && count( $this->footer ) == 0 )
+            return;
+
+        $hashHead = $this->hash( false );
+        $hashFull = $this->hash( true );
+
+        // shouldIgnore() also appends the add/del ignore commands as footers.
+        if ( $this->ignore->shouldIgnore( $this , $this->filename , $hashHead , $hashFull ) )
+            return;
+
+        print $this->header;
+
+        if ( $useAlternatePrinting )
+            $this->printMatterAlternate();
+        else
+            foreach( $this->matter as $text )
+                print $text;
+
+        if ( count( $this->matter ) )
+            print "\n";
+
+        foreach( $this->footer as $text )
+            print $text;
+
+        if ( count( $this->footer ) )
+            print "\n";
+    }
+
+    /**
+     * Prints the n-th "+" line next to the n-th "-" line, then all other lines.
+     */
+    private function printMatterAlternate() : void
+    {
+        $add = [];
+        $del = [];
+        $rst = [];
+
+        foreach( $this->matter as $text )
+        {
+            // str_starts_with() is safe on empty strings, $text[0] is not.
+            if ( str_starts_with( $text , '+' ) ) $add[] = $text;
+            elseif ( str_starts_with( $text , '-' ) ) $del[] = $text;
+            else $rst[] = $text;
+        }
+
+        $rows = max( count( $add ) , count( $del ) );
+        for ( $idx = 0 ; $idx < $rows ; $idx++ )
+        {
+            if ( isset( $add[ $idx ] ) ) print $add[ $idx ];
+            if ( isset( $del[ $idx ] ) ) print $del[ $idx ];
+        }
+
+        foreach( $rst as $text )
+            print $text;
+    }
+
+    /**
+     * CRC32 of header and options (plus the matter when $withContents is
+     * true), computed after removing spaces, tabs and line breaks.
+     */
+    private function hash( bool $withContents ) : string
+    {
+        $text = $this->header . $this->options;
+        if ( $withContents )
+            $text .= implode( "" , $this->matter );
+        $text = str_replace( [ " " , "\n" , "\r" , "\t" ] , "" , $text );
+        return hash( "crc32b" , $text );
+    }
+}
diff --git a/scripts/translation/libqa/OutputIgnore.php b/scripts/translation/libqa/OutputIgnore.php
new file mode 100644
index 000000000..bada7fc2c
--- /dev/null
+++ b/scripts/translation/libqa/OutputIgnore.php
@@ -0,0 +1,142 @@
+ |
++----------------------------------------------------------------------+
+
+# Description
+
+This class processes commands for ignoring outputs, and complements non
+ignored outputs with these commands. */
+
+/**
+ * Keeps the list of ignore marks ("file:headerHash:matterHash") stored in
+ * the ignore file, and handles the command line options that maintain it.
+ */
+class OutputIgnore
+{
+    public array $residualArgv;
+
+    private bool $appendIgnores = true;
+    private bool $showIgnore = true;
+    private string $filename = ".syncxml.ignores";
+    private string $argv0 = "";
+
+    /**
+     * Processes the ignore related options of $argv.
+     *
+     * --add-ignore=MARK and --del-ignore=MARK update the ignore file and end
+     * the script. --disable-ignore stops the add/del commands from being
+     * appended to reports, and is removed from $argv (taken by reference).
+     */
+    public function __construct( array & $argv )
+    {
+        $this->argv0 = escapeshellarg( $argv[0] );
+
+        foreach( $argv as $key => $arg )
+        {
+            if ( str_starts_with( $arg , "--add-ignore=" ) )
+            {
+                $list = $this->loadIgnores();
+                $line = substr( $arg , 13 );
+                if ( ! in_array( $line , $list , true ) )
+                {
+                    $list[] = $line;
+                    $this->saveIgnores( $list );
+                }
+                exit;
+            }
+
+            if ( str_starts_with( $arg , "--del-ignore=" ) )
+            {
+                $list = $this->loadIgnores();
+                $line = substr( $arg , 13 );
+                $kept = array_values( array_filter( $list , fn( $mark ) => $mark !== $line ) );
+                if ( count( $kept ) == count( $list ) )
+                    print "Ignore mark not found.\n";
+                else
+                    $this->saveIgnores( $kept );
+                exit;
+            }
+
+            if ( $arg === "--disable-ignore" )
+            {
+                $this->showIgnore = false;
+                unset( $argv[$key] );
+            }
+        }
+
+        $this->residualArgv = $argv;
+    }
+
+    /**
+     * Reads the ignore marks from the ignore file.
+     *
+     * @return array List of marks, empty when the file does not exist.
+     * @throws RuntimeException When the file exists but cannot be decoded.
+     */
+    private function loadIgnores() : array
+    {
+        if ( ! file_exists( $this->filename ) )
+            return [];
+        $data = file_get_contents( $this->filename );
+        $text = $data === false ? false : gzdecode( $data );
+        // Marks are plain strings, so no class is allowed to be instantiated.
+        $list = $text === false ? false : unserialize( $text , [ 'allowed_classes' => false ] );
+        if ( ! is_array( $list ) )
+            throw new RuntimeException( "Cannot read ignore file '{$this->filename}'." );
+        return $list;
+    }
+
+    /**
+     * Writes $data (the list of ignore marks) to the ignore file, replacing it.
+     */
+    public function saveIgnores( $data ) : void
+    {
+        $contents = gzencode( serialize( $data ) );
+        file_put_contents( $this->filename , $contents );
+    }
+
+    /**
+     * Tells if a report is ignored, and appends to $output the commands that
+     * add this report's mark, or delete outdated marks of the same file/header.
+     *
+     * @return bool True when the exact mark is in the ignore file.
+     */
+    public function shouldIgnore( OutputBuffer $output , string $filename , string $hashHeader , string $hashMatter ) : bool
+    {
+        $prefix = "{$filename}:{$hashHeader}:";
+        $ignore = "{$filename}:{$hashHeader}:{$hashMatter}";
+        $marks = $this->loadIgnores();
+
+        $ret = in_array( $ignore , $marks , true );
+
+        if ( ! $this->showIgnore )
+            return $ret;
+
+        // Marks are quoted like argv0, so the commands survive copy and paste
+        // even when the file name has spaces or shell metacharacters.
+
+        // --add-ignore command
+
+        if ( ! $ret )
+            $output->addFooter( " php {$this->argv0} --add-ignore=" . escapeshellarg( $ignore ) . "\n" );
+
+        // --del-ignore commands, for outdated marks of this file/prefix
+
+        foreach ( $marks as $mark )
+            if ( is_string( $mark ) && $mark !== $ignore && str_starts_with( $mark , $prefix ) )
+                $output->addFooter( " php {$this->argv0} --del-ignore=" . escapeshellarg( $mark ) . "\n" );
+
+        return $ret;
+    }
+}
diff --git a/scripts/translation/libqa/SyncFileItem.php b/scripts/translation/libqa/SyncFileItem.php
new file mode 100644
index 000000000..2bece482d
--- /dev/null
+++ b/scripts/translation/libqa/SyncFileItem.php
@@ -0,0 +1,27 @@
+ |
++----------------------------------------------------------------------+
+
+# Description
+
+Holds file related data for sync XML tools. */
+
+require_once __DIR__ . '/all.php';
+
+class SyncFileItem
+{
+    public string $sourceDir; // directory of the source side, as given by RevcheckRun
+    public string $targetDir; // directory of the target side, as given by RevcheckRun
+    public string $file;      // "path/name" of the file, appended to both directories
+}
diff --git a/scripts/translation/libqa/SyncFileList.php b/scripts/translation/libqa/SyncFileList.php
new file mode 100644
index 000000000..77c2e6328
--- /dev/null
+++ b/scripts/translation/libqa/SyncFileList.php
@@ -0,0 +1,86 @@
+ |
++----------------------------------------------------------------------+
+
+# Description
+
+Generates (and caches) the list of files with TranslatedOk status. */
+
+require_once __DIR__ . '/all.php';
+
+class SyncFileList
+{
+    /**
+     * Lists the files with TranslatedOk status, for the language saved by
+     * configure.php in temp/lang. The list is cached in temp/LANG.oklist and
+     * the cache is reused for as long as that file exists and is readable.
+     *
+     * @return SyncFileItem[]
+     */
+    public static function load() : array
+    {
+        $langFile = __DIR__ . "/../../../temp/lang";
+        if ( ! file_exists( $langFile ) )
+        {
+            fwrite( STDERR , "Language file not found, run 'doc-base/configure.php'.\n" );
+            exit( 1 ); // failure must not look like success to calling scripts
+        }
+
+        $lang = trim( file_get_contents( $langFile ) );
+        $cache = __DIR__ . "/../../../temp/$lang.oklist";
+
+        if ( file_exists( $cache ) )
+        {
+            $list = SyncFileList::loadCache( $cache );
+            if ( $list !== null )
+                return $list;
+            // Unreadable cache: fall through and generate it again.
+        }
+
+        require_once __DIR__ . '/../lib/all.php';
+
+        $revcheck = new RevcheckRun( 'en' , $lang );
+        $revdata = $revcheck->revData;
+        $list = [];
+
+        foreach( $revdata->fileDetail as $detail )
+        {
+            if ( $detail->status != RevcheckStatus::TranslatedOk )
+                continue;
+
+            $item = new SyncFileItem();
+            $item->sourceDir = $revcheck->sourceDir;
+            $item->targetDir = $revcheck->targetDir;
+            $item->file = $detail->path . '/' . $detail->name;
+            $list[] = $item;
+        }
+
+        $contents = gzencode( serialize( $list ) );
+        file_put_contents( $cache , $contents );
+
+        return $list;
+    }
+
+    /** Returns the cached list, or null when the cache cannot be decoded. */
+    private static function loadCache( string $cache ) : ?array
+    {
+        $data = file_get_contents( $cache );
+        $text = $data === false ? false : gzdecode( $data );
+        if ( $text === false )
+            return null;
+        // Only SyncFileItem objects are stored, no other class may be created.
+        $list = unserialize( $text , [ 'allowed_classes' => [ SyncFileItem::class ] ] );
+        return is_array( $list ) ? $list : null;
+    }
+}
diff --git a/scripts/translation/libqa/XmlFrag.php b/scripts/translation/libqa/XmlFrag.php
new file mode 100644
index 000000000..a940db12b
--- /dev/null
+++ b/scripts/translation/libqa/XmlFrag.php
@@ -0,0 +1,200 @@
+ |
++----------------------------------------------------------------------+
+
+# Description
+
+Loads XML fragments bodies[1] in DOM, while tolerating and not dropping
+undefined entities references.
+
+[1] https://www.w3.org/TR/xml-fragment/#d1e1332 */
+
+class XmlFrag
+{
+    /**
+     * Lists $node and its descendants (in document order) whose nodeType is $type.
+     *
+     * @return DOMNode[]
+     */
+    static function listNodes( DOMNode $node , int $type )
+    {
+        $ret = [];
+        XmlFrag::listNodesRecurse( $node , $type , $ret );
+        return $ret;
+    }
+
+    /** Depth first worker of listNodes(), collects matches in $ret. */
+    static function listNodesRecurse( DOMNode $node , int $type, array & $ret )
+    {
+        if ( $node->nodeType == $type )
+            $ret[] = $node;
+        foreach( $node->childNodes as $child )
+            XmlFrag::listNodesRecurse( $child , $type, $ret );
+    }
+
+    /**
+     * Loads a fragment file. A first load without DTD is tried, and when it
+     * reports any error, the file is loaded again with a generated DTD.
+     *
+     * @return array [ DOMDocument , string[] error messages of the last load ]
+     */
+    static function loadXmlFragmentFile( string $filename )
+    {
+        $contents = file_get_contents( $filename );
+
+        [ $doc , $ent , $err ] = XmlFrag::loadXmlFragmentText( $contents , "" );
+
+        if ( count( $err ) == 0 )
+            return [ $doc , $err ];
+
+        // NOTE(review): the next statement reads as damaged in this copy of the
+        // patch (DTD text missing, $ent unused) -- confirm against the repository.
+        $dtd = "\n\n";
+        $dtd .= "]>\n";
+
+        [ $doc , $ent , $err ] = XmlFrag::loadXmlFragmentText( $contents , $dtd );
+
+        return [ $doc , $err ];
+    }
+
+    /**
+     * Loads fragment text in a DOMDocument, in libxml recover mode.
+     *
+     * @return array [ DOMDocument , string[] entity names taken from the
+     *               error messages , string[] all error messages ]
+     */
+    static function loadXmlFragmentText( string $contents , string $dtd )
+    {
+        // NOTE(review): this statement and the wrapper strings below read as
+        // damaged in this copy of the patch -- confirm against the repository.
+        if ( str_starts_with( ltrim( $contents ) , '' , $pos1 );
+            $contents = substr( $contents , $pos2 +2 );
+        }
+
+        $contents = $dtd . "" . $contents . "";
+
+        $doc = new DOMDocument();
+        $doc->recover = true;
+        $doc->resolveExternals = false;
+        $doc->substituteEntities = false;
+
+        $was = libxml_use_internal_errors( true );
+
+        $doc->loadXML( $contents );
+        $errors = libxml_get_errors();
+        libxml_clear_errors();
+        libxml_use_internal_errors( $was );
+
+        static $prefix = "", $suffix = "", $extra = "";
+        if ( $prefix == "" )
+            XmlFrag::setupErrors( $prefix , $suffix , $extra );
+
+        $ent = [];
+        $err = [];
+
+        foreach( $errors as $error )
+        {
+            $message = trim( $error->message );
+
+            if ( str_starts_with( $message , $prefix ) && str_ends_with( $message , $suffix ) )
+            {
+                $entity = $message;
+                $entity = str_replace( $prefix , "" , $entity );
+                $entity = str_replace( $suffix , "" , $entity );
+                $ent[] = $entity;
+            }
+
+            $err[] = $message;
+        }
+
+        // Replaces the root element by its children. Without a root element
+        // (unrecoverable text) the document is returned as loaded.
+        $root = $doc->documentElement;
+        if ( $root !== null )
+        {
+            $fragment = $doc->createDocumentFragment();
+            foreach( $root->childNodes as $node )
+                $fragment->append( $node->cloneNode( true ) );
+
+            $doc->removeChild( $root );
+            if ( $fragment->hasChildNodes() )
+                $doc->appendChild( $fragment );
+        }
+
+        return [ $doc , $ent , $err ];
+    }
+
+    /**
+     * Discovers the texts libxml emits for undefined entity references, by
+     * loading probe documents that reference the undefined entity ZZZ.
+     *
+     * @throws Exception When libxml does not report the expected messages.
+     */
+    static function setupErrors( string & $prefix , string & $suffix , string & $extra )
+    {
+        /*
+        Undefined entities references generate TWO different error messages on
+        some versions of libxml:
+
+        - "Entity '?' not defined" (for entity inside elements)
+        - "Extra content at the end of the document" (entity outside elements)
+        */
+
+        // NOTE(review): both probes read as damaged in this copy of the patch
+        // (they are expected to differ) -- confirm against the repository.
+        $inside = "&ZZZ;";
+        $outside = "&ZZZ;";
+
+        $doc = new DOMDocument();
+        $doc->recover = true;
+        $doc->resolveExternals = false;
+        $doc->substituteEntities = false;
+
+        $was = libxml_use_internal_errors( true );
+
+        try
+        {
+            // prefix, suffix
+
+            $doc->loadXML( $inside );
+            $message = trim( libxml_get_errors()[0]->message ?? "" );
+            libxml_clear_errors();
+
+            $parts = explode( "ZZZ" , $message );
+            if ( count( $parts ) < 2 )
+                throw new Exception( "Unexpected error message." );
+            [ $prefix , $suffix ] = $parts;
+
+            // extra
+
+            $doc->loadXML( $outside );
+            $extra = trim( libxml_get_errors()[0]->message ?? "" );
+            libxml_clear_errors();
+
+            if ( str_contains( $extra , "ZZZ" ) )
+                throw new Exception( "Unexpected error message." );
+        }
+        finally
+        {
+            // Restored on every path, exceptions included.
+            libxml_clear_errors();
+            libxml_use_internal_errors( $was );
+        }
+    }
+}
diff --git a/scripts/translation/libqa/all.php b/scripts/translation/libqa/all.php
new file mode 100644
index 000000000..513eba76d
--- /dev/null
+++ b/scripts/translation/libqa/all.php
@@ -0,0 +1,25 @@
+ |
++----------------------------------------------------------------------+
+*/
+
+ini_set( 'display_errors' , 1 );
+ini_set( 'display_startup_errors' , 1 );
+error_reporting( E_ALL );
+
+require_once __DIR__ . '/OutputBuffer.php';
+require_once __DIR__ . '/OutputIgnore.php';
+require_once __DIR__ . '/SyncFileList.php';
+require_once __DIR__ . '/SyncFileItem.php';
+require_once __DIR__ . '/XmlFrag.php';
diff --git a/scripts/translation/qaxml-sync-attributes.php b/scripts/translation/qaxml-sync-attributes.php
new file mode 100644
index 000000000..4576e59cb
--- /dev/null
+++ b/scripts/translation/qaxml-sync-attributes.php
@@ -0,0 +1,67 @@
+ |
++----------------------------------------------------------------------+
+
+# Description
+
+Compare attributes usage between two XML leaf/fragment files. */
+
+require_once __DIR__ . '/libqa/all.php';
+
+$ignore = new OutputIgnore( $argv ); // always first, may exit.
+$oklist = SyncFileList::load();
+
+foreach ( $oklist as $file )
+{
+    $source = $file->sourceDir . '/' . $file->file;
+    $target = $file->targetDir . '/' . $file->file;
+    $output = new OutputBuffer( "# qaxml.a" , $target , $ignore );
+
+    [ $s ] = XmlFrag::loadXmlFragmentFile( $source );
+    [ $t ] = XmlFrag::loadXmlFragmentFile( $target );
+
+    $s = XmlFrag::listNodes( $s , XML_ELEMENT_NODE );
+    $t = XmlFrag::listNodes( $t , XML_ELEMENT_NODE );
+
+    $s = extractTriple( $s );
+    $t = extractTriple( $t );
+
+    if ( $s === $t ) // same triples, in the same order
+        continue;
+
+    // Usage count of each triple, per side. Triples of the source are
+    // reported first, then the triples found only in the target.
+    $sourceCount = array_count_values( $s );
+    $targetCount = array_count_values( $t );
+
+    foreach( array_keys( $sourceCount + $targetCount ) as $k )
+        $output->addDiff( (string) $k , $sourceCount[$k] ?? 0 , $targetCount[$k] ?? 0 );
+
+    $output->print();
+}
+
+/**
+ * Lists one "element attribute value" text per attribute of the elements.
+ *
+ * @param DOMElement[] $list
+ * @return string[]
+ */
+function extractTriple( array $list ) : array
+{
+    $ret = [];
+    foreach( $list as $elem )
+        foreach( $elem->attributes as $attrib )
+            $ret[] = "{$elem->nodeName} {$attrib->nodeName} {$attrib->nodeValue}";
+    return $ret;
+}