Advertisement
Semantiquele

check-vocabularyterms.pl

Dec 18th, 2011
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 5.12 KB | None | 0 0
  #!/bin/perl
  #
  # check-vocabularyterms.pl
  #
  # A given text file is checked for occurrences of misspelled
  # vocabulary terms from the following namespaces:
  #   rdf, rdfs, owl, xsd
  #
  # A term counts as misspelled when the local name of a registered
  # vocabulary term (e.g. "subClassOf") appears behind a prefix that is
  # not registered for that local name (e.g. "owl:subClassOf").
  #
  # USAGE: apply script to STDIN!
  #   One message is printed to STDOUT for every misspelled term found,
  #   giving the line number and the offending term.

  use strict;
  use warnings;

  # === CONFIG ===

  # Separators between prefix and localname (write as character sequence).
  # The sequence is placed verbatim inside a regex character class.
  my $URISHORTFORMSEPARATORS = ':';

  # === PROGRAM ===

  # --- Build map from localnames of vocabulary terms to /all/ their prefixes ---
  # $localname2prefixes{$localname}{$prefix} is true for every registered
  # "prefix:localname" combination.

  my %localname2prefixes;
  for my $vocabularyterm (getVocabularyTerms()) {
      my ($prefix, $localname) = split /:/, $vocabularyterm, 2;
      $localname2prefixes{$localname}{$prefix} = 1;
  }

  # --- Precompile one search pattern per localname ---
  # Capture 1 is the prefix actually used, capture 2 the separator.
  # The localname is quoted so it is always matched literally, and the
  # negative lookahead rejects longer names ("dateTime" inside
  # "dateTimeStamp") while still matching at the very end of a line.

  my %pattern_for;
  for my $localname (keys %localname2prefixes) {
      $pattern_for{$localname}
          = qr/([A-Za-z0-9]+)([$URISHORTFORMSEPARATORS])\Q$localname\E(?!\w)/;
  }

  # --- check file ---
  # Each line of the file is searched for occurrences of the localnames
  # for all registered vocabulary terms. For every occurrence found,
  # the actual prefix in the file is compared to all allowed prefixes
  # for the localname (sometimes there are more than one, as for "Class").

  my $linenumber = 0;
  while (my $line = <STDIN>) {
      ++$linenumber;
      chomp $line;

      # Sorted so that the report order does not depend on hash ordering.
      for my $localname (sort keys %localname2prefixes) {
          my $pattern = $pattern_for{$localname};

          # /g in scalar context walks through every occurrence on the line,
          # so a correct use does not hide a later misspelled one.
          while ($line =~ /$pattern/g) {
              my ($actualPrefix, $separator) = ($1, $2);
              next if $localname2prefixes{$localname}{$actualPrefix};
              print "bad vocabulary term in line $linenumber: <$actualPrefix$separator$localname>\n";
          }
      }
  }

  # === VOCABULARY TERMS ===

  # getVocabularyTerms
  #
  # Takes no arguments.
  # Returns a flat list of all known vocabulary terms, each written as
  # "prefix:localname". The list is the concatenation, in this order, of
  # the RDF, RDFS, OWL, datatype and facet term groups defined below.
  # "rdf:XMLLiteral" appears in both the RDF and the datatype group and is
  # therefore returned twice.
  sub getVocabularyTerms {

      my @vocabularyterms_rdf = qw(
          rdf:type
          rdf:Property
          rdf:Statement
          rdf:subject
          rdf:predicate
          rdf:object
          rdf:List
          rdf:first
          rdf:rest
          rdf:nil
          rdf:Seq
          rdf:Bag
          rdf:Alt
          rdf:_1
          rdf:_2
          rdf:_3
          rdf:_4
          rdf:_5
          rdf:_6
          rdf:_7
          rdf:_8
          rdf:_9
          rdf:value
          rdf:XMLLiteral
      );

      my @vocabularyterms_rdfs = qw(
          rdfs:domain
          rdfs:range
          rdfs:Resource
          rdfs:Literal
          rdfs:Datatype
          rdfs:Class
          rdfs:subClassOf
          rdfs:subPropertyOf
          rdfs:member
          rdfs:Container
          rdfs:ContainerMembershipProperty
          rdfs:comment
          rdfs:seeAlso
          rdfs:isDefinedBy
          rdfs:label
      );

      my @vocabularyterms_owl = qw(
          owl:AllDifferent
          owl:AllDisjointClasses
          owl:AllDisjointProperties
          owl:allValuesFrom
          owl:annotatedProperty
          owl:annotatedSource
          owl:annotatedTarget
          owl:Annotation
          owl:AnnotationProperty
          owl:assertionProperty
          owl:AsymmetricProperty
          owl:Axiom
          owl:backwardCompatibleWith
          owl:bottomDataProperty
          owl:bottomObjectProperty
          owl:cardinality
          owl:Class
          owl:complementOf
          owl:DataRange
          owl:datatypeComplementOf
          owl:DatatypeProperty
          owl:deprecated
          owl:DeprecatedClass
          owl:DeprecatedProperty
          owl:differentFrom
          owl:disjointUnionOf
          owl:disjointWith
          owl:distinctMembers
          owl:equivalentClass
          owl:equivalentProperty
          owl:FunctionalProperty
          owl:hasKey
          owl:hasSelf
          owl:hasValue
          owl:imports
          owl:incompatibleWith
          owl:intersectionOf
          owl:InverseFunctionalProperty
          owl:inverseOf
          owl:IrreflexiveProperty
          owl:maxCardinality
          owl:maxQualifiedCardinality
          owl:members
          owl:minCardinality
          owl:minQualifiedCardinality
          owl:NamedIndividual
          owl:NegativePropertyAssertion
          owl:Nothing
          owl:ObjectProperty
          owl:onClass
          owl:onDataRange
          owl:onDatatype
          owl:oneOf
          owl:onProperty
          owl:onProperties
          owl:Ontology
          owl:OntologyProperty
          owl:priorVersion
          owl:propertyChainAxiom
          owl:propertyDisjointWith
          owl:qualifiedCardinality
          owl:ReflexiveProperty
          owl:Restriction
          owl:sameAs
          owl:someValuesFrom
          owl:sourceIndividual
          owl:SymmetricProperty
          owl:targetIndividual
          owl:targetValue
          owl:Thing
          owl:topDataProperty
          owl:topObjectProperty
          owl:TransitiveProperty
          owl:unionOf
          owl:versionInfo
          owl:versionIRI
          owl:withRestrictions
      );

      # Datatype names; a few of them live in the rdf and owl namespaces.
      my @vocabularyterms_dtype = qw(
          xsd:anyURI
          xsd:base64Binary
          xsd:boolean
          xsd:byte
          xsd:dateTime
          xsd:dateTimeStamp
          xsd:decimal
          xsd:double
          xsd:float
          xsd:hexBinary
          xsd:int
          xsd:integer
          xsd:language
          xsd:long
          xsd:Name
          xsd:NCName
          xsd:negativeInteger
          xsd:NMTOKEN
          xsd:nonNegativeInteger
          xsd:nonPositiveInteger
          xsd:normalizedString
          rdf:PlainLiteral
          xsd:positiveInteger
          owl:rational
          owl:real
          xsd:short
          xsd:string
          xsd:token
          xsd:unsignedByte
          xsd:unsignedInt
          xsd:unsignedLong
          xsd:unsignedShort
          rdf:XMLLiteral
      );

      my @vocabularyterms_facet = qw(
          rdf:langRange
          xsd:length
          xsd:maxExclusive
          xsd:maxInclusive
          xsd:maxLength
          xsd:minExclusive
          xsd:minInclusive
          xsd:minLength
          xsd:pattern
      );

      return (
          @vocabularyterms_rdf,
          @vocabularyterms_rdfs,
          @vocabularyterms_owl,
          @vocabularyterms_dtype,
          @vocabularyterms_facet,
      );
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement