Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/perl
#
# Checks a text file for misspelled vocabulary terms from the
# following namespaces: rdf, rdfs, owl, xsd.
#
# A term counts as misspelled when a known localname (e.g. "type") appears
# behind a prefix that is not registered for it (e.g. "owl:type" instead of
# "rdf:type").
#
# USAGE: apply script to STDIN!
# For every bad term found, one report line with the line number and the
# offending term is printed to STDOUT.

# The bare block keeps the strictures and every lexical variable of the main
# program local to it, so code elsewhere in the file is not affected.
{
    use strict;
    use warnings;

    # === CONFIG ===
    # Separators between prefix and localname (write as character sequence)
    my $URISHORTFORMSEPARATORS = ':';

    # === PROGRAM ===
    # --- Build map from localnames of vocabulary terms to /all/ their prefixes ---
    # $prefixes_of{$localname}{$prefix} is true for every registered pair.
    my %prefixes_of;
    for my $vocabularyterm (getVocabularyTerms()) {
        my ($prefix, $localname) = split /:/, $vocabularyterm, 2;
        $prefixes_of{$localname}{$prefix} = 1;
    }

    # Sorted so that several findings on one line are always reported in the
    # same order, independent of Perl's randomized hash ordering.
    my @localnames = sort keys %prefixes_of;

    # Compile one pattern per localname up front instead of once per line.
    #   $1 = actual prefix, $2 = separator.
    # \Q..\E keeps configured separators and localnames literal. The (?!\w)
    # lookahead makes sure the localname ends here ("int" must not match
    # inside "integer") while still matching a term at the end of the line.
    my $separator_class = "[\Q$URISHORTFORMSEPARATORS\E]";
    my %pattern_for;
    for my $localname (@localnames) {
        $pattern_for{$localname}
            = qr/([A-Za-z0-9]+)($separator_class)\Q$localname\E(?!\w)/;
    }

    # --- check input ---
    # Each line read from STDIN is searched for occurrences of the localnames
    # of all registered vocabulary terms. For every occurrence found, the
    # actual prefix is compared to all allowed prefixes for the localname
    # (sometimes there are more than one, as for "Class").
    my $linenumber = 0;
    while (my $line = <STDIN>) {
        ++$linenumber;
        chomp $line;
        for my $localname (@localnames) {
            my $pattern = $pattern_for{$localname};

            # m//g in scalar context steps through every occurrence on the
            # line, so a bad term following a good one is still reported.
            while ($line =~ /$pattern/g) {
                my ($actual_prefix, $separator) = ($1, $2);
                next if $prefixes_of{$localname}{$actual_prefix};
                print "bad vocabulary term in line $linenumber: <$actual_prefix$separator$localname>\n";
            }
        }
    }
}
# === VOCABULARY TERMS ===

# Returns the list of all known vocabulary terms as "prefix:localname"
# strings, grouped in this order: RDF, RDFS, OWL, datatype and facet terms.
#
# Takes no arguments; call it in list context.
#
# A term that is listed in more than one group (currently only
# "rdf:XMLLiteral") is returned once, at the position of its first listing.
# The term lists are lexical, so no package variables are created.
sub getVocabularyTerms {
    my @vocabularyterms_rdf = qw(
        rdf:type
        rdf:Property
        rdf:Statement
        rdf:subject
        rdf:predicate
        rdf:object
        rdf:List
        rdf:first
        rdf:rest
        rdf:nil
        rdf:Seq
        rdf:Bag
        rdf:Alt
        rdf:_1
        rdf:_2
        rdf:_3
        rdf:_4
        rdf:_5
        rdf:_6
        rdf:_7
        rdf:_8
        rdf:_9
        rdf:value
        rdf:XMLLiteral
    );

    my @vocabularyterms_rdfs = qw(
        rdfs:domain
        rdfs:range
        rdfs:Resource
        rdfs:Literal
        rdfs:Datatype
        rdfs:Class
        rdfs:subClassOf
        rdfs:subPropertyOf
        rdfs:member
        rdfs:Container
        rdfs:ContainerMembershipProperty
        rdfs:comment
        rdfs:seeAlso
        rdfs:isDefinedBy
        rdfs:label
    );

    my @vocabularyterms_owl = qw(
        owl:AllDifferent
        owl:AllDisjointClasses
        owl:AllDisjointProperties
        owl:allValuesFrom
        owl:annotatedProperty
        owl:annotatedSource
        owl:annotatedTarget
        owl:Annotation
        owl:AnnotationProperty
        owl:assertionProperty
        owl:AsymmetricProperty
        owl:Axiom
        owl:backwardCompatibleWith
        owl:bottomDataProperty
        owl:bottomObjectProperty
        owl:cardinality
        owl:Class
        owl:complementOf
        owl:DataRange
        owl:datatypeComplementOf
        owl:DatatypeProperty
        owl:deprecated
        owl:DeprecatedClass
        owl:DeprecatedProperty
        owl:differentFrom
        owl:disjointUnionOf
        owl:disjointWith
        owl:distinctMembers
        owl:equivalentClass
        owl:equivalentProperty
        owl:FunctionalProperty
        owl:hasKey
        owl:hasSelf
        owl:hasValue
        owl:imports
        owl:incompatibleWith
        owl:intersectionOf
        owl:InverseFunctionalProperty
        owl:inverseOf
        owl:IrreflexiveProperty
        owl:maxCardinality
        owl:maxQualifiedCardinality
        owl:members
        owl:minCardinality
        owl:minQualifiedCardinality
        owl:NamedIndividual
        owl:NegativePropertyAssertion
        owl:Nothing
        owl:ObjectProperty
        owl:onClass
        owl:onDataRange
        owl:onDatatype
        owl:oneOf
        owl:onProperty
        owl:onProperties
        owl:Ontology
        owl:OntologyProperty
        owl:priorVersion
        owl:propertyChainAxiom
        owl:propertyDisjointWith
        owl:qualifiedCardinality
        owl:ReflexiveProperty
        owl:Restriction
        owl:sameAs
        owl:someValuesFrom
        owl:sourceIndividual
        owl:SymmetricProperty
        owl:targetIndividual
        owl:targetValue
        owl:Thing
        owl:topDataProperty
        owl:topObjectProperty
        owl:TransitiveProperty
        owl:unionOf
        owl:versionInfo
        owl:versionIRI
        owl:withRestrictions
    );

    my @vocabularyterms_dtype = qw(
        xsd:anyURI
        xsd:base64Binary
        xsd:boolean
        xsd:byte
        xsd:dateTime
        xsd:dateTimeStamp
        xsd:decimal
        xsd:double
        xsd:float
        xsd:hexBinary
        xsd:int
        xsd:integer
        xsd:language
        xsd:long
        xsd:Name
        xsd:NCName
        xsd:negativeInteger
        xsd:NMTOKEN
        xsd:nonNegativeInteger
        xsd:nonPositiveInteger
        xsd:normalizedString
        rdf:PlainLiteral
        xsd:positiveInteger
        owl:rational
        owl:real
        xsd:short
        xsd:string
        xsd:token
        xsd:unsignedByte
        xsd:unsignedInt
        xsd:unsignedLong
        xsd:unsignedShort
        rdf:XMLLiteral
    );

    my @vocabularyterms_facet = qw(
        rdf:langRange
        xsd:length
        xsd:maxExclusive
        xsd:maxInclusive
        xsd:maxLength
        xsd:minExclusive
        xsd:minInclusive
        xsd:minLength
        xsd:pattern
    );

    # Drop repeated terms while keeping the first occurrence and the overall
    # order; the groups above stay complete on their own.
    my %seen;
    return grep { !$seen{$_}++ } (
        @vocabularyterms_rdf,
        @vocabularyterms_rdfs,
        @vocabularyterms_owl,
        @vocabularyterms_dtype,
        @vocabularyterms_facet,
    );
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement