Guest User

Untitled

a guest
Nov 23rd, 2017
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.52 KB | None | 0 0
  1. diff --git a/fastavro/__init__.py b/fastavro/__init__.py
  2. index 0ae079d..5fa3e93 100644
  3. --- a/fastavro/__init__.py
  4. +++ b/fastavro/__init__.py
  5. @@ -79,6 +79,9 @@ acquaint_schema = _acquaint_schema
  6. _schema.acquaint_schema = _acquaint_schema
  7. is_avro = _reader.is_avro
  8.  
  9. +UnknownType = _schema.UnknownType
  10. +load_schema = _schema.load_schema
  11. +
  12. __all__ = [
  13. n for n in locals().keys() if not n.startswith('_')
  14. ] + ['__version__']
  15. diff --git a/fastavro/_reader_py.py b/fastavro/_reader_py.py
  16. index 3b41f3c..2b94890 100644
  17. --- a/fastavro/_reader_py.py
  18. +++ b/fastavro/_reader_py.py
  19. @@ -16,7 +16,7 @@ from uuid import UUID
  20. from .six import (
  21. MemoryIO, xrange, btou, utob, iteritems, is_str, str2ints, fstint
  22. )
  23. -from ._schema_py import (
  24. +from .schema import (
  25. extract_record_type, acquaint_schema, populate_schema_defs,
  26. extract_logical_type
  27. )
  28. diff --git a/fastavro/_schema_py.py b/fastavro/_schema_py.py
  29. deleted file mode 100644
  30. index 783389e..0000000
  31. --- a/fastavro/_schema_py.py
  32. +++ /dev/null
  33. @@ -1,163 +0,0 @@
  34. -# cython: auto_cpdef=True
  35. -
  36. -from os import path
  37. -import json
  38. -
  39. -from ._schema_public import PRIMITIVES, SCHEMA_DEFS, UnknownType
  40. -
  41. -
  42. -def extract_record_type(schema):
  43. - if isinstance(schema, dict):
  44. - return schema['type']
  45. -
  46. - if isinstance(schema, list):
  47. - return 'union'
  48. -
  49. - return schema
  50. -
  51. -
  52. -def extract_logical_type(schema):
  53. - if not isinstance(schema, dict):
  54. - return None
  55. - d_schema = schema
  56. - rt = d_schema['type']
  57. - lt = d_schema.get('logicalType')
  58. - if lt:
  59. - # TODO: Building this string every time is going to be relatively slow.
  60. - return '{}-{}'.format(rt, lt)
  61. - return None
  62. -
  63. -
  64. -def schema_name(schema, parent_ns):
  65. - name = schema.get('name')
  66. - if not name:
  67. - return parent_ns, None
  68. -
  69. - namespace = schema.get('namespace', parent_ns)
  70. - if not namespace:
  71. - return namespace, name
  72. -
  73. - return namespace, '%s.%s' % (namespace, name)
  74. -
  75. -
  76. -def extract_named_schemas_into_repo(schema, repo, transformer, parent_ns=None):
  77. - if type(schema) == list:
  78. - for index, enum_schema in enumerate(schema):
  79. - namespaced_name = extract_named_schemas_into_repo(
  80. - enum_schema,
  81. - repo,
  82. - transformer,
  83. - parent_ns,
  84. - )
  85. - if namespaced_name:
  86. - schema[index] = namespaced_name
  87. - return
  88. -
  89. - if type(schema) != dict:
  90. - # If a reference to another schema is an unqualified name, but not one
  91. - # of the primitive types, then we should add the current enclosing
  92. - # namespace to reference name.
  93. - if schema not in PRIMITIVES and '.' not in schema and parent_ns:
  94. - schema = parent_ns + '.' + schema
  95. -
  96. - if schema not in repo:
  97. - raise UnknownType(schema)
  98. - return schema
  99. -
  100. - namespace, name = schema_name(schema, parent_ns)
  101. -
  102. - if name:
  103. - repo[name] = transformer(schema)
  104. -
  105. - schema_type = schema.get('type')
  106. - if schema_type == 'array':
  107. - namespaced_name = extract_named_schemas_into_repo(
  108. - schema['items'],
  109. - repo,
  110. - transformer,
  111. - namespace,
  112. - )
  113. - if namespaced_name:
  114. - schema['items'] = namespaced_name
  115. - return
  116. - if schema_type == 'map':
  117. - namespaced_name = extract_named_schemas_into_repo(
  118. - schema['values'],
  119. - repo,
  120. - transformer,
  121. - namespace,
  122. - )
  123. - if namespaced_name:
  124. - schema['values'] = namespaced_name
  125. - return
  126. - # Normal record.
  127. - for field in schema.get('fields', []):
  128. - namespaced_name = extract_named_schemas_into_repo(
  129. - field['type'],
  130. - repo,
  131. - transformer,
  132. - namespace,
  133. - )
  134. - if namespaced_name:
  135. - field['type'] = namespaced_name
  136. -
  137. -
  138. -def load_schema(schema_path):
  139. - '''
  140. - Returns a schema loaded from the file at `schema_path`.
  141. -
  142. - Will recursively load referenced schemas assuming they can be found in
  143. - files in the same directory and named with the convention
  144. - `<type_name>.avsc`.
  145. - '''
  146. - with open(schema_path) as fd:
  147. - schema = json.load(fd)
  148. - schema_dir, schema_file = path.split(schema_path)
  149. - return _load_schema(schema, schema_dir)
  150. -
  151. -
  152. -def _reader():
  153. - # FIXME: This is due to circular depedency, find a better way
  154. - try:
  155. - from . import _reader as reader
  156. - except ImportError:
  157. - from . import reader
  158. -
  159. - return reader
  160. -
  161. -
  162. -def _load_schema(schema, schema_dir):
  163. - try:
  164. - _reader().acquaint_schema(schema)
  165. - except UnknownType as e:
  166. - try:
  167. - avsc = path.join(schema_dir, '%s.avsc' % e.name)
  168. - load_schema(avsc)
  169. - except IOError:
  170. - raise e
  171. - _load_schema(schema, schema_dir)
  172. - return schema
  173. -
  174. -
  175. -def populate_schema_defs(schema, repo=None):
  176. - repo = SCHEMA_DEFS if repo is None else repo
  177. - extract_named_schemas_into_repo(
  178. - schema,
  179. - repo,
  180. - lambda schema: schema,
  181. - )
  182. -
  183. -
  184. -def acquaint_schema(schema,
  185. - repo=None,
  186. - reader_schema_defs=None):
  187. - """Extract schema in repo (default READERS)"""
  188. - repo = _reader().READERS if repo is None else repo
  189. - reader_schema_defs = \
  190. - SCHEMA_DEFS if reader_schema_defs is None else reader_schema_defs
  191. - extract_named_schemas_into_repo(
  192. - schema,
  193. - repo,
  194. - lambda schema: lambda fo, _, r_schema: _reader().read_data(
  195. - fo, schema, reader_schema_defs.get(r_schema)),
  196. - )
  197. diff --git a/fastavro/schema.py b/fastavro/schema.py
  198. index 00b72bb..783389e 100644
  199. --- a/fastavro/schema.py
  200. +++ b/fastavro/schema.py
  201. @@ -1,6 +1,163 @@
  202. -from ._schema_public import UnknownType # noqa: F401
  203. +# cython: auto_cpdef=True
  204.  
  205. -try:
  206. - from ._schema import acquaint_schema, load_schema # noqa: F401
  207. -except ImportError:
  208. - from ._schema_py import acquaint_schema, load_schema # noqa: F401
  209. +from os import path
  210. +import json
  211. +
  212. +from ._schema_public import PRIMITIVES, SCHEMA_DEFS, UnknownType
  213. +
  214. +
  215. +def extract_record_type(schema):
  216. + if isinstance(schema, dict):
  217. + return schema['type']
  218. +
  219. + if isinstance(schema, list):
  220. + return 'union'
  221. +
  222. + return schema
  223. +
  224. +
  225. +def extract_logical_type(schema):
  226. + if not isinstance(schema, dict):
  227. + return None
  228. + d_schema = schema
  229. + rt = d_schema['type']
  230. + lt = d_schema.get('logicalType')
  231. + if lt:
  232. + # TODO: Building this string every time is going to be relatively slow.
  233. + return '{}-{}'.format(rt, lt)
  234. + return None
  235. +
  236. +
  237. +def schema_name(schema, parent_ns):
  238. + name = schema.get('name')
  239. + if not name:
  240. + return parent_ns, None
  241. +
  242. + namespace = schema.get('namespace', parent_ns)
  243. + if not namespace:
  244. + return namespace, name
  245. +
  246. + return namespace, '%s.%s' % (namespace, name)
  247. +
  248. +
  249. +def extract_named_schemas_into_repo(schema, repo, transformer, parent_ns=None):
  250. + if type(schema) == list:
  251. + for index, enum_schema in enumerate(schema):
  252. + namespaced_name = extract_named_schemas_into_repo(
  253. + enum_schema,
  254. + repo,
  255. + transformer,
  256. + parent_ns,
  257. + )
  258. + if namespaced_name:
  259. + schema[index] = namespaced_name
  260. + return
  261. +
  262. + if type(schema) != dict:
  263. + # If a reference to another schema is an unqualified name, but not one
  264. + # of the primitive types, then we should add the current enclosing
  265. + # namespace to reference name.
  266. + if schema not in PRIMITIVES and '.' not in schema and parent_ns:
  267. + schema = parent_ns + '.' + schema
  268. +
  269. + if schema not in repo:
  270. + raise UnknownType(schema)
  271. + return schema
  272. +
  273. + namespace, name = schema_name(schema, parent_ns)
  274. +
  275. + if name:
  276. + repo[name] = transformer(schema)
  277. +
  278. + schema_type = schema.get('type')
  279. + if schema_type == 'array':
  280. + namespaced_name = extract_named_schemas_into_repo(
  281. + schema['items'],
  282. + repo,
  283. + transformer,
  284. + namespace,
  285. + )
  286. + if namespaced_name:
  287. + schema['items'] = namespaced_name
  288. + return
  289. + if schema_type == 'map':
  290. + namespaced_name = extract_named_schemas_into_repo(
  291. + schema['values'],
  292. + repo,
  293. + transformer,
  294. + namespace,
  295. + )
  296. + if namespaced_name:
  297. + schema['values'] = namespaced_name
  298. + return
  299. + # Normal record.
  300. + for field in schema.get('fields', []):
  301. + namespaced_name = extract_named_schemas_into_repo(
  302. + field['type'],
  303. + repo,
  304. + transformer,
  305. + namespace,
  306. + )
  307. + if namespaced_name:
  308. + field['type'] = namespaced_name
  309. +
  310. +
  311. +def load_schema(schema_path):
  312. + '''
  313. + Returns a schema loaded from the file at `schema_path`.
  314. +
  315. + Will recursively load referenced schemas assuming they can be found in
  316. + files in the same directory and named with the convention
  317. + `<type_name>.avsc`.
  318. + '''
  319. + with open(schema_path) as fd:
  320. + schema = json.load(fd)
  321. + schema_dir, schema_file = path.split(schema_path)
  322. + return _load_schema(schema, schema_dir)
  323. +
  324. +
  325. +def _reader():
  326. + # FIXME: This is due to circular dependency, find a better way
  327. + try:
  328. + from . import _reader as reader
  329. + except ImportError:
  330. + from . import reader
  331. +
  332. + return reader
  333. +
  334. +
  335. +def _load_schema(schema, schema_dir):
  336. + try:
  337. + _reader().acquaint_schema(schema)
  338. + except UnknownType as e:
  339. + try:
  340. + avsc = path.join(schema_dir, '%s.avsc' % e.name)
  341. + load_schema(avsc)
  342. + except IOError:
  343. + raise e
  344. + _load_schema(schema, schema_dir)
  345. + return schema
  346. +
  347. +
  348. +def populate_schema_defs(schema, repo=None):
  349. + repo = SCHEMA_DEFS if repo is None else repo
  350. + extract_named_schemas_into_repo(
  351. + schema,
  352. + repo,
  353. + lambda schema: schema,
  354. + )
  355. +
  356. +
  357. +def acquaint_schema(schema,
  358. + repo=None,
  359. + reader_schema_defs=None):
  360. + """Extract schema in repo (default READERS)"""
  361. + repo = _reader().READERS if repo is None else repo
  362. + reader_schema_defs = \
  363. + SCHEMA_DEFS if reader_schema_defs is None else reader_schema_defs
  364. + extract_named_schemas_into_repo(
  365. + schema,
  366. + repo,
  367. + lambda schema: lambda fo, _, r_schema: _reader().read_data(
  368. + fo, schema, reader_schema_defs.get(r_schema)),
  369. + )
  370. diff --git a/fastavro/writer.py b/fastavro/writer.py
  371. index 40fe532..8da52e9 100644
  372. --- a/fastavro/writer.py
  373. +++ b/fastavro/writer.py
  374. @@ -9,7 +9,7 @@
  375. from .six import utob, MemoryIO, long, is_str, iterkeys, itervalues, \
  376. iteritems, mk_bits
  377. from ._reader_py import HEADER_SCHEMA, SYNC_SIZE, MAGIC
  378. -from ._schema_py import (
  379. +from .schema import (
  380. extract_named_schemas_into_repo, extract_record_type,
  381. extract_logical_type
  382. )
  383. diff --git a/tests/test_fastavro.py b/tests/test_fastavro.py
  384. index 4962f64..072cd36 100644
  385. --- a/tests/test_fastavro.py
  386. +++ b/tests/test_fastavro.py
  387. @@ -1,4 +1,5 @@
  388. -import fastavro.schema
  389. +import fastavro
  390. +from fastavro import UnknownType, acquaint_schema, load_schema
  391. from fastavro.six import MemoryIO
  392.  
  393. import pytest
  394. @@ -84,7 +85,7 @@ def test_not_avro():
  395.  
  396. def test_acquaint_schema_rejects_undleclared_name():
  397. try:
  398. - fastavro.schema.acquaint_schema({
  399. + acquaint_schema({
  400. "type": "record",
  401. "fields": [{
  402. "name": "left",
  403. @@ -92,13 +93,13 @@ def test_acquaint_schema_rejects_undleclared_name():
  404. }]
  405. })
  406. assert False, 'Never raised'
  407. - except fastavro.schema.UnknownType as e:
  408. + except UnknownType as e:
  409. assert 'Thinger' == e.name
  410.  
  411.  
  412. def test_acquaint_schema_rejects_unordered_references():
  413. try:
  414. - fastavro.schema.acquaint_schema({
  415. + acquaint_schema({
  416. "type": "record",
  417. "fields": [{
  418. "name": "left",
  419. @@ -116,12 +117,12 @@ def test_acquaint_schema_rejects_unordered_references():
  420. }]
  421. })
  422. assert False, 'Never raised'
  423. - except fastavro.schema.UnknownType as e:
  424. + except UnknownType as e:
  425. assert 'Thinger' == e.name
  426.  
  427.  
  428. def test_acquaint_schema_accepts_nested_namespaces():
  429. - fastavro.schema.acquaint_schema({
  430. + acquaint_schema({
  431. "namespace": "com.example",
  432. "name": "Outer",
  433. "type": "record",
  434. @@ -149,7 +150,7 @@ def test_acquaint_schema_accepts_nested_namespaces():
  435.  
  436.  
  437. def test_acquaint_schema_resolves_references_from_unions():
  438. - fastavro.schema.acquaint_schema({
  439. + acquaint_schema({
  440. "namespace": "com.other",
  441. "name": "Outer",
  442. "type": "record",
  443. @@ -175,7 +176,7 @@ def test_acquaint_schema_resolves_references_from_unions():
  444.  
  445.  
  446. def test_acquaint_schema_accepts_nested_records_from_arrays():
  447. - fastavro.schema.acquaint_schema({
  448. + acquaint_schema({
  449. "fields": [
  450. {
  451. "type": {
  452. @@ -202,15 +203,15 @@ def test_acquaint_schema_accepts_nested_records_from_arrays():
  453.  
  454. def test_compose_schemas():
  455. schema_path = join(data_dir, 'Parent.avsc')
  456. - schema = fastavro.schema.load_schema(schema_path)
  457. + schema = load_schema(schema_path)
  458. assert isinstance(schema, dict)
  459. assert 'Child' in fastavro._reader.READERS
  460.  
  461.  
  462. def test_missing_schema():
  463. schema_path = join(data_dir, 'ParentMissingChild.avsc')
  464. - with pytest.raises(fastavro.schema.UnknownType):
  465. - fastavro.schema.load_schema(schema_path)
  466. + with pytest.raises(UnknownType):
  467. + load_schema(schema_path)
  468.  
  469.  
  470. def test_schemaless_writer_and_reader():
Add Comment
Please, Sign In to add comment