Advertisement
Roman_Sarnov

Untitled

Nov 7th, 2019
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.05 KB | None | 0 0
from binascii import b2a_hex
from os import close, write
from os.path import exists
from shutil import rmtree
from tempfile import mkdtemp, mkstemp
from threading import Thread

from kivy.config import Config
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import (
    LAParams, LTTextBox, LTTextLine, LTFigure, LTImage, LTChar, LTCurve,
    LTLine, LTRect,
)
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser

import kivy.graphics
from kivy.clock import Clock
from kivy.graphics import Mesh, Color, Line
from kivy.graphics.tesselator import Tesselator
from kivy.lang import Builder
from kivy.properties import (
    StringProperty, ListProperty, NumericProperty, AliasProperty,
    DictProperty, ObjectProperty, BooleanProperty, ColorProperty,
)
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.image import Image
from kivy.uix.label import Label
from kivy.uix.recycleview import RecycleView
from kivy.uix.relativelayout import RelativeLayout
from kivy.uix.widget import Widget
  35. Config.set("graphics", 'resizable', 0)
  36. Config.set("graphics", 'height', 960)
  37. Config.set("graphics", 'width', 640)
  38. Builder.load_string('''
  39. #:import RGBA kivy.utils.rgba
  40.  
  41. <PDFDocumentWidget>:
  42. viewclass: 'PDFPageWidget'
  43. key_size: 'size'
  44. # async load is buggy at the moment
  45. # async_load: True
  46.  
  47. RecycleGridLayout:
  48. spacing: 5
  49. cols: root.cols
  50. rows: root.rows
  51. size_hint: None, None
  52. size: self.minimum_size
  53. default_size_hint: None, None
  54.  
  55. <PDFPageWidget>:
  56. size_hint: None, None
  57.  
  58. canvas.before:
  59. Color:
  60. rgba: RGBA('FFFFFF')
  61. Rectangle:
  62. size: self.size
  63.  
  64. <PDFLabelWidget,PDFImageWidget>:
  65. size_hint: None, None
  66.  
  67. <PDFImageWidget>:
  68. pos: self.bbox[:2]
  69. size: self.bbox[2] - self.x, self.bbox[3] - self.y
  70.  
  71. <PDFLabelWidget>:
  72. text_size: self.width, None
  73. height: self.texture_size[1]
  74. color: RGBA('000000')
  75. font_size: 8
  76.  
  77. <PDFCurveWidget>:
  78. ''')
  79.  
  80.  
  81. class PDFDocumentWidget(RecycleView):
  82. source = StringProperty()
  83. password = StringProperty()
  84. cols = NumericProperty(None)
  85. rows = NumericProperty(None)
  86. _toc = ListProperty()
  87. async_load = BooleanProperty(False)
  88.  
  89. def __init__(self, **kwargs):
  90. super(PDFDocumentWidget, self).__init__(**kwargs)
  91. self._fp = None
  92. self._document = None
  93. self._tmpdir = None
  94. self.bind(source=self.load)
  95. if self.source:
  96. self.load()
  97.  
  98. def load(self, *args):
  99. if self._fp:
  100. # close the previous pdf file
  101. self._fp.close()
  102.  
  103. pdf_doc = self.source
  104. data = []
  105. if not pdf_doc or not exists(pdf_doc):
  106. self.pages = []
  107. self._doc = []
  108. self._document = None
  109. if self._tmpdir:
  110. rmtree(self._tmpdir)
  111. self._tmpdir = None
  112.  
  113. try:
  114. # open the pdf file
  115. self._fp = fp = open(pdf_doc, 'rb')
  116. # create a parser object associated with the file object
  117. parser = PDFParser(fp)
  118. # create a PDFDocument object that stores the document structure
  119. doc = PDFDocument(parser)
  120. # connect the parser and document objects
  121. parser.set_document(doc)
  122. # supply the password for initialization
  123. # doc.initialize(self.password)
  124.  
  125. # if doc.is_extractable:
  126. # apply the function and return the result
  127. self._document = doc
  128. self._parse_toc()
  129. self._create_tmpdir()
  130. self._parse_pages()
  131. except IOError as e:
  132. # the file doesn't exist or similar problem
  133. print(e)
  134.  
  135. def _create_tmpdir(self):
  136. if not self._tmpdir:
  137. self._tmpdir = mkdtemp()
  138. return self._tmpdir
  139.  
  140. def _parse_toc(self):
  141. """With an open PDFDocument object, get the table of contents (toc) data
  142. [this is a higher-order function to be passed to with_pdf()]"""
  143. toc = []
  144. doc = self._document
  145. try:
  146. outlines = doc.get_outlines()
  147. for (level, title, dest, a, se) in outlines:
  148. toc.append((level, title))
  149. except:
  150. pass
  151. finally:
  152. self._toc = toc
  153.  
  154. def _parse_pages(self):
  155. doc = self._document
  156. if not doc:
  157. self.data = []
  158. return
  159.  
  160. data = []
  161.  
  162. rsrcmgr = PDFResourceManager()
  163. laparams = LAParams()
  164. self.device = device = PDFPageAggregator(rsrcmgr, laparams=laparams)
  165. self.interpreter = PDFPageInterpreter(rsrcmgr, device)
  166.  
  167. for i, page in enumerate(PDFPage.create_pages(doc)):
  168. p = {
  169. 'manager': self,
  170. 'page': page,
  171. 'size': page.attrs.get('MediaBox', [0, 0, 0, 0])[2:],
  172. }
  173. data.append(p)
  174. self.data = data
  175.  
  176.  
class PDFImageWidget(Image):
    """Image placed on a page from a bounding box."""

    # Bounding box as a 4-item list; the kv rule for this class derives
    # ``pos`` from the first two entries and ``size`` from the last two.
    bbox = ListProperty([0, 0, 100, 100])
  179.  
  180.  
class PDFLabelWidget(Label):
    """Label used to display a piece of text extracted from a page."""

    # 4-item bounding box property; defaults to [0, 0, 100, 100].
    bbox = ListProperty([0, 0, 100, 100])
  183.  
  184.  
  185. class PDFCurveWidget(Widget):
  186. points = ListProperty()
  187. line_width = NumericProperty()
  188. stroke = BooleanProperty(False)
  189. fill = BooleanProperty(False)
  190. even_odd = BooleanProperty()
  191. color = ColorProperty()
  192. fill_color = ColorProperty()
  193.  
  194. def __init__(self, **kwargs):
  195. super(PDFCurveWidget, self).__init__(**kwargs)
  196. build = Clock.create_trigger(self.build, 0)
  197.  
  198. self.bind(
  199. points=build,
  200. line_width=build,
  201. stroke=build,
  202. fill=build,
  203. even_odd=build,
  204. color=build,
  205. fill_color=build
  206. )
  207.  
  208. def build(self, *args):
  209. self.canvas.clear()
  210. if not self.points:
  211. return
  212.  
  213. with self.canvas:
  214. if self.fill:
  215. Color(rgba=self.fill_color)
  216. t = Tesselator()
  217. t.add_contour(self.points)
  218. if tess.tesselate:
  219. for vertices, indices in tess.meshes:
  220. Mesh(
  221. vertices=vertices,
  222. indices=indices,
  223. mode='triangle fan'
  224. )
  225. else:
  226. print("mesh didn't tesselate!")
  227.  
  228. if self.stroke:
  229. Color(rgba=self.color)
  230. Line(
  231. points=self.points,
  232. width=self.line_width
  233. )
  234.  
  235.  
  236. class PDFPageWidget(RelativeLayout):
  237. labels = DictProperty()
  238. attributes = DictProperty()
  239. manager = ObjectProperty()
  240. page = ObjectProperty()
  241. items = ListProperty()
  242.  
  243. def on_page(self, *args):
  244. if self.manager.async_load:
  245. Thread(target=self._load_page).start()
  246. else:
  247. self._load_page()
  248.  
  249. def _load_page(self):
  250. self.manager.interpreter.process_page(self.page)
  251. self.items = self.manager.device.get_result()
  252.  
  253. def on_items(self, *args):
  254. self.clear_widgets()
  255. self._render_content(self.items)
  256.  
  257. def _render_content(self, lt_objs):
  258. """Iterate through the list of LT* objects and capture the text
  259. or image data contained in each
  260. """
  261. for lt_obj in lt_objs:
  262. print(lt_obj)
  263. if isinstance(lt_obj, LTChar):
  264. self.add_text(
  265. text=lt_obj.get_text(),
  266. box_pos=(lt_obj.x0, lt_obj.y0),
  267. box_size=(lt_obj.width, lt_obj.height),
  268. # font_size=lt_obj.fontsize,
  269. # font_name=lt_obj.fontname,
  270. )
  271.  
  272. elif isinstance(lt_obj, (LTTextBox, LTTextLine)):
  273. # text, so arrange is logically based on its column width
  274. # this way is very limited style wise, and doesn't allow
  275. # support for font, color, style, etc management, as
  276. # pdfminer doesn't provide these information at text box
  277. # level, by using the following nested loop, it's
  278. # possible to have font family info, but for individual
  279. # character, which is impractical to create direct
  280. # labels for.
  281. # for obj in lt_obj:
  282. # print(obj)
  283. # for o in obj:
  284. # print(o)
  285.  
  286. self.add_text(
  287. text=lt_obj.get_text(),
  288. box_pos=(lt_obj.x0, lt_obj.y0),
  289. box_size=(lt_obj.width, lt_obj.height),
  290. )
  291.  
  292. elif isinstance(lt_obj, LTImage):
  293. saved_file = self.save_image(lt_obj)
  294. if saved_file:
  295. self.add_widget(
  296. PDFImageWidget(
  297. source=saved_file,
  298. bbox=lt_obj.bbox
  299. )
  300. )
  301.  
  302. elif isinstance(lt_obj, LTFigure):
  303. self._render_content(lt_obj)
  304.  
  305. # all of these are actually LTCurves, but all types here for
  306. # clarity
  307. elif isinstance(lt_obj, (LTLine, LTRect, LTCurve)):
  308. self.add_widget(
  309. PDFCurveWidget(
  310. points=lt_obj.pts or [],
  311. line_width=lt_obj.linewidth or 1.0,
  312. stroke=lt_obj.stroke,
  313. fill=lt_obj.fill,
  314. even_odd=lt_obj.evenodd,
  315. # colors seem to be indices, to some dict i
  316. # can't find in what pdfminer exposes
  317. color='#FFFFFFFF', # lt_obj.stroking_color or
  318. fill_color='#00000000' # lt_obj.non_stroking_color or
  319. )
  320. )
  321.  
  322. def save_image(self, lt_image):
  323. """Try to save the image data from this LTImage object, and
  324. return the file name, if successful
  325. """
  326. if lt_image.stream:
  327. file_stream = lt_image.stream.get_rawdata()
  328. if file_stream:
  329. file_ext = self.determine_image_type(file_stream[0:4])
  330. if file_ext:
  331. fd, fn = mkstemp(dir=self.manager._tmpdir, suffix='.{}'.format(file_ext))
  332. write(fd, file_stream)
  333. close(fd)
  334. return fn
  335.  
  336. @staticmethod
  337. def determine_image_type(stream_first_4_bytes):
  338. """Find out the image file type based on the magic number comparison of the first 4 (or 2) bytes"""
  339. file_type = None
  340. bytes_as_hex = b2a_hex(stream_first_4_bytes)
  341. if bytes_as_hex.startswith(b'ffd8'):
  342. file_type = '.jpeg'
  343. elif bytes_as_hex == b'89504e47':
  344. file_type = '.png'
  345. elif bytes_as_hex == b'47494638':
  346. file_type = '.gif'
  347. elif bytes_as_hex.startswith(b'424d'):
  348. file_type = '.bmp'
  349. return file_type
  350.  
  351. def add_text(self, text, box_pos, box_size, **kwargs):
  352. label = self.labels.get((box_pos, box_pos))
  353. if not label:
  354. label = PDFLabelWidget(text=text, pos=box_pos, size=box_size, **kwargs)
  355. self.labels[(box_pos, box_size)] = label
  356. self.add_widget(label)
  357. else:
  358. label.text += text
  359.  
  360. def add_image(self, lt_image):
  361. source = self.save_image(lt_image)
  362. if source:
  363. image = PDFImageWidget(
  364. source=source,
  365. pos=(lt_image.x0, lt_image.y0),
  366. size=(lt_image.widt, lt_image.height)
  367. )
  368. self.add_widget(image)
  369. self.images.append(image)
  370.  
  371.  
# Script entry guard of the PDF viewer part of the file.
# NOTE(review): this block looks truncated -- ``fn`` is set to the literal
# 'c' and nothing here uses ``argv``, ``ScrollView`` or ``fn``; confirm
# against the original script.
if __name__ == '__main__':
    from sys import argv
    # ``runTouchApp`` is only brought into scope here, i.e. only when the
    # file is executed as a script; code further down the file calls it.
    from kivy.base import runTouchApp
    from kivy.uix.scrollview import ScrollView
    fn = 'c'
  377.  
  378.  
  379.  
  380.  
  381.  
  382.  
  383.  
  384. from kivy.app import App
  385. from kivy.lang import Builder
  386. from kivy.uix.recycleview import RecycleView
  387. from kivy.uix.screenmanager import ScreenManager, Screen
  388. from kivy.uix.boxlayout import BoxLayout
  389. from kivy.uix.gridlayout import GridLayout
  390. from kivy.config import Config
  391. from knopki import Knopki
  392.  
  393. Config.set("graphics", 'resizable', 0)
  394. Config.set("graphics", 'height', 960)
  395. Config.set("graphics", 'width', 640)
  396. Builder.load_string("""
  397. <Button1@Button>:
  398. background_color:255, 255, 255, 1
  399. size:300,150
  400. size_hint:None, None
  401.  
  402. <Image>:
  403. source:'фон.jpg'
  404. allow_stretch: True
  405.  
  406. <Label>:
  407. color:0,0,0,1
  408. font_size:30
  409.  
  410. <BoxLayout>:
  411. orientation:'vertical'
  412. spacing:200
  413. padding:170,200
  414. background_color:1,1,1,1
  415.  
  416. <GridLayout10@GridLayout>:
  417. cols:2
  418. spacing:10,40
  419. padding:15
  420.  
  421. <MainScreen>:
  422. name:"Menu"
  423. Image
  424. BoxLayout:
  425. Button1:
  426. on_press:root.manager.current="Subjects"
  427. text:"Subjects"
  428. Button1:
  429. text:"Special"
  430. on_press:root.manager.current="Special"
  431.  
  432.  
  433. <SubjectsScreen>:
  434. name:"Subjects"
  435. Image
  436. BoxLayout:
  437. Button1:
  438. text:"10 class"
  439. on_press:root.manager.current="SubjectsScreen10"
  440. Button1:
  441. text:"11 class"
  442. on_press:root.manager.current="SubjectsScreen11"
  443.  
  444. <SubjectsScreen10>:
  445. name:"SubjectsScreen10"
  446. Image
  447. GridLayout10:
  448. Button1:
  449. text:"Геометрия"
  450. Button1:
  451. text:"Алгебра"
  452. Button1:
  453. text:"Химия"
  454. Button1:
  455. text:"Физика"
  456. Button1:
  457. text:"Русская литература"
  458. Button1:
  459. text:"Русский язык"
  460. on_press:root.russ_yaz()
  461. Button1:
  462. text:"География"
  463. Button1:
  464. text:"Биология"
  465. Button1:
  466. text:"Белорусский язык"
  467.  
  468. <SubjectsScreen11>:
  469. name:"SubjectsScreen11"
  470. Image
  471. GridLayout10:
  472. Button1:
  473. text:"Алгебра"
  474. Button1:
  475. text:"Геометрия"
  476. Button1:
  477. text:"Физика"
  478. Button1:
  479. text:"Химия"
  480. Button1:
  481. text:"Русская литература"
  482. Button1:
  483. text:"Русский язык"
  484. on_press:self.russ_yaz
  485.  
  486. Button1:
  487. text:"География"
  488. Button1:
  489. text:"Биология"
  490. Button1:
  491. text:"Белорусский язык"
  492. Button1:
  493. text:"Астрономия"
  494.  
  495. <SpecialSubjectsScreen>:
  496. name:"Special"
  497. Image
  498. AnchorLayout:
  499. Button1:
  500. text:"No material(Back)"
  501. on_press: root.manager.current='Menu'
  502. """)
  503.  
  504.  
  505. class MainScreen(Screen):
  506. pass
  507.  
  508.  
  509. class SpecialSubjectsScreen(Screen):
  510. pass
  511.  
  512.  
  513. class SubjectsScreen(Screen):
  514. pass
  515.  
  516.  
  517. class SubjectsScreen10(Screen):
  518. def russ_yaz(self):
  519. fn = 'Дудников, А.В Русский язык.pdf'
  520. root = PDFDocumentWidget(source=fn, cols=1)
  521. runTouchApp(root)
  522. PDFDocumentWidget()
  523.  
  524.  
  525.  
  526. class SubjectsScreen11(Screen):
  527. pass
  528.  
  529.  
  530. sm = ScreenManager()
  531. sm.add_widget(MainScreen(name='Menu'))
  532. sm.add_widget(SpecialSubjectsScreen(name='Special'))
  533. sm.add_widget(SubjectsScreen(name="Subjects"))
  534. sm.add_widget(SubjectsScreen10(name="SubjectsScreen10"))
  535. sm.add_widget(SubjectsScreen11(name="SubjectsScreen11"))
  536.  
  537.  
  538. class LibraryApp(App):
  539. def russ_yaz(self):
  540. fn = 'Дудников, А.В Русский язык.pdf'
  541. root = PDFDocumentWidget(source=fn, cols=1)
  542. runTouchApp(root)
  543. def build(self):
  544. return sm
  545.  
  546.  
  547.  
  548. if __name__ == '__main__':
  549. LibraryApp().run()
  550.  
  551.  
  552.  
  553.  
  554. runTouchApp(root)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement