Advertisement
Roman_Sarnov

Untitled

Nov 7th, 2019
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.02 KB | None | 0 0
  1. from os.path import exists
  2. from tempfile import mkdtemp, mkstemp
  3. from shutil import rmtree
  4. from binascii import b2a_hex
  5. from os import write, close
  6. from threading import Thread
  7. from time import sleep
  8.  
  9. from kivy.config import Config
  10. import kivy.graphics
  11. from kivy.graphics.vertex_instructions import Line
  12. from pdfminer.pdfpage import PDFPage
  13. from pdfminer.pdfparser import PDFParser
  14. from pdfminer.converter import PDFPageAggregator
  15. from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines
  16. from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
  17. from pdfminer.layout import (
  18. LAParams, LTTextBox, LTTextLine, LTFigure, LTImage, LTChar, LTCurve,
  19. LTLine, LTRect,
  20. )
  21.  
  22. from kivy.lang import Builder
  23. from kivy.clock import Clock
  24. import kivy.graphics
  25. from kivy.graphics import Mesh, Color
  26. from kivy.graphics.tesselator import Tesselator
  27.  
  28. from kivy.uix.widget import Widget
  29. from kivy.uix.recycleview import RecycleView
  30. from kivy.uix.label import Label
  31. from kivy.uix.image import Image
  32. from kivy.uix.relativelayout import RelativeLayout
  33. from kivy.uix.boxlayout import BoxLayout
  34.  
  35. from kivy.properties import (
  36. StringProperty, ListProperty, NumericProperty, AliasProperty,
  37. DictProperty, ObjectProperty, BooleanProperty, ColorProperty,
  38. )
  39.  
  40. Config.set("graphics", 'resizable', 0)
  41. Config.set("graphics", 'height', 960)
  42. Config.set("graphics", 'width', 640)
  43. Builder.load_string('''
  44. #:import RGBA kivy.utils.rgba
  45.  
  46. <PDFDocumentWidget>:
  47. viewclass: 'PDFPageWidget'
  48. key_size: 'size'
  49. # async load is buggy at the moment
  50. # async_load: True
  51.  
  52. RecycleGridLayout:
  53. spacing: 5
  54. cols: root.cols
  55. rows: root.rows
  56. size_hint: None, None
  57. size: self.minimum_size
  58. default_size_hint: None, None
  59.  
  60. <PDFPageWidget>:
  61. size_hint: None, None
  62.  
  63. canvas.before:
  64. Color:
  65. rgba: RGBA('FFFFFF')
  66. Rectangle:
  67. size: self.size
  68.  
  69. <PDFLabelWidget,PDFImageWidget>:
  70. size_hint: None, None
  71.  
  72. <PDFImageWidget>:
  73. pos: self.bbox[:2]
  74. size: self.bbox[2] - self.x, self.bbox[3] - self.y
  75.  
  76. <PDFLabelWidget>:
  77. text_size: self.width, None
  78. height: self.texture_size[1]
  79. color: RGBA('000000')
  80. font_size: 4
  81.  
  82. <PDFCurveWidget>:
  83. ''')
  84.  
  85.  
  86. class PDFDocumentWidget(RecycleView):
  87. source = StringProperty()
  88. password = StringProperty()
  89. cols = NumericProperty(None)
  90. rows = NumericProperty(None)
  91. _toc = ListProperty()
  92. async_load = BooleanProperty(False)
  93.  
  94. def __init__(self, **kwargs):
  95. super(PDFDocumentWidget, self).__init__(**kwargs)
  96. self._fp = None
  97. self._document = None
  98. self._tmpdir = None
  99. self.bind(source=self.load)
  100. if self.source:
  101. self.load()
  102.  
  103. def load(self, *args):
  104. if self._fp:
  105. # close the previous pdf file
  106. self._fp.close()
  107.  
  108. pdf_doc = self.source
  109. data = []
  110. if not pdf_doc or not exists(pdf_doc):
  111. self.pages = []
  112. self._doc = []
  113. self._document = None
  114. if self._tmpdir:
  115. rmtree(self._tmpdir)
  116. self._tmpdir = None
  117.  
  118. try:
  119. # open the pdf file
  120. self._fp = fp = open(pdf_doc, 'rb')
  121. # create a parser object associated with the file object
  122. parser = PDFParser(fp)
  123. # create a PDFDocument object that stores the document structure
  124. doc = PDFDocument(parser)
  125. # connect the parser and document objects
  126. parser.set_document(doc)
  127. # supply the password for initialization
  128. # doc.initialize(self.password)
  129.  
  130. # if doc.is_extractable:
  131. # apply the function and return the result
  132. self._document = doc
  133. self._parse_toc()
  134. self._create_tmpdir()
  135. self._parse_pages()
  136. except IOError as e:
  137. # the file doesn't exist or similar problem
  138. print(e)
  139.  
  140. def _create_tmpdir(self):
  141. if not self._tmpdir:
  142. self._tmpdir = mkdtemp()
  143. return self._tmpdir
  144.  
  145. def _parse_toc(self):
  146. """With an open PDFDocument object, get the table of contents (toc) data
  147. [this is a higher-order function to be passed to with_pdf()]"""
  148. toc = []
  149. doc = self._document
  150. try:
  151. outlines = doc.get_outlines()
  152. for (level, title, dest, a, se) in outlines:
  153. toc.append((level, title))
  154. except:
  155. pass
  156. finally:
  157. self._toc = toc
  158.  
  159. def _parse_pages(self):
  160. doc = self._document
  161. if not doc:
  162. self.data = []
  163. return
  164.  
  165. data = []
  166.  
  167. rsrcmgr = PDFResourceManager()
  168. laparams = LAParams()
  169. self.device = device = PDFPageAggregator(rsrcmgr, laparams=laparams)
  170. self.interpreter = PDFPageInterpreter(rsrcmgr, device)
  171.  
  172. for i, page in enumerate(PDFPage.create_pages(doc)):
  173. p = {
  174. 'manager': self,
  175. 'page': page,
  176. 'size': page.attrs.get('MediaBox', [0, 0, 0, 0])[2:],
  177. }
  178. data.append(p)
  179. self.data = data
  180.  
  181.  
  182. class PDFImageWidget(Image):
  183. bbox = ListProperty([0, 0, 100, 100])
  184.  
  185.  
  186. class PDFLabelWidget(Label):
  187. bbox = ListProperty([0, 0, 100, 100])
  188.  
  189.  
  190. class PDFCurveWidget(Widget):
  191. points = ListProperty()
  192. line_width = NumericProperty()
  193. stroke = BooleanProperty(False)
  194. fill = BooleanProperty(False)
  195. even_odd = BooleanProperty()
  196. color = ColorProperty()
  197. fill_color = ColorProperty()
  198.  
  199. def __init__(self, **kwargs):
  200. super(PDFCurveWidget, self).__init__(**kwargs)
  201. build = Clock.create_trigger(self.build, 0)
  202.  
  203. self.bind(
  204. points=build,
  205. line_width=build,
  206. stroke=build,
  207. fill=build,
  208. even_odd=build,
  209. color=build,
  210. fill_color=build
  211. )
  212.  
  213. def build(self, tess=None, *args):
  214. self.canvas.clear()
  215. if not self.points:
  216. return
  217.  
  218. with self.canvas:
  219. if self.fill:
  220. Color(rgba=self.fill_color)
  221. t = Tesselator()
  222. t.add_contour(self.points)
  223. if tess.tesselate:
  224. for vertices, indices in tess.meshes:
  225. Mesh(
  226. vertices=vertices,
  227. indices=indices,
  228. mode='triangle fan'
  229. )
  230. else:
  231. print("mesh didn't tesselate!")
  232.  
  233. if self.stroke:
  234. Color(rgba=self.color)
  235. Line(
  236. points=self.points,
  237. width=self.line_width
  238. )
  239.  
  240.  
  241. class PDFPageWidget(RelativeLayout):
  242. labels = DictProperty()
  243. attributes = DictProperty()
  244. manager = ObjectProperty()
  245. page = ObjectProperty()
  246. items = ListProperty()
  247.  
  248. def on_page(self, *args):
  249. if self.manager.async_load:
  250. Thread(target=self._load_page).start()
  251. else:
  252. self._load_page()
  253.  
  254. def _load_page(self):
  255. self.manager.interpreter.process_page(self.page)
  256. self.items = self.manager.device.get_result()
  257.  
  258. def on_items(self, *args):
  259. self.clear_widgets()
  260. self._render_content(self.items)
  261.  
  262. def _render_content(self, lt_objs):
  263. """Iterate through the list of LT* objects and capture the text
  264. or image data contained in each
  265. """
  266. for lt_obj in lt_objs:
  267. print(lt_obj)
  268. if isinstance(lt_obj, LTChar):
  269. self.add_text(
  270. text=lt_obj.get_text(),
  271. box_pos=(lt_obj.x0, lt_obj.y0),
  272. box_size=(lt_obj.width, lt_obj.height),
  273. # font_size=lt_obj.fontsize,
  274. # font_name=lt_obj.fontname,
  275. )
  276.  
  277. elif isinstance(lt_obj, (LTTextBox, LTTextLine)):
  278. # text, so arrange is logically based on its column width
  279. # this way is very limited style wise, and doesn't allow
  280. # support for font, color, style, etc management, as
  281. # pdfminer doesn't provide these information at text box
  282. # level, by using the following nested loop, it's
  283. # possible to have font family info, but for individual
  284. # character, which is impractical to create direct
  285. # labels for.
  286. # for obj in lt_obj:
  287. # print(obj)
  288. # for o in obj:
  289. # print(o)
  290.  
  291. self.add_text(
  292. text=lt_obj.get_text(),
  293. box_pos=(lt_obj.x0, lt_obj.y0),
  294. box_size=(lt_obj.width, lt_obj.height),
  295. )
  296.  
  297. elif isinstance(lt_obj, LTImage):
  298. saved_file = self.save_image(lt_obj)
  299. if saved_file:
  300. self.add_widget(
  301. PDFImageWidget(
  302. source=saved_file,
  303. bbox=lt_obj.bbox
  304. )
  305. )
  306.  
  307. elif isinstance(lt_obj, LTFigure):
  308. self._render_content(lt_obj)
  309.  
  310. # all of these are actually LTCurves, but all types here for
  311. # clarity
  312. elif isinstance(lt_obj, (LTLine, LTRect, LTCurve)):
  313. self.add_widget(
  314. PDFCurveWidget(
  315. points=lt_obj.pts or [],
  316. line_width=lt_obj.linewidth or 1.0,
  317. stroke=lt_obj.stroke,
  318. fill=lt_obj.fill,
  319. even_odd=lt_obj.evenodd,
  320. # colors seem to be indices, to some dict i
  321. # can't find in what pdfminer exposes
  322. color='#FFFFFFFF', # lt_obj.stroking_color or
  323. fill_color='#00000000' # lt_obj.non_stroking_color or
  324. )
  325. )
  326.  
  327. def save_image(self, lt_image):
  328. """Try to save the image data from this LTImage object, and
  329. return the file name, if successful
  330. """
  331. if lt_image.stream:
  332. file_stream = lt_image.stream.get_rawdata()
  333. if file_stream:
  334. file_ext = self.determine_image_type(file_stream[0:4])
  335. if file_ext:
  336. fd, fn = mkstemp(dir=self.manager._tmpdir, suffix='.{}'.format(file_ext))
  337. write(fd, file_stream)
  338. close(fd)
  339. return fn
  340.  
  341. @staticmethod
  342. def determine_image_type(stream_first_4_bytes):
  343. """Find out the image file type based on the magic number comparison of the first 4 (or 2) bytes"""
  344. file_type = None
  345. bytes_as_hex = b2a_hex(stream_first_4_bytes)
  346. if bytes_as_hex.startswith(b'ffd8'):
  347. file_type = '.jpeg'
  348. elif bytes_as_hex == b'89504e47':
  349. file_type = '.png'
  350. elif bytes_as_hex == b'47494638':
  351. file_type = '.gif'
  352. elif bytes_as_hex.startswith(b'424d'):
  353. file_type = '.bmp'
  354. return file_type
  355.  
  356. def add_text(self, text, box_pos, box_size, **kwargs):
  357. label = self.labels.get((box_pos, box_pos))
  358. if not label:
  359. label = PDFLabelWidget(text=text, pos=box_pos, size=box_size, **kwargs)
  360. self.labels[(box_pos, box_size)] = label
  361. self.add_widget(label)
  362. else:
  363. label.text += text
  364.  
  365. def add_image(self, lt_image):
  366. source = self.save_image(lt_image)
  367. if source:
  368. image = PDFImageWidget(
  369. source=source,
  370. pos=(lt_image.x0, lt_image.y0),
  371. size=(lt_image.widt, lt_image.height)
  372. )
  373. self.add_widget(image)
  374. self.images.append(image)
  375.  
  376.  
if __name__ == '__main__':
    # Evaluated only when this file is run as a script.
    # NOTE(review): argv, ScrollView and fn are not referenced anywhere in
    # the visible code — confirm whether they are still needed.
    from sys import argv
    from kivy.base import runTouchApp
    from kivy.uix.scrollview import ScrollView

    fn = 'c'
  383.  
  384. from kivy.app import App
  385. from kivy.lang import Builder
  386. from kivy.uix.recycleview import RecycleView
  387. from kivy.uix.screenmanager import ScreenManager, Screen
  388. from kivy.uix.boxlayout import BoxLayout
  389. from kivy.uix.gridlayout import GridLayout
  390. from kivy.config import Config
  391.  
  392. Config.set("graphics", 'resizable', 1)
  393. Config.set("graphics", 'height', 960)
  394. Config.set("graphics", 'width', 640)
  395. Builder.load_string("""
  396. <Button1@Button>:
  397. background_color:255, 255, 255, 1
  398. size:300,150
  399. size_hint:None, None
  400.  
  401. <Image>:
  402. source:'фон.jpg'
  403. allow_stretch: True
  404.  
  405. <Label>:
  406. color:0,0,0,1
  407. font_size:30
  408.  
  409. <BoxLayout>:
  410. orientation:'vertical'
  411. spacing:200
  412. padding:170,200
  413. background_color:1,1,1,1
  414.  
  415. <GridLayout10@GridLayout>:
  416. cols:2
  417. spacing:10,40
  418. padding:15
  419.  
  420. <MainScreen>:
  421. name:"Menu"
  422. Image
  423. BoxLayout:
  424. Button1:
  425. on_press:root.manager.current="Subjects"
  426. text:"Subjects"
  427. Button1:
  428. text:"Special"
  429. on_press:root.manager.current="Special"
  430.  
  431.  
  432. <SubjectsScreen>:
  433. name:"Subjects"
  434. Image
  435. BoxLayout:
  436. Button1:
  437. text:"10 class"
  438. on_press:root.manager.current="SubjectsScreen10"
  439. Button1:
  440. text:"11 class"
  441. on_press:root.manager.current="SubjectsScreen11"
  442.  
  443. <SubjectsScreen10>:
  444. name:"SubjectsScreen10"
  445. Image
  446. GridLayout10:
  447. Button1:
  448. text:"Геометрия"
  449. Button1:
  450. text:"Алгебра"
  451. Button1:
  452. text:"Химия"
  453. Button1:
  454. text:"Физика"
  455. Button1:
  456. text:"Русская литература"
  457. Button1:
  458. text:"Русский язык"
  459. on_press: root.russ_yaz()
  460. on_press: root.close()
  461. Button1:
  462. text:"География"
  463. Button1:
  464. text:"Биология"
  465. Button1:
  466. text:"Белорусский язык"
  467.  
  468. <SubjectsScreen11>:
  469. name:"SubjectsScreen11"
  470. Image
  471. GridLayout10:
  472. Button1:
  473. text:"Алгебра"
  474. Button1:
  475. text:"Геометрия"
  476. Button1:
  477. text:"Физика"
  478. Button1:
  479. text:"Химия"
  480. Button1:
  481. text:"Русская литература"
  482. Button1:
  483. text:"Русский язык"
  484. on_press:self.russ_yaz
  485.  
  486. Button1:
  487. text:"География"
  488. Button1:
  489. text:"Биология"
  490. Button1:
  491. text:"Белорусский язык"
  492. Button1:
  493. text:"Астрономия"
  494.  
  495. <SpecialSubjectsScreen>:
  496. name:"Special"
  497. Image
  498. AnchorLayout:
  499. Button1:
  500. text:"No material(Back)"
  501. on_press: root.manager.current='Menu'
  502. """)
  503.  
  504.  
  505. class MainScreen(Screen):
  506. pass
  507.  
  508.  
  509. class SpecialSubjectsScreen(Screen):
  510. pass
  511.  
  512.  
  513. class SubjectsScreen(Screen):
  514. pass
  515.  
  516.  
  517. class SubjectsScreen10(Screen):
  518. def russ_yaz(self):
  519. fn = 'Дудников, А.В Русский язык.pdf'
  520. root = PDFDocumentWidget(source=fn, cols=1)
  521. runTouchApp(root)
  522. PDFDocumentWidget()
  523.  
  524. def close(self):
  525. App.get_running_app().stop()
  526.  
  527.  
  528. class SubjectsScreen11(Screen):
  529. pass
  530.  
  531.  
  532. sm = ScreenManager()
  533. sm.add_widget(MainScreen(name='Menu'))
  534. sm.add_widget(SpecialSubjectsScreen(name='Special'))
  535. sm.add_widget(SubjectsScreen(name="Subjects"))
  536. sm.add_widget(SubjectsScreen10(name="SubjectsScreen10"))
  537. sm.add_widget(SubjectsScreen11(name="SubjectsScreen11"))
  538.  
  539.  
  540. class LibraryApp(App):
  541.  
  542. def build(self):
  543. return sm
  544.  
  545.  
  546. if __name__ == '__main__':
  547. LibraryApp().run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement