Advertisement
Roman_Sarnov

Untitled

Nov 7th, 2019
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.57 KB | None | 0 0
  1. from kivy.app import App
  2. from kivy.lang import Builder
  3. from kivy.uix.recycleview import RecycleView
  4. from kivy.uix.screenmanager import ScreenManager, Screen
  5. from kivy.uix.boxlayout import BoxLayout
  6. from kivy.uix.gridlayout import GridLayout
  7. from kivy.config import Config
  8.  
  # Window configuration: a non-resizable 640x960 window.
  # NOTE(review): Kivy's documentation recommends calling Config.set before
  # any other Kivy module is imported so that the values are applied before
  # the window is created -- confirm these settings actually take effect.
  Config.set("graphics", 'resizable', 0)
  Config.set("graphics", 'height', 960)
  Config.set("graphics", 'width', 640)

  # The kv language is indentation-sensitive: rule headers sit at column 0,
  # properties and child widgets are nested below their owner.  dedent()
  # strips the indentation this literal shares with the surrounding code.
  from textwrap import dedent

  Builder.load_string(dedent("""
      <Button1@Button>:
          background_color:255, 255, 255, 1
          size:300,150
          size_hint:None, None

      <Image>:
          source:'фон.jpg'
          allow_stretch: True

      <Label>:
          color:0,0,0,1
          font_size:30

      <BoxLayout>:
          orientation:'vertical'
          spacing:200
          padding:170,200
          background_color:1,1,1,1

      <GridLayout10@GridLayout>:
          cols:2
          spacing:10,40
          padding:15

      <MainScreen>:
          name:"Menu"
          Image:
          BoxLayout:
              Button1:
                  on_press:root.manager.current="Subjects"
                  text:"Subjects"
              Button1:
                  text:"Special"
                  on_press:root.manager.current="Special"


      <SubjectsScreen>:
          name:"Subjects"
          Image:
          BoxLayout:
              Button1:
                  text:"10 class"
                  on_press:root.manager.current="SubjectsScreen10"
              Button1:
                  text:"11 class"
                  on_press:root.manager.current="SubjectsScreen11"

      <SubjectsScreen10>:
          name:"SubjectsScreen10"
          Image:
          GridLayout10:
              Button1:
                  text:"Геометрия"
              Button1:
                  text:"Алгебра"
              Button1:
                  text:"Химия"
              Button1:
                  text:"Физика"
              Button1:
                  text:"Русская литература"
              Button1:
                  text:"Русский язык"
              Button1:
                  text:"География"
              Button1:
                  text:"Биология"
              Button1:
                  text:"Белорусский язык"

      <SubjectsScreen11>:
          name:"SubjectsScreen11"
          Image:
          GridLayout10:
              Button1:
                  text:"Алгебра"
              Button1:
                  text:"Геометрия"
              Button1:
                  text:"Физика"
              Button1:
                  text:"Химия"
              Button1:
                  text:"Русская литература"
              Button1:
                  text:"Русский язык"
              Button1:
                  text:"География"
              Button1:
                  text:"Биология"
              Button1:
                  text:"Белорусский язык"
              Button1:
                  text:"Астрономия"

      <SpecialSubjectsScreen>:
          name:"Special"
          Image:
          AnchorLayout:
              Button1:
                  text:"No material(Back)"
                  on_press: root.manager.current='Menu'
  """))
  117.  
  118.  
  class MainScreen(Screen):
      """Menu screen; its content is declared by the ``<MainScreen>`` kv rule."""
  121.  
  122.  
  class SpecialSubjectsScreen(Screen):
      """Screen whose content is declared by the ``<SpecialSubjectsScreen>`` kv rule."""
  125.  
  126.  
  class SubjectsScreen(Screen):
      """Class-selection screen; content comes from the ``<SubjectsScreen>`` kv rule."""
  129.  
  130.  
  class SubjectsScreen10(Screen):
      """Subject grid screen; content comes from the ``<SubjectsScreen10>`` kv rule."""
  133.  
  134.  
  class SubjectsScreen11(Screen):
      """Subject grid screen; content comes from the ``<SubjectsScreen11>`` kv rule."""
  137.  
  138.  
  # Module-level screen manager; screens are created and registered one at a
  # time, in the order listed, under the names the kv rules navigate to.
  sm = ScreenManager()
  for _screen_cls, _screen_name in (
      (MainScreen, 'Menu'),
      (SpecialSubjectsScreen, 'Special'),
      (SubjectsScreen, "Subjects"),
      (SubjectsScreen10, "SubjectsScreen10"),
      (SubjectsScreen11, "SubjectsScreen11"),
  ):
      sm.add_widget(_screen_cls(name=_screen_name))
  145.  
  146.  
  class LibraryApp(App):
      """Kivy application whose root widget is the module-level ``sm``."""

      def build(self):
          """Return the module-level screen manager as the root widget."""
          root_widget = sm
          return root_widget
  150.  
  151.  
  if __name__ == '__main__':
      # Run the application only when this file is executed as a script.
      app = LibraryApp()
      app.run()
  154.  
  155.  
  156.  
  157.  
  158.  
  159. from os.path import exists
  160. from tempfile import mkdtemp, mkstemp
  161. from shutil import rmtree
  162. from binascii import b2a_hex
  163. from os import write, close
  164. from threading import Thread
  165.  
  166. from pdfminer.pdfpage import PDFPage
  167. from pdfminer.pdfparser import PDFParser
  168. from pdfminer.converter import PDFPageAggregator
  169. from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines
  170. from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
  171. from pdfminer.layout import (
  172. LAParams, LTTextBox, LTTextLine, LTFigure, LTImage, LTChar, LTCurve,
  173. LTLine, LTRect,
  174. )
  175.  
  176. from kivy.lang import Builder
  177. from kivy.clock import Clock
  178.  
  179. from kivy.graphics import Mesh, Color
  180. from kivy.graphics.tesselator import Tesselator
  181.  
  182. from kivy.uix.widget import Widget
  183. from kivy.uix.recycleview import RecycleView
  184. from kivy.uix.label import Label
  185. from kivy.uix.image import Image
  186. from kivy.uix.relativelayout import RelativeLayout
  187. from kivy.uix.boxlayout import BoxLayout
  188.  
  189. from kivy.properties import (
  190. StringProperty, ListProperty, NumericProperty, AliasProperty,
  191. DictProperty, ObjectProperty, BooleanProperty, ColorProperty,
  192. )
  193.  
  # The kv language is indentation-sensitive: rule headers sit at column 0,
  # properties, child widgets and canvas instructions are nested below their
  # owner.  dedent() strips the indentation this literal shares with the
  # surrounding code.
  from textwrap import dedent

  Builder.load_string(dedent('''
      #:import RGBA kivy.utils.rgba

      <PDFDocumentWidget>:
          viewclass: 'PDFPageWidget'
          key_size: 'size'
          # async load is buggy at the moment
          # async_load: True

          RecycleGridLayout:
              spacing: 5
              cols: root.cols
              rows: root.rows
              size_hint: None, None
              size: self.minimum_size
              default_size_hint: None, None

      <PDFPageWidget>:
          size_hint: None, None

          canvas.before:
              Color:
                  rgba: RGBA('FFFFFF')
              Rectangle:
                  size: self.size

      <PDFLabelWidget,PDFImageWidget>:
          size_hint: None, None

      <PDFImageWidget>:
          pos: self.bbox[:2]
          size: self.bbox[2] - self.x, self.bbox[3] - self.y

      <PDFLabelWidget>:
          text_size: self.width, None
          height: self.texture_size[1]
          color: RGBA('000000')
          font_size: 8

      <PDFCurveWidget>:
  '''))
  235.  
  236.  
  class PDFDocumentWidget(RecycleView):
      """RecycleView that lists the pages of the PDF file named by ``source``.

      Changing ``source`` reloads the document.  Each entry of ``data``
      carries this widget as ``manager``, the pdfminer page object as
      ``page`` and the page ``size`` taken from its MediaBox.
      """

      source = StringProperty()
      password = StringProperty()
      # None is the default for both grid dimensions, so it must also be an
      # accepted value when the property is assigned later.
      cols = NumericProperty(None, allownone=True)
      rows = NumericProperty(None, allownone=True)
      _toc = ListProperty()
      async_load = BooleanProperty(False)

      def __init__(self, **kwargs):
          super(PDFDocumentWidget, self).__init__(**kwargs)
          self._fp = None
          self._document = None
          self._tmpdir = None
          self.bind(source=self.load)
          # ``source`` given as a keyword was set before the binding existed.
          if self.source:
              self.load()

      def _reset(self):
          """Close the open file and drop every trace of the loaded document."""
          if self._fp:
              self._fp.close()
              self._fp = None
          self._document = None
          self._toc = []
          self.data = []
          if self._tmpdir:
              rmtree(self._tmpdir)
              self._tmpdir = None

      def load(self, *args):
          """(Re)load the document named by ``source``.

          A missing or empty ``source`` clears the widget instead of trying
          to open the file.  I/O errors while opening or reading are printed
          and leave the widget empty.
          """
          if self._fp:
              # close the previous pdf file
              self._fp.close()
              self._fp = None

          pdf_doc = self.source
          if not pdf_doc or not exists(pdf_doc):
              if pdf_doc:
                  print('PDF file not found: {}'.format(pdf_doc))
              self._reset()
              return

          try:
              # open the pdf file
              self._fp = fp = open(pdf_doc, 'rb')
              # create a parser object associated with the file object
              parser = PDFParser(fp)
              # create a PDFDocument object that stores the document
              # structure, supplying the password for encrypted files
              doc = PDFDocument(parser, password=self.password)
              # connect the parser and document objects
              parser.set_document(doc)

              self._document = doc
              self._parse_toc()
              self._create_tmpdir()
              self._parse_pages()
          except IOError as e:
              # the file doesn't exist or similar problem
              print(e)
              self._reset()

      def _create_tmpdir(self):
          """Create the temporary directory on first use and return its path."""
          if not self._tmpdir:
              self._tmpdir = mkdtemp()
          return self._tmpdir

      def _parse_toc(self):
          """Store the document outline in ``_toc`` as (level, title) pairs.

          A document without outlines yields an empty list.  Any other error
          raised while reading the outline is printed and whatever was
          collected so far is kept.
          """
          toc = []
          doc = self._document
          try:
              for (level, title, dest, a, se) in doc.get_outlines():
                  toc.append((level, title))
          except PDFNoOutlines:
              pass
          except Exception as e:
              # best effort: a broken outline must not prevent page display
              print(e)
          finally:
              self._toc = toc

      def _parse_pages(self):
          """Fill ``data`` with one entry per page of the current document."""
          doc = self._document
          if not doc:
              self.data = []
              return

          rsrcmgr = PDFResourceManager()
          laparams = LAParams()
          # device and interpreter are kept on the instance; the page
          # widgets reach them through their ``manager`` reference
          self.device = device = PDFPageAggregator(rsrcmgr, laparams=laparams)
          self.interpreter = PDFPageInterpreter(rsrcmgr, device)

          self.data = [
              {
                  'manager': self,
                  'page': page,
                  'size': page.attrs.get('MediaBox', [0, 0, 0, 0])[2:],
              }
              for page in PDFPage.create_pages(doc)
          ]
  331.  
  332.  
  class PDFImageWidget(Image):
      """Image with a ``bbox`` list; the kv rule derives pos and size from it."""

      # four-element list, defaulting to [0, 0, 100, 100]
      bbox = ListProperty([0, 0, 100, 100])
  335.  
  336.  
  class PDFLabelWidget(Label):
      """Label used for text extracted from a PDF page; carries a ``bbox`` list."""

      # four-element list, defaulting to [0, 0, 100, 100]
      bbox = ListProperty([0, 0, 100, 100])
  339.  
  340.  
  class PDFCurveWidget(Widget):
      """Widget that draws one path, optionally filled and/or stroked.

      The canvas is rebuilt (on the next frame) whenever one of the drawing
      properties changes.
      """

      points = ListProperty()
      line_width = NumericProperty()
      stroke = BooleanProperty(False)
      fill = BooleanProperty(False)
      even_odd = BooleanProperty()
      color = ColorProperty()
      fill_color = ColorProperty()

      def __init__(self, **kwargs):
          super(PDFCurveWidget, self).__init__(**kwargs)
          build = Clock.create_trigger(self.build, 0)

          self.bind(
              points=build,
              line_width=build,
              stroke=build,
              fill=build,
              even_odd=build,
              color=build,
              fill_color=build
          )
          # Values passed as keyword arguments were applied before the
          # bindings above existed, so schedule the first draw explicitly.
          build()

      def build(self, *args):
          """Clear the canvas and redraw the path from the current properties."""
          # Line is not among the names imported at module level.
          from kivy.graphics import Line

          self.canvas.clear()
          points = self.points
          if not points:
              return

          # Accept both [(x, y), ...] and [x, y, ...]; the tesselator is fed
          # a flat coordinate list.
          if isinstance(points[0], (list, tuple)):
              flat_points = [coord for point in points for coord in point]
          else:
              flat_points = list(points)

          with self.canvas:
              if self.fill:
                  Color(rgba=self.fill_color)
                  # NOTE(review): ``even_odd`` is not forwarded to the
                  # tesselator; its default winding rule is used.
                  tess = Tesselator()
                  tess.add_contour(flat_points)
                  if tess.tesselate():
                      for vertices, indices in tess.meshes:
                          Mesh(
                              vertices=vertices,
                              indices=indices,
                              mode='triangle_fan'
                          )
                  else:
                      print("mesh didn't tesselate!")

              if self.stroke:
                  Color(rgba=self.color)
                  Line(
                      points=flat_points,
                      width=self.line_width
                  )
  390.  
  391.  
  class PDFPageWidget(RelativeLayout):
      """Render one PDF page as child widgets (labels, images, curves).

      ``manager`` is the owning PDFDocumentWidget; its interpreter, device
      and temporary directory are used to process ``page``.
      """

      labels = DictProperty()
      attributes = DictProperty()
      manager = ObjectProperty()
      page = ObjectProperty()
      items = ListProperty()
      images = ListProperty()

      def on_page(self, *args):
          """Process the newly assigned page, in a thread if the manager asks."""
          if self.page is None or self.manager is None:
              # nothing to render yet
              self.items = []
              return

          # NOTE(review): in async mode ``items`` is assigned from the worker
          # thread, so the widgets are built off the main thread -- this may
          # be why the kv file calls async loading buggy.
          if self.manager.async_load:
              Thread(target=self._load_page).start()
          else:
              self._load_page()

      def _load_page(self):
          """Run the page through the manager's interpreter and store the layout."""
          self.manager.interpreter.process_page(self.page)
          self.items = self.manager.device.get_result()

      def on_items(self, *args):
          """Drop everything rendered so far and render the new layout items."""
          self.clear_widgets()
          # the caches must not keep widgets that were just removed
          self.labels = {}
          self.images = []
          self._render_content(self.items)

      def _render_content(self, lt_objs):
          """Iterate through the list of LT* objects and add a widget for
          the text, image or curve contained in each
          """
          for lt_obj in lt_objs:
              if isinstance(lt_obj, (LTChar, LTTextBox, LTTextLine)):
                  # One label per text object.  This is limited style wise:
                  # pdfminer exposes font information per character only,
                  # which is impractical to create individual labels for.
                  self.add_text(
                      text=lt_obj.get_text(),
                      box_pos=(lt_obj.x0, lt_obj.y0),
                      box_size=(lt_obj.width, lt_obj.height),
                  )

              elif isinstance(lt_obj, LTImage):
                  self.add_image(lt_obj)

              elif isinstance(lt_obj, LTFigure):
                  # a figure is a container of further LT* objects
                  self._render_content(lt_obj)

              # all of these are actually LTCurves, but all types here for
              # clarity
              elif isinstance(lt_obj, (LTLine, LTRect, LTCurve)):
                  self.add_widget(
                      PDFCurveWidget(
                          points=lt_obj.pts or [],
                          line_width=lt_obj.linewidth or 1.0,
                          stroke=lt_obj.stroke,
                          fill=lt_obj.fill,
                          even_odd=lt_obj.evenodd,
                          # colors seem to be indices, to some dict i
                          # can't find in what pdfminer exposes
                          color='#FFFFFFFF',  # lt_obj.stroking_color or
                          fill_color='#00000000'  # lt_obj.non_stroking_color or
                      )
                  )

      def save_image(self, lt_image):
          """Try to save the image data from this LTImage object, and
          return the file name, if successful (None otherwise)
          """
          stream = lt_image.stream
          if not stream:
              return None

          raw_data = stream.get_rawdata()
          if not raw_data:
              return None

          file_ext = self.determine_image_type(raw_data[0:4])
          if not file_ext:
              return None

          # file_ext already starts with a dot
          fd, fn = mkstemp(dir=self.manager._tmpdir, suffix=file_ext)
          try:
              write(fd, raw_data)
          finally:
              close(fd)
          return fn

      @staticmethod
      def determine_image_type(stream_first_4_bytes):
          """Find out the image file type based on the magic number comparison of the first 4 (or 2) bytes

          Returns the extension including the leading dot ('.jpeg', '.png',
          '.gif' or '.bmp'), or None when the signature is not recognised.
          """
          file_type = None
          bytes_as_hex = b2a_hex(stream_first_4_bytes)
          if bytes_as_hex.startswith(b'ffd8'):
              file_type = '.jpeg'
          elif bytes_as_hex == b'89504e47':
              file_type = '.png'
          elif bytes_as_hex == b'47494638':
              file_type = '.gif'
          elif bytes_as_hex.startswith(b'424d'):
              file_type = '.bmp'
          return file_type

      def add_text(self, text, box_pos, box_size, **kwargs):
          """Add a label for ``text``, or append to the label already created
          for the same position and size
          """
          key = (box_pos, box_size)
          label = self.labels.get(key)
          if not label:
              label = PDFLabelWidget(text=text, pos=box_pos, size=box_size, **kwargs)
              self.labels[key] = label
              self.add_widget(label)
          else:
              label.text += text

      def add_image(self, lt_image):
          """Save the image to disk and add a widget showing it, if possible"""
          source = self.save_image(lt_image)
          if source:
              # the kv rule for PDFImageWidget derives pos and size from bbox
              image = PDFImageWidget(
                  source=source,
                  bbox=lt_image.bbox
              )
              self.add_widget(image)
              self.images.append(image)
  526.  
  527.  
  if __name__ == '__main__':
      # Demo entry point: show the PDF named on the command line, or the
      # default file when no argument is given.
      from sys import argv
      from kivy.base import runTouchApp
      from kivy.uix.scrollview import ScrollView

      fn = argv[1] if len(argv) > 1 else 'Дудников, А.В Русский язык.pdf'
      root = PDFDocumentWidget(source=fn, cols=1)
      runTouchApp(root)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement