Advertisement
Roman_Sarnov

Untitled

Nov 7th, 2019
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.75 KB | None | 0 0
  1. from kivy.app import App
  2. from kivy.lang import Builder
  3. from kivy.uix.recycleview import RecycleView
  4. from kivy.uix.screenmanager import ScreenManager, Screen
  5. from kivy.uix.boxlayout import BoxLayout
  6. from kivy.uix.gridlayout import GridLayout
  7. from kivy.config import Config
  8.  
  9. Config.set("graphics", 'resizable', 0)
  10. Config.set("graphics", 'height', 960)
  11. Config.set("graphics", 'width', 640)
  12. Builder.load_string("""
  13. <Button1@Button>:
  14. background_color:255, 255, 255, 1
  15. size:300,150
  16. size_hint:None, None
  17.  
  18. <Image>:
  19. source:'фон.jpg'
  20. allow_stretch: True
  21.  
  22. <Label>:
  23. color:0,0,0,1
  24. font_size:30
  25.  
  26. <BoxLayout>:
  27. orientation:'vertical'
  28. spacing:200
  29. padding:170,200
  30. background_color:1,1,1,1
  31.  
  32. <GridLayout10@GridLayout>:
  33. cols:2
  34. spacing:10,40
  35. padding:15
  36.  
  37. <MainScreen>:
  38. name:"Menu"
  39. Image
  40. BoxLayout:
  41. Button1:
  42. on_press:root.manager.current="Subjects"
  43. text:"Subjects"
  44. Button1:
  45. text:"Special"
  46. on_press:root.manager.current="Special"
  47.  
  48.  
  49. <SubjectsScreen>:
  50. name:"Subjects"
  51. Image
  52. BoxLayout:
  53. Button1:
  54. text:"10 class"
  55. on_press:root.manager.current="SubjectsScreen10"
  56. Button1:
  57. text:"11 class"
  58. on_press:root.manager.current="SubjectsScreen11"
  59.  
  60. <SubjectsScreen10>:
  61. name:"SubjectsScreen10"
  62. Image
  63. GridLayout10:
  64. Button1:
  65. text:"Геометрия"
  66. Button1:
  67. text:"Алгебра"
  68. Button1:
  69. text:"Химия"
  70. Button1:
  71. text:"Физика"
  72. Button1:
  73. text:"Русская литература"
  74. Button1:
  75. text:"Русский язык"
  76. on_press:root.russ_yaz()
  77. on_press: app.stop()
  78. Button1:
  79. text:"География"
  80. Button1:
  81. text:"Биология"
  82. Button1:
  83. text:"Белорусский язык"
  84.  
  85. <SubjectsScreen11>:
  86. name:"SubjectsScreen11"
  87. Image
  88. GridLayout10:
  89. Button1:
  90. text:"Алгебра"
  91. Button1:
  92. text:"Геометрия"
  93. Button1:
  94. text:"Физика"
  95. Button1:
  96. text:"Химия"
  97. Button1:
  98. text:"Русская литература"
  99. Button1:
  100. text:"Русский язык"
  101. on_press:self.russ_yaz
  102.  
  103.  
  104. Button1:
  105. text:"География"
  106. Button1:
  107. text:"Биология"
  108. Button1:
  109. text:"Белорусский язык"
  110. Button1:
  111. text:"Астрономия"
  112.  
  113. <SpecialSubjectsScreen>:
  114. name:"Special"
  115. Image
  116. AnchorLayout:
  117. Button1:
  118. text:"No material(Back)"
  119. on_press: root.manager.current='Menu'
  120. """)
  121.  
  122.  
  123. class MainScreen(Screen):
  124. pass
  125.  
  126.  
  127. class SpecialSubjectsScreen(Screen):
  128. pass
  129.  
  130.  
  131. class SubjectsScreen(Screen):
  132. pass
  133.  
  134.  
  135. class SubjectsScreen10(Screen):
  136. def russ_yaz(self):
  137. LibraryApp.stop(self)
  138. global a
  139. a = 'Дудников, А.В Русский язык.pdf'
  140.  
  141.  
  142. class SubjectsScreen11(Screen):
  143. pass
  144.  
  145.  
  146. sm = ScreenManager()
  147. sm.add_widget(MainScreen(name='Menu'))
  148. sm.add_widget(SpecialSubjectsScreen(name='Special'))
  149. sm.add_widget(SubjectsScreen(name="Subjects"))
  150. sm.add_widget(SubjectsScreen10(name="SubjectsScreen10"))
  151. sm.add_widget(SubjectsScreen11(name="SubjectsScreen11"))
  152.  
  153.  
  154. class LibraryApp(App):
  155. def build(self):
  156. return sm
  157.  
  158.  
  159. if __name__ == '__main__':
  160. LibraryApp().run()
  161.  
  162. from os.path import exists
  163. from tempfile import mkdtemp, mkstemp
  164. from shutil import rmtree
  165. from binascii import b2a_hex
  166. from os import write, close
  167. from threading import Thread
  168.  
  169. from pdfminer.pdfpage import PDFPage
  170. from pdfminer.pdfparser import PDFParser
  171. from pdfminer.converter import PDFPageAggregator
  172. from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines
  173. from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
  174. from pdfminer.layout import (
  175. LAParams, LTTextBox, LTTextLine, LTFigure, LTImage, LTChar, LTCurve,
  176. LTLine, LTRect,
  177. )
  178.  
  179. from kivy.lang import Builder
  180. from kivy.clock import Clock
  181.  
  182. from kivy.graphics import Mesh, Color
  183. from kivy.graphics.tesselator import Tesselator
  184.  
  185. from kivy.uix.widget import Widget
  186. from kivy.uix.recycleview import RecycleView
  187. from kivy.uix.label import Label
  188. from kivy.uix.image import Image
  189. from kivy.uix.relativelayout import RelativeLayout
  190. from kivy.uix.boxlayout import BoxLayout
  191.  
  192. from kivy.properties import (
  193. StringProperty, ListProperty, NumericProperty, AliasProperty,
  194. DictProperty, ObjectProperty, BooleanProperty, ColorProperty,
  195. )
  196.  
# kv rules for the PDF viewer widgets defined below.
#
# - PDFDocumentWidget: a RecycleView showing PDFPageWidget views inside a
#   RecycleGridLayout whose cols/rows follow the widget's own properties.
# - PDFPageWidget: fixed-size page drawn on a white rectangle.
# - PDFImageWidget: positioned/sized from its bbox (x0, y0, x1, y1).
# - PDFLabelWidget: black 8px text wrapped to the label width.
# - PDFCurveWidget: empty rule; the widget draws itself from Python.
#
# NOTE(review): key_size is set on the RecycleView rule here; Kivy documents
# it as a RecycleLayout property -- confirm whether it should live under
# RecycleGridLayout instead.
Builder.load_string('''
#:import RGBA kivy.utils.rgba

<PDFDocumentWidget>:
    viewclass: 'PDFPageWidget'
    key_size: 'size'
    # async load is buggy at the moment
    # async_load: True

    RecycleGridLayout:
        spacing: 5
        cols: root.cols
        rows: root.rows
        size_hint: None, None
        size: self.minimum_size
        default_size_hint: None, None

<PDFPageWidget>:
    size_hint: None, None

    canvas.before:
        Color:
            rgba: RGBA('FFFFFF')
        Rectangle:
            size: self.size

<PDFLabelWidget,PDFImageWidget>:
    size_hint: None, None

<PDFImageWidget>:
    pos: self.bbox[:2]
    size: self.bbox[2] - self.x, self.bbox[3] - self.y

<PDFLabelWidget>:
    text_size: self.width, None
    height: self.texture_size[1]
    color: RGBA('000000')
    font_size: 8

<PDFCurveWidget>:
''')
  238.  
  239.  
  240. class PDFDocumentWidget(RecycleView):
  241. source = StringProperty()
  242. password = StringProperty()
  243. cols = NumericProperty(None)
  244. rows = NumericProperty(None)
  245. _toc = ListProperty()
  246. async_load = BooleanProperty(False)
  247.  
  248. def __init__(self, **kwargs):
  249. super(PDFDocumentWidget, self).__init__(**kwargs)
  250. self._fp = None
  251. self._document = None
  252. self._tmpdir = None
  253. self.bind(source=self.load)
  254. if self.source:
  255. self.load()
  256.  
  257. def load(self, *args):
  258. if self._fp:
  259. # close the previous pdf file
  260. self._fp.close()
  261.  
  262. pdf_doc = self.source
  263. data = []
  264. if not pdf_doc or not exists(pdf_doc):
  265. self.pages = []
  266. self._doc = []
  267. self._document = None
  268. if self._tmpdir:
  269. rmtree(self._tmpdir)
  270. self._tmpdir = None
  271.  
  272. try:
  273. # open the pdf file
  274. self._fp = fp = open(pdf_doc, 'rb')
  275. # create a parser object associated with the file object
  276. parser = PDFParser(fp)
  277. # create a PDFDocument object that stores the document structure
  278. doc = PDFDocument(parser)
  279. # connect the parser and document objects
  280. parser.set_document(doc)
  281. # supply the password for initialization
  282. # doc.initialize(self.password)
  283.  
  284. # if doc.is_extractable:
  285. # apply the function and return the result
  286. self._document = doc
  287. self._parse_toc()
  288. self._create_tmpdir()
  289. self._parse_pages()
  290. except IOError as e:
  291. # the file doesn't exist or similar problem
  292. print(e)
  293.  
  294. def _create_tmpdir(self):
  295. if not self._tmpdir:
  296. self._tmpdir = mkdtemp()
  297. return self._tmpdir
  298.  
  299. def _parse_toc(self):
  300. """With an open PDFDocument object, get the table of contents (toc) data
  301. [this is a higher-order function to be passed to with_pdf()]"""
  302. toc = []
  303. doc = self._document
  304. try:
  305. outlines = doc.get_outlines()
  306. for (level, title, dest, a, se) in outlines:
  307. toc.append((level, title))
  308. except:
  309. pass
  310. finally:
  311. self._toc = toc
  312.  
  313. def _parse_pages(self):
  314. doc = self._document
  315. if not doc:
  316. self.data = []
  317. return
  318.  
  319. data = []
  320.  
  321. rsrcmgr = PDFResourceManager()
  322. laparams = LAParams()
  323. self.device = device = PDFPageAggregator(rsrcmgr, laparams=laparams)
  324. self.interpreter = PDFPageInterpreter(rsrcmgr, device)
  325.  
  326. for i, page in enumerate(PDFPage.create_pages(doc)):
  327. p = {
  328. 'manager': self,
  329. 'page': page,
  330. 'size': page.attrs.get('MediaBox', [0, 0, 0, 0])[2:],
  331. }
  332. data.append(p)
  333. self.data = data
  334.  
  335.  
class PDFImageWidget(Image):
    """Image placed on a PDF page; the kv rule derives pos/size from ``bbox``."""

    # (x0, y0, x1, y1): the kv rule uses bbox[:2] as pos and the remaining
    # two values minus the widget's x/y as size
    bbox = ListProperty([0, 0, 100, 100])
  338.  
  339.  
class PDFLabelWidget(Label):
    """Text label placed on a PDF page (styled by the kv rule)."""

    # bounding box of the text; not referenced by the visible kv rules
    bbox = ListProperty([0, 0, 100, 100])
  342.  
  343.  
  344. class PDFCurveWidget(Widget):
  345. points = ListProperty()
  346. line_width = NumericProperty()
  347. stroke = BooleanProperty(False)
  348. fill = BooleanProperty(False)
  349. even_odd = BooleanProperty()
  350. color = ColorProperty()
  351. fill_color = ColorProperty()
  352.  
  353. def __init__(self, **kwargs):
  354. super(PDFCurveWidget, self).__init__(**kwargs)
  355. build = Clock.create_trigger(self.build, 0)
  356.  
  357. self.bind(
  358. points=build,
  359. line_width=build,
  360. stroke=build,
  361. fill=build,
  362. even_odd=build,
  363. color=build,
  364. fill_color=build
  365. )
  366.  
  367. def build(self, *args):
  368. self.canvas.clear()
  369. if not self.points:
  370. return
  371.  
  372. with self.canvas:
  373. if self.fill:
  374. Color(rgba=self.fill_color)
  375. t = Tesselator()
  376. t.add_contour(self.points)
  377. if tess.tesselate:
  378. for vertices, indices in tess.meshes:
  379. Mesh(
  380. vertices=vertices,
  381. indices=indices,
  382. mode='triangle fan'
  383. )
  384. else:
  385. print("mesh didn't tesselate!")
  386.  
  387. if self.stroke:
  388. Color(rgba=self.color)
  389. Line(
  390. points=self.points,
  391. width=self.line_width
  392. )
  393.  
  394.  
  395. class PDFPageWidget(RelativeLayout):
  396. labels = DictProperty()
  397. attributes = DictProperty()
  398. manager = ObjectProperty()
  399. page = ObjectProperty()
  400. items = ListProperty()
  401.  
  402. def on_page(self, *args):
  403. if self.manager.async_load:
  404. Thread(target=self._load_page).start()
  405. else:
  406. self._load_page()
  407.  
  408. def _load_page(self):
  409. self.manager.interpreter.process_page(self.page)
  410. self.items = self.manager.device.get_result()
  411.  
  412. def on_items(self, *args):
  413. self.clear_widgets()
  414. self._render_content(self.items)
  415.  
  416. def _render_content(self, lt_objs):
  417. """Iterate through the list of LT* objects and capture the text
  418. or image data contained in each
  419. """
  420. for lt_obj in lt_objs:
  421. print(lt_obj)
  422. if isinstance(lt_obj, LTChar):
  423. self.add_text(
  424. text=lt_obj.get_text(),
  425. box_pos=(lt_obj.x0, lt_obj.y0),
  426. box_size=(lt_obj.width, lt_obj.height),
  427. # font_size=lt_obj.fontsize,
  428. # font_name=lt_obj.fontname,
  429. )
  430.  
  431. elif isinstance(lt_obj, (LTTextBox, LTTextLine)):
  432. # text, so arrange is logically based on its column width
  433. # this way is very limited style wise, and doesn't allow
  434. # support for font, color, style, etc management, as
  435. # pdfminer doesn't provide these information at text box
  436. # level, by using the following nested loop, it's
  437. # possible to have font family info, but for individual
  438. # character, which is impractical to create direct
  439. # labels for.
  440. # for obj in lt_obj:
  441. # print(obj)
  442. # for o in obj:
  443. # print(o)
  444.  
  445. self.add_text(
  446. text=lt_obj.get_text(),
  447. box_pos=(lt_obj.x0, lt_obj.y0),
  448. box_size=(lt_obj.width, lt_obj.height),
  449. )
  450.  
  451. elif isinstance(lt_obj, LTImage):
  452. saved_file = self.save_image(lt_obj)
  453. if saved_file:
  454. self.add_widget(
  455. PDFImageWidget(
  456. source=saved_file,
  457. bbox=lt_obj.bbox
  458. )
  459. )
  460.  
  461. elif isinstance(lt_obj, LTFigure):
  462. self._render_content(lt_obj)
  463.  
  464. # all of these are actually LTCurves, but all types here for
  465. # clarity
  466. elif isinstance(lt_obj, (LTLine, LTRect, LTCurve)):
  467. self.add_widget(
  468. PDFCurveWidget(
  469. points=lt_obj.pts or [],
  470. line_width=lt_obj.linewidth or 1.0,
  471. stroke=lt_obj.stroke,
  472. fill=lt_obj.fill,
  473. even_odd=lt_obj.evenodd,
  474. # colors seem to be indices, to some dict i
  475. # can't find in what pdfminer exposes
  476. color='#FFFFFFFF', # lt_obj.stroking_color or
  477. fill_color='#00000000' # lt_obj.non_stroking_color or
  478. )
  479. )
  480.  
  481. def save_image(self, lt_image):
  482. """Try to save the image data from this LTImage object, and
  483. return the file name, if successful
  484. """
  485. if lt_image.stream:
  486. file_stream = lt_image.stream.get_rawdata()
  487. if file_stream:
  488. file_ext = self.determine_image_type(file_stream[0:4])
  489. if file_ext:
  490. fd, fn = mkstemp(dir=self.manager._tmpdir, suffix='.{}'.format(file_ext))
  491. write(fd, file_stream)
  492. close(fd)
  493. return fn
  494.  
  495. @staticmethod
  496. def determine_image_type(stream_first_4_bytes):
  497. """Find out the image file type based on the magic number comparison of the first 4 (or 2) bytes"""
  498. file_type = None
  499. bytes_as_hex = b2a_hex(stream_first_4_bytes)
  500. if bytes_as_hex.startswith(b'ffd8'):
  501. file_type = '.jpeg'
  502. elif bytes_as_hex == b'89504e47':
  503. file_type = '.png'
  504. elif bytes_as_hex == b'47494638':
  505. file_type = '.gif'
  506. elif bytes_as_hex.startswith(b'424d'):
  507. file_type = '.bmp'
  508. return file_type
  509.  
  510. def add_text(self, text, box_pos, box_size, **kwargs):
  511. label = self.labels.get((box_pos, box_pos))
  512. if not label:
  513. label = PDFLabelWidget(text=text, pos=box_pos, size=box_size, **kwargs)
  514. self.labels[(box_pos, box_size)] = label
  515. self.add_widget(label)
  516. else:
  517. label.text += text
  518.  
  519. def add_image(self, lt_image):
  520. source = self.save_image(lt_image)
  521. if source:
  522. image = PDFImageWidget(
  523. source=source,
  524. pos=(lt_image.x0, lt_image.y0),
  525. size=(lt_image.widt, lt_image.height)
  526. )
  527. self.add_widget(image)
  528. self.images.append(image)
  529.  
  530.  
  531. if __name__ == '__main__':
  532. from sys import argv
  533. from kivy.base import runTouchApp
  534. from kivy.uix.scrollview import ScrollView
  535.  
  536. if len(argv) > 1:
  537. fn = argv[1]
  538. else:
  539. fn = a
  540. root = PDFDocumentWidget(source=fn, cols=1)
  541. runTouchApp(root)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement