Advertisement
Roman_Sarnov

Untitled

Nov 7th, 2019
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.83 KB | None | 0 0
  1. from kivy.app import App
  2. from kivy.lang import Builder
  3. from kivy.uix.recycleview import RecycleView
  4. from kivy.uix.screenmanager import ScreenManager, Screen
  5. from kivy.uix.boxlayout import BoxLayout
  6. from kivy.uix.gridlayout import GridLayout
  7. from kivy.config import Config
  8.  
  9. Config.set("graphics", 'resizable', 0)
  10. Config.set("graphics", 'height', 960)
  11. Config.set("graphics", 'width', 640)
  12. Builder.load_string("""
  13.  
  14. <Button1@Button>:
  15. background_color:255, 255, 255, 1
  16. size:300,150
  17. size_hint:None, None
  18.  
  19. <Image>:
  20. source:'фон.jpg'
  21. allow_stretch: True
  22.  
  23. <Label>:
  24. color:0,0,0,1
  25. font_size:30
  26.  
  27. <BoxLayout>:
  28. orientation:'vertical'
  29. spacing:200
  30. padding:170,200
  31. background_color:1,1,1,1
  32.  
  33. <GridLayout10@GridLayout>:
  34. cols:2
  35. spacing:10,40
  36. padding:15
  37.  
  38. <MainScreen>:
  39. name:"Menu"
  40. Image
  41. BoxLayout:
  42. Button1:
  43. on_press:root.manager.current="Subjects"
  44. text:"Subjects"
  45. Button1:
  46. text:"Special"
  47. on_press:root.manager.current="Special"
  48.  
  49.  
  50. <SubjectsScreen>:
  51. name:"Subjects"
  52. Image
  53. BoxLayout:
  54. Button1:
  55. text:"10 class"
  56. on_press:root.manager.current="SubjectsScreen10"
  57. Button1:
  58. text:"11 class"
  59. on_press:root.manager.current="SubjectsScreen11"
  60.  
  61. <SubjectsScreen10>:
  62. name:"SubjectsScreen10"
  63. Image
  64. GridLayout10:
  65. Button1:
  66. text:"Геометрия"
  67. Button1:
  68. text:"Алгебра"
  69. Button1:
  70. text:"Химия"
  71. Button1:
  72. text:"Физика"
  73. Button1:
  74. text:"Русская литература"
  75. Button1:
  76. text:"Русский язык"
  77. on_press:russ_yaz
  78. Button1:
  79. text:"География"
  80. Button1:
  81. text:"Биология"
  82. Button1:
  83. text:"Белорусский язык"
  84.  
  85. <SubjectsScreen11>:
  86. name:"SubjectsScreen11"
  87. Image
  88. GridLayout10:
  89. Button1:
  90. text:"Алгебра"
  91. Button1:
  92. text:"Геометрия"
  93. Button1:
  94. text:"Физика"
  95. Button1:
  96. text:"Химия"
  97. Button1:
  98. text:"Русская литература"
  99. Button1:
  100. text:"Русский язык"
  101. on_press:self.russ_yaz
  102.  
  103. Button1:
  104. text:"География"
  105. Button1:
  106. text:"Биология"
  107. Button1:
  108. text:"Белорусский язык"
  109. Button1:
  110. text:"Астрономия"
  111.  
  112. <SpecialSubjectsScreen>:
  113. name:"Special"
  114. Image
  115. AnchorLayout:
  116. Button1:
  117. text:"No material(Back)"
  118. on_press: root.manager.current='Menu'
  119. """)
  120.  
  121.  
  122. class MainScreen(Screen):
  123. pass
  124.  
  125.  
  126. class SpecialSubjectsScreen(Screen):
  127. pass
  128.  
  129.  
  130. class SubjectsScreen(Screen):
  131. pass
  132.  
  133.  
  134. class SubjectsScreen10(Screen):
  135. pass
  136.  
  137.  
  138. class SubjectsScreen11(Screen):
  139. pass
  140.  
  141.  
  142. sm = ScreenManager()
  143. sm.add_widget(MainScreen(name='Menu'))
  144. sm.add_widget(SpecialSubjectsScreen(name='Special'))
  145. sm.add_widget(SubjectsScreen(name="Subjects"))
  146. sm.add_widget(SubjectsScreen10(name="SubjectsScreen10"))
  147. sm.add_widget(SubjectsScreen11(name="SubjectsScreen11"))
  148.  
  149.  
  150. class LibraryApp(App):
  151.  
  152. def build(self):
  153. return sm
  154.  
  155. def russ_yaz(self,instance):
  156. fn = 'Дудников, А.В Русский язык.pdf'
  157. root = PDFDocumentWidget(source=fn, cols=1)
  158. runTouchApp(root)
  159.  
# Script entry point: start the library application.
# NOTE(review): this guard sits above the PDF viewer imports and classes
# defined later in this file, so they do not exist yet while the app started
# here is running (LibraryApp.russ_yaz refers to PDFDocumentWidget) --
# consider moving this guard to the end of the file.
if __name__ == '__main__':
    LibraryApp().run()
  162.  
  163.  
  164.  
  165.  
  166.  
  167. from os.path import exists
  168. from tempfile import mkdtemp, mkstemp
  169. from shutil import rmtree
  170. from binascii import b2a_hex
  171. from os import write, close
  172. from threading import Thread
  173.  
  174. from pdfminer.pdfpage import PDFPage
  175. from pdfminer.pdfparser import PDFParser
  176. from pdfminer.converter import PDFPageAggregator
  177. from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines
  178. from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
  179. from pdfminer.layout import (
  180. LAParams, LTTextBox, LTTextLine, LTFigure, LTImage, LTChar, LTCurve,
  181. LTLine, LTRect,
  182. )
  183.  
  184. from kivy.lang import Builder
  185. from kivy.clock import Clock
  186.  
  187. from kivy.graphics import Mesh, Color
  188. from kivy.graphics.tesselator import Tesselator
  189.  
  190. from kivy.uix.widget import Widget
  191. from kivy.uix.recycleview import RecycleView
  192. from kivy.uix.label import Label
  193. from kivy.uix.image import Image
  194. from kivy.uix.relativelayout import RelativeLayout
  195. from kivy.uix.boxlayout import BoxLayout
  196.  
  197. from kivy.properties import (
  198. StringProperty, ListProperty, NumericProperty, AliasProperty,
  199. DictProperty, ObjectProperty, BooleanProperty, ColorProperty,
  200. )
  201.  
  202. Builder.load_string('''
  203. #:import RGBA kivy.utils.rgba
  204.  
  205. <PDFDocumentWidget>:
  206. viewclass: 'PDFPageWidget'
  207. key_size: 'size'
  208. # async load is buggy at the moment
  209. # async_load: True
  210.  
  211. RecycleGridLayout:
  212. spacing: 5
  213. cols: root.cols
  214. rows: root.rows
  215. size_hint: None, None
  216. size: self.minimum_size
  217. default_size_hint: None, None
  218.  
  219. <PDFPageWidget>:
  220. size_hint: None, None
  221.  
  222. canvas.before:
  223. Color:
  224. rgba: RGBA('FFFFFF')
  225. Rectangle:
  226. size: self.size
  227.  
  228. <PDFLabelWidget,PDFImageWidget>:
  229. size_hint: None, None
  230.  
  231. <PDFImageWidget>:
  232. pos: self.bbox[:2]
  233. size: self.bbox[2] - self.x, self.bbox[3] - self.y
  234.  
  235. <PDFLabelWidget>:
  236. text_size: self.width, None
  237. height: self.texture_size[1]
  238. color: RGBA('000000')
  239. font_size: 8
  240.  
  241. <PDFCurveWidget>:
  242. ''')
  243.  
  244.  
  245. class PDFDocumentWidget(RecycleView):
  246. source = StringProperty()
  247. password = StringProperty()
  248. cols = NumericProperty(None)
  249. rows = NumericProperty(None)
  250. _toc = ListProperty()
  251. async_load = BooleanProperty(False)
  252.  
  253. def __init__(self, **kwargs):
  254. super(PDFDocumentWidget, self).__init__(**kwargs)
  255. self._fp = None
  256. self._document = None
  257. self._tmpdir = None
  258. self.bind(source=self.load)
  259. if self.source:
  260. self.load()
  261.  
  262. def load(self, *args):
  263. if self._fp:
  264. # close the previous pdf file
  265. self._fp.close()
  266.  
  267. pdf_doc = self.source
  268. data = []
  269. if not pdf_doc or not exists(pdf_doc):
  270. self.pages = []
  271. self._doc = []
  272. self._document = None
  273. if self._tmpdir:
  274. rmtree(self._tmpdir)
  275. self._tmpdir = None
  276.  
  277. try:
  278. # open the pdf file
  279. self._fp = fp = open(pdf_doc, 'rb')
  280. # create a parser object associated with the file object
  281. parser = PDFParser(fp)
  282. # create a PDFDocument object that stores the document structure
  283. doc = PDFDocument(parser)
  284. # connect the parser and document objects
  285. parser.set_document(doc)
  286. # supply the password for initialization
  287. # doc.initialize(self.password)
  288.  
  289. # if doc.is_extractable:
  290. # apply the function and return the result
  291. self._document = doc
  292. self._parse_toc()
  293. self._create_tmpdir()
  294. self._parse_pages()
  295. except IOError as e:
  296. # the file doesn't exist or similar problem
  297. print(e)
  298.  
  299. def _create_tmpdir(self):
  300. if not self._tmpdir:
  301. self._tmpdir = mkdtemp()
  302. return self._tmpdir
  303.  
  304. def _parse_toc(self):
  305. """With an open PDFDocument object, get the table of contents (toc) data
  306. [this is a higher-order function to be passed to with_pdf()]"""
  307. toc = []
  308. doc = self._document
  309. try:
  310. outlines = doc.get_outlines()
  311. for (level, title, dest, a, se) in outlines:
  312. toc.append((level, title))
  313. except:
  314. pass
  315. finally:
  316. self._toc = toc
  317.  
  318. def _parse_pages(self):
  319. doc = self._document
  320. if not doc:
  321. self.data = []
  322. return
  323.  
  324. data = []
  325.  
  326. rsrcmgr = PDFResourceManager()
  327. laparams = LAParams()
  328. self.device = device = PDFPageAggregator(rsrcmgr, laparams=laparams)
  329. self.interpreter = PDFPageInterpreter(rsrcmgr, device)
  330.  
  331. for i, page in enumerate(PDFPage.create_pages(doc)):
  332. p = {
  333. 'manager': self,
  334. 'page': page,
  335. 'size': page.attrs.get('MediaBox', [0, 0, 0, 0])[2:],
  336. }
  337. data.append(p)
  338. self.data = data
  339.  
  340.  
class PDFImageWidget(Image):
    """Image placed on a PDF page by bounding box."""
    # Bounding box; the KV rule for this class takes ``pos`` from bbox[:2]
    # and derives ``size`` from bbox[2] and bbox[3].
    bbox = ListProperty([0, 0, 100, 100])
  343.  
  344.  
class PDFLabelWidget(Label):
    """Label used for text found on a PDF page."""
    # Bounding box; not read by the KV rule for this class.
    bbox = ListProperty([0, 0, 100, 100])
  347.  
  348.  
  349. class PDFCurveWidget(Widget):
  350. points = ListProperty()
  351. line_width = NumericProperty()
  352. stroke = BooleanProperty(False)
  353. fill = BooleanProperty(False)
  354. even_odd = BooleanProperty()
  355. color = ColorProperty()
  356. fill_color = ColorProperty()
  357.  
  358. def __init__(self, **kwargs):
  359. super(PDFCurveWidget, self).__init__(**kwargs)
  360. build = Clock.create_trigger(self.build, 0)
  361.  
  362. self.bind(
  363. points=build,
  364. line_width=build,
  365. stroke=build,
  366. fill=build,
  367. even_odd=build,
  368. color=build,
  369. fill_color=build
  370. )
  371.  
  372. def build(self, *args):
  373. self.canvas.clear()
  374. if not self.points:
  375. return
  376.  
  377. with self.canvas:
  378. if self.fill:
  379. Color(rgba=self.fill_color)
  380. t = Tesselator()
  381. t.add_contour(self.points)
  382. if tess.tesselate:
  383. for vertices, indices in tess.meshes:
  384. Mesh(
  385. vertices=vertices,
  386. indices=indices,
  387. mode='triangle fan'
  388. )
  389. else:
  390. print("mesh didn't tesselate!")
  391.  
  392. if self.stroke:
  393. Color(rgba=self.color)
  394. Line(
  395. points=self.points,
  396. width=self.line_width
  397. )
  398.  
  399.  
  400. class PDFPageWidget(RelativeLayout):
  401. labels = DictProperty()
  402. attributes = DictProperty()
  403. manager = ObjectProperty()
  404. page = ObjectProperty()
  405. items = ListProperty()
  406.  
  407. def on_page(self, *args):
  408. if self.manager.async_load:
  409. Thread(target=self._load_page).start()
  410. else:
  411. self._load_page()
  412.  
  413. def _load_page(self):
  414. self.manager.interpreter.process_page(self.page)
  415. self.items = self.manager.device.get_result()
  416.  
  417. def on_items(self, *args):
  418. self.clear_widgets()
  419. self._render_content(self.items)
  420.  
  421. def _render_content(self, lt_objs):
  422. """Iterate through the list of LT* objects and capture the text
  423. or image data contained in each
  424. """
  425. for lt_obj in lt_objs:
  426. print(lt_obj)
  427. if isinstance(lt_obj, LTChar):
  428. self.add_text(
  429. text=lt_obj.get_text(),
  430. box_pos=(lt_obj.x0, lt_obj.y0),
  431. box_size=(lt_obj.width, lt_obj.height),
  432. # font_size=lt_obj.fontsize,
  433. # font_name=lt_obj.fontname,
  434. )
  435.  
  436. elif isinstance(lt_obj, (LTTextBox, LTTextLine)):
  437. # text, so arrange is logically based on its column width
  438. # this way is very limited style wise, and doesn't allow
  439. # support for font, color, style, etc management, as
  440. # pdfminer doesn't provide these information at text box
  441. # level, by using the following nested loop, it's
  442. # possible to have font family info, but for individual
  443. # character, which is impractical to create direct
  444. # labels for.
  445. # for obj in lt_obj:
  446. # print(obj)
  447. # for o in obj:
  448. # print(o)
  449.  
  450. self.add_text(
  451. text=lt_obj.get_text(),
  452. box_pos=(lt_obj.x0, lt_obj.y0),
  453. box_size=(lt_obj.width, lt_obj.height),
  454. )
  455.  
  456. elif isinstance(lt_obj, LTImage):
  457. saved_file = self.save_image(lt_obj)
  458. if saved_file:
  459. self.add_widget(
  460. PDFImageWidget(
  461. source=saved_file,
  462. bbox=lt_obj.bbox
  463. )
  464. )
  465.  
  466. elif isinstance(lt_obj, LTFigure):
  467. self._render_content(lt_obj)
  468.  
  469. # all of these are actually LTCurves, but all types here for
  470. # clarity
  471. elif isinstance(lt_obj, (LTLine, LTRect, LTCurve)):
  472. self.add_widget(
  473. PDFCurveWidget(
  474. points=lt_obj.pts or [],
  475. line_width=lt_obj.linewidth or 1.0,
  476. stroke=lt_obj.stroke,
  477. fill=lt_obj.fill,
  478. even_odd=lt_obj.evenodd,
  479. # colors seem to be indices, to some dict i
  480. # can't find in what pdfminer exposes
  481. color='#FFFFFFFF', # lt_obj.stroking_color or
  482. fill_color='#00000000' # lt_obj.non_stroking_color or
  483. )
  484. )
  485.  
  486. def save_image(self, lt_image):
  487. """Try to save the image data from this LTImage object, and
  488. return the file name, if successful
  489. """
  490. if lt_image.stream:
  491. file_stream = lt_image.stream.get_rawdata()
  492. if file_stream:
  493. file_ext = self.determine_image_type(file_stream[0:4])
  494. if file_ext:
  495. fd, fn = mkstemp(dir=self.manager._tmpdir, suffix='.{}'.format(file_ext))
  496. write(fd, file_stream)
  497. close(fd)
  498. return fn
  499.  
  500. @staticmethod
  501. def determine_image_type(stream_first_4_bytes):
  502. """Find out the image file type based on the magic number comparison of the first 4 (or 2) bytes"""
  503. file_type = None
  504. bytes_as_hex = b2a_hex(stream_first_4_bytes)
  505. if bytes_as_hex.startswith(b'ffd8'):
  506. file_type = '.jpeg'
  507. elif bytes_as_hex == b'89504e47':
  508. file_type = '.png'
  509. elif bytes_as_hex == b'47494638':
  510. file_type = '.gif'
  511. elif bytes_as_hex.startswith(b'424d'):
  512. file_type = '.bmp'
  513. return file_type
  514.  
  515. def add_text(self, text, box_pos, box_size, **kwargs):
  516. label = self.labels.get((box_pos, box_pos))
  517. if not label:
  518. label = PDFLabelWidget(text=text, pos=box_pos, size=box_size, **kwargs)
  519. self.labels[(box_pos, box_size)] = label
  520. self.add_widget(label)
  521. else:
  522. label.text += text
  523.  
  524. def add_image(self, lt_image):
  525. source = self.save_image(lt_image)
  526. if source:
  527. image = PDFImageWidget(
  528. source=source,
  529. pos=(lt_image.x0, lt_image.y0),
  530. size=(lt_image.widt, lt_image.height)
  531. )
  532. self.add_widget(image)
  533. self.images.append(image)
  534.  
  535.  
  536. if __name__ == '__main__':
  537. from sys import argv
  538. from kivy.base import runTouchApp
  539. from kivy.uix.scrollview import ScrollView
  540.  
  541. if len(argv) > 1:
  542. fn = argv[1]
  543. else:
  544. fn = 'Дудников, А.В Русский язык.pdf'
  545. root = PDFDocumentWidget(source=fn, cols=1)
  546. runTouchApp(root)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement