Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def findnth(haystack, needle, n):
    """Return the index of the n-th (0-indexed) occurrence of ``needle``.

    Occurrences are counted without overlap, because the search is built
    on ``str.split``.

    Args:
        haystack: String to search in.
        needle: Non-empty substring to look for (``str.split`` raises
            ``ValueError`` for an empty separator).
        n: Zero-based occurrence number.

    Returns:
        The start index of the occurrence, or -1 if ``haystack`` contains
        fewer than ``n + 1`` occurrences or ``n`` is negative.
    """
    # A negative n would turn into an unlimited or zero split below and
    # yield a meaningless offset, so reject it up front.
    if n < 0:
        return -1
    parts = haystack.split(needle, n + 1)
    # n + 1 occurrences produce at least n + 2 parts.
    if len(parts) <= n + 1:
        return -1
    # Everything after the n-th needle is parts[-1]; step back over it and
    # over the needle itself to land on the needle's first character.
    return len(haystack) - len(parts[-1]) - len(needle)
# Index of the second 'bar': mask the first one, then search again.
# NOTE: the result (8) is only a valid index into the original string
# because the placeholder 'XXX' has the same length as 'bar'.
'foo bar bar bar'.replace('bar', 'XXX', 1).find('bar')
def find_nth(haystack, needle, n):
    """Return the index of the n-th (1-indexed) non-overlapping occurrence.

    Args:
        haystack: String to search in.
        needle: Substring to look for.
        n: One-based occurrence number. Values below 1 behave like 1.

    Returns:
        The start index of the occurrence, or -1 if there are fewer than
        ``n`` non-overlapping occurrences.
    """
    start = haystack.find(needle)
    while start >= 0 and n > 1:
        # Resume after the end of the previous match so matches never overlap.
        start = haystack.find(needle, start + len(needle))
        n -= 1
    return start
- >>> find_nth("foofoofoofoo", "foofoo", 2)
- 6
def find_nth_overlapping(haystack, needle, n):
    """Return the index of the n-th (1-indexed) occurrence, allowing overlap.

    Args:
        haystack: String to search in.
        needle: Substring to look for.
        n: One-based occurrence number. Values below 1 behave like 1.

    Returns:
        The start index of the occurrence, or -1 if there are fewer than
        ``n`` (possibly overlapping) occurrences.
    """
    start = haystack.find(needle)
    while start >= 0 and n > 1:
        # Advance by a single character so overlapping matches are counted.
        start = haystack.find(needle, start + 1)
        n -= 1
    return start
- >>> find_nth_overlapping("foofoofoofoo", "foofoo", 2)
- 3
def find_2nd(string, substring):
    """Return the index of the second occurrence of ``substring``.

    The second search starts one character after the first match, so an
    overlapping second occurrence is accepted.

    Returns:
        The start index of the second occurrence, or -1 if there are fewer
        than two occurrences.
    """
    # If the first search fails it returns -1, so the second search starts
    # at index 0 and fails as well, giving -1 overall.
    return string.find(substring, string.find(substring) + 1)
- >>> import re
- >>> s = "ababdfegtduab"
- >>> [m.start() for m in re.finditer(r"ab",s)]
- [0, 2, 11]
- >>> [m.start() for m in re.finditer(r"ab",s)][2] #index 2 is third occurrence
- 11
def findnth(haystack, needle, n):
    """Locate the n-th (0-indexed), non-overlapping occurrence of ``needle``.

    Works by splitting ``haystack`` on ``needle`` at most ``n + 1`` times
    and measuring how much text is left after the last split point.

    Args:
        haystack: String to search in.
        needle: Non-empty substring to look for.
        n: Zero-based occurrence number.

    Returns:
        The start index of the occurrence, or -1 when it does not exist or
        ``n`` is negative.
    """
    # Negative n would make the maxsplit argument 0 or unlimited and the
    # arithmetic below would return a bogus offset.
    if n < 0:
        return -1
    parts = haystack.split(needle, n + 1)
    # Fewer than n + 2 parts means fewer than n + 1 occurrences.
    if len(parts) <= n + 1:
        return -1
    return len(haystack) - len(parts[-1]) - len(needle)
def find_nth(s, x, n=0, overlap=False):
    """Return the index of the n-th (0-indexed) occurrence of ``x`` in ``s``.

    Args:
        s: Object with a ``find(sub, start)`` method (e.g. a string).
        x: Substring to look for.
        n: Zero-based occurrence number.
        overlap: If true, successive matches may overlap; otherwise each
            search resumes after the end of the previous match.

    Returns:
        The start index of the occurrence, or -1 if it does not exist
        (including when ``n`` is negative).
    """
    step = 1 if overlap else len(x)
    # Start one step before index 0 so the first search begins at 0.
    i = -step
    # range() instead of the Python-2-only xrange().
    for _ in range(n + 1):
        i = s.find(x, i + step)
        if i < 0:
            break
    return i
- In [1]: import _find_nth, find_nth, mmap
- In [2]: f = open('bigfile', 'r')
- In [3]: mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
- In [4]: %time s = mm[:]
- CPU times: user 813 ms, sys: 3.25 s, total: 4.06 s
- Wall time: 17.7 s
- In [5]: %timeit find_nth.findnth(s, 'n', 1000000)
- 1 loops, best of 3: 29.9 s per loop
- In [6]: %timeit find_nth.find_nth(s, 'n', 1000000)
- 1 loops, best of 3: 774 ms per loop
- In [7]: %timeit find_nth.find_nth(mm, 'n', 1000000)
- 1 loops, best of 3: 1.21 s per loop
- #include <Python.h>
- #include <string.h>
/*
 * Return the offset of the n-th (0-indexed) occurrence of byte c within the
 * first l bytes of buf, or -1 if there are fewer than n + 1 occurrences or
 * n is negative.
 */
off_t _find_nth(const char *buf, size_t l, char c, int n) {
    size_t i;  /* same type as l: avoids a signed/unsigned comparison */

    /* A negative n can never be reached by the countdown below. */
    if (n < 0) {
        return -1;
    }
    for (i = 0; i < l; ++i) {
        /* n counts down the matches still to be skipped. */
        if (buf[i] == c && n-- == 0) {
            return (off_t) i;
        }
    }
    return -1;
}
/*
 * memchr-based variant of _find_nth: return the offset of the n-th
 * (0-indexed) occurrence of byte c within the first l bytes of buf, or -1
 * if there are fewer than n + 1 occurrences or n is negative.
 */
off_t _find_nth2(const char *buf, size_t l, char c, int n) {
    const char *cursor = buf;
    const char *end = buf + l;
    const char *hit;

    if (n < 0) {
        return -1;
    }
    for (;;) {
        /* Search only the bytes that remain; passing the full length l on
         * every call would read past the end of the buffer. */
        hit = memchr(cursor, c, (size_t) (end - cursor));
        if (!hit) {
            return -1;
        }
        if (n-- == 0) {
            return (off_t) (hit - buf);
        }
        cursor = hit + 1;
    }
}
- /* mmap_object is private in mmapmodule.c - replicate beginning here */
- typedef struct {
- PyObject_HEAD
- char *data;
- size_t size;
- } mmap_object;
/* Arguments of a find call after unpacking from Python objects. */
typedef struct {
    const char *s;  /* buffer to search */
    size_t l;       /* number of bytes in s */
    char c;         /* byte to look for */
    int n;          /* zero-based occurrence number */
} params;
- int parse_args(PyObject *args, params *P) {
- PyObject *obj;
- const char *x;
- if (!PyArg_ParseTuple(args, "Osi", &obj, &x, &P->n)) {
- return 1;
- }
- PyTypeObject *type = Py_TYPE(obj);
- if (type == &PyString_Type) {
- P->s = PyString_AS_STRING(obj);
- P->l = PyString_GET_SIZE(obj);
- } else if (!strcmp(type->tp_name, "mmap.mmap")) {
- mmap_object *m_obj = (mmap_object*) obj;
- P->s = m_obj->data;
- P->l = m_obj->size;
- } else {
- PyErr_SetString(PyExc_TypeError, "Cannot obtain char * from argument 0");
- return 1;
- }
- P->c = x[0];
- return 0;
- }
- static PyObject* py_find_nth(PyObject *self, PyObject *args) {
- params P;
- if (!parse_args(args, &P)) {
- return Py_BuildValue("i", _find_nth(P.s, P.l, P.c, P.n));
- } else {
- return NULL;
- }
- }
- static PyObject* py_find_nth2(PyObject *self, PyObject *args) {
- params P;
- if (!parse_args(args, &P)) {
- return Py_BuildValue("i", _find_nth2(P.s, P.l, P.c, P.n));
- } else {
- return NULL;
- }
- }
- static PyMethodDef methods[] = {
- {"find_nth", py_find_nth, METH_VARARGS, ""},
- {"find_nth2", py_find_nth2, METH_VARARGS, ""},
- {0}
- };
- PyMODINIT_FUNC init_find_nth(void) {
- Py_InitModule("_find_nth", methods);
- }
# Build script for the _find_nth C extension.
from distutils.core import setup, Extension

# Single-source extension module compiled from _find_nthmodule.c.
module = Extension('_find_nth', sources=['_find_nthmodule.c'])
setup(ext_modules=[module])
- In [8]: %timeit _find_nth.find_nth(mm, 'n', 1000000)
- 1 loops, best of 3: 218 ms per loop
- In [9]: %timeit _find_nth.find_nth(s, 'n', 1000000)
- 1 loops, best of 3: 216 ms per loop
- In [10]: %timeit _find_nth.find_nth2(mm, 'n', 1000000)
- 1 loops, best of 3: 307 ms per loop
- In [11]: %timeit _find_nth.find_nth2(s, 'n', 1000000)
- 1 loops, best of 3: 304 ms per loop
def find_nth(s, x, n):
    """Return the index of the n-th (1-indexed) non-overlapping occurrence.

    Args:
        s: String to search in.
        x: Substring to look for.
        n: One-based occurrence number.

    Returns:
        The start index of the occurrence, or -1 if there are fewer than
        ``n`` occurrences or ``n`` is less than 1.
    """
    if n < 1:
        return -1
    # Start a full needle length before index 0 so the first search begins
    # at 0; starting at -1 made it begin at len(x) - 1 and skip a match at
    # the very start of the string.
    i = -len(x)
    for _ in range(n):
        i = s.find(x, i + len(x))
        if i == -1:
            break
    return i
# Demo call; print() with parentheses works on both Python 2 and Python 3.
print(find_nth('bananabanana', 'an', 3))
def find_nth(s, x, n, i=0):
    """Return the index of the n-th (1-indexed) non-overlapping occurrence.

    Args:
        s: String to search in.
        x: Substring to look for.
        n: One-based occurrence number.
        i: Index at which the search starts.

    Returns:
        The start index of the occurrence, or -1 if there are fewer than
        ``n`` occurrences at or after ``i`` or ``n`` is less than 1.
    """
    # n < 1 can never satisfy the "n == 1" exit below.
    if n < 1:
        return -1
    # Iterative form of the original recursion, so a large n cannot exceed
    # the interpreter's recursion limit.
    while True:
        i = s.find(x, i)
        if n == 1 or i == -1:
            return i
        n -= 1
        i += len(x)
# Demo call; print() with parentheses works on both Python 2 and Python 3.
print(find_nth('bananabanana', 'an', 3))
# Find the second occurrence of 'test' by searching again just past the first.
text = "This is a test from a test ok"
firstTest = text.find('test')
# print() with parentheses works on both Python 2 and Python 3.
print(text.find('test', firstTest + 1))
- import itertools
- import re
def find_nth(haystack, needle, n=1):
    """
    Find the starting index of the nth occurrence of ``needle`` in
    ``haystack``.

    If ``needle`` has a ``finditer`` method (a compiled regular
    expression), a regex search is performed; otherwise ``needle`` is
    escaped and matched as an exact substring. Matches are the
    non-overlapping ones produced by ``finditer``.

    If ``needle`` doesn't appear in ``haystack`` at least ``n`` times,
    return ``-1``.

    Arguments
    ---------
    * ``haystack`` the ``str`` to search in
    * ``needle`` the substring (or compiled regex) to find
    * ``n`` an ``int`` indicating which occurrence to find (1-based);
      defaults to ``1``, and values below ``1`` behave like ``1``

    >>> find_nth("foo", "o", 1)
    1
    >>> find_nth("foo", "o", 2)
    2
    >>> find_nth("foo", "o", 3)
    -1
    >>> find_nth("foo", "b")
    -1
    >>> import re
    >>> either_o = re.compile("[oO]")
    >>> find_nth("foo", either_o, 1)
    1
    >>> find_nth("FOO", either_o, 1)
    1
    """
    if hasattr(needle, 'finditer'):
        matches = needle.finditer(haystack)
    else:
        matches = re.finditer(re.escape(needle), haystack)
    # Lazily skip the first n - 1 matches; the clamp keeps n < 1 behaving
    # like n == 1 instead of handing islice a negative start.
    nth_match = next(itertools.islice(matches, max(n - 1, 0), None), None)
    if nth_match is None:
        return -1
    return nth_match.start()
from re import finditer
from itertools import dropwhile

needle = 'an'
haystack = 'bananabanana'
n = 2

# Number the matches from 1 so that n is the 1-based occurrence wanted
# (numbering from 0 made n = 2 select the third match), and call the
# imported finditer directly rather than the unimported ``re`` module.
# NOTE: needle is used as a regular expression here, not escaped.
nth_start = next(
    dropwhile(lambda x: x[0] < n, enumerate(finditer(needle, haystack), 1))
)[1].start()
- >>> s="abcdefabcdefababcdef"
- >>> j=0
- >>> for n,i in enumerate(s):
- ... if s[n:n+2] =="ab":
- ... print n,i
- ... j=j+1
- ... if j==2: print "2nd occurence at index position: ",n
- ...
- 0 a
- 6 a
- 2nd occurence at index position: 6
- 12 a
- 14 a
import re

# Start index of every ':' in ``yourstring`` (a placeholder the reader is
# expected to define before running this snippet).
indices = [s.start() for s in re.finditer(':', yourstring)]
n = 2
# 1-based lookup; raises IndexError if there are fewer than n occurrences.
nth_entry = indices[n-1]
num_instances = len(indices)
def iter_find(haystack, needle):
    """Return the start indices of every occurrence of ``needle``.

    Overlapping occurrences are included, because every index of
    ``haystack`` is tested independently.

    Args:
        haystack: String to search in.
        needle: Substring to look for.

    Returns:
        A list of start indices in ascending order (empty if none).
    """
    # startswith(needle, i) tests at offset i without copying the tail of
    # the string the way haystack[i:] does for every index.
    return [i for i in range(len(haystack)) if haystack.startswith(needle, i)]
- >>> iter_find("http://stackoverflow.com/questions/1883980/", '/')
- [5, 6, 24, 34, 42]
def findN(s, sub, N, replaceString="XXX"):
    """Return the index of the N-th (1-indexed) non-overlapping occurrence.

    Args:
        s: String to search in.
        sub: Substring to look for.
        N: One-based occurrence number.
        replaceString: No longer used; accepted only so existing callers
            that pass it keep working.

    Returns:
        The start index of the occurrence, or -1 if there are fewer than
        ``N`` occurrences or ``N`` is less than 1.
    """
    # The replace-and-adjust trick returned -1 minus an offset when nothing
    # was found, and could match text created by the placeholder itself
    # (e.g. sub='XX' with the default 'XXX'), so search directly instead.
    if N < 1:
        return -1
    index = -len(sub)
    for _ in range(N):
        index = s.find(sub, index + len(sub))
        if index == -1:
            return -1
    return index
# Index of the 2nd "substring" in ``ori`` (a placeholder the reader is
# expected to define): the length of everything before it.
# NOTE: with fewer than two occurrences this is len(ori), not -1.
len("substring".join(ori.split("substring")[:2]))
import os

# Split the working directory on backslashes (Windows-style separators).
# The backslash must be escaped: a bare '\' is an unterminated string.
c = os.getcwd().split('\\')
# Re-join everything except the last two path components.
print('\\'.join(c[0:-2]))
def Find(String, ToFind, Occurence=1):
    """Return the index of the ``Occurence``-th (1-indexed) match.

    Overlapping matches are counted.

    Args:
        String: String to search in.
        ToFind: Substring to look for.
        Occurence: One-based occurrence number; defaults to 1.

    Returns:
        The start index of the match, or ``False`` if there is no such
        occurrence. Note that a match at index 0 compares equal to
        ``False``; callers should test the result with ``is False``.
    """
    count = 0
    index = String.find(ToFind)
    while index != -1:
        count += 1
        if count == Occurence:
            return index
        # Step one character so overlapping matches are seen too.
        index = String.find(ToFind, index + 1)
    return False
# Return -1 if the nth substr (0-indexed) does not exist, else return its index.
def find_nth(s, substr, n):
    """Return the index of the n-th (0-indexed) occurrence of ``substr``.

    Overlapping occurrences are counted.

    Args:
        s: String to search in.
        substr: Substring to look for.
        n: Zero-based occurrence number.

    Returns:
        The start index of the occurrence, or -1 if it does not exist
        (including when ``n`` is negative).
    """
    # Start at -1 so the first search begins at index 0; starting at 0
    # skipped a match at the very beginning of the string.
    i = -1
    while n >= 0:
        n -= 1
        i = s.find(substr, i + 1)
        # Stop on a miss; otherwise the next search would restart from
        # index 0 and report an earlier match again.
        if i == -1:
            break
    return i
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement