Python urlparse.urlunsplit Function Code Examples


This article collects typical usage examples of the urlparse.urlunsplit function in Python. If you are wondering what urlunsplit does, how to call it, or what real-world usage looks like, the curated code examples below should help.



Twenty code examples of the urlunsplit function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
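Before the project examples, here is a minimal sketch (not taken from any of the projects below) of how urlsplit and urlunsplit fit together: urlsplit breaks a URL into a 5-part (scheme, netloc, path, query, fragment) result, and urlunsplit accepts any 5-item sequence and reassembles the URL, so the two round-trip. The URL used here is only an illustrative placeholder.

import urlparse  # Python 2; on Python 3 the same functions live in urllib.parse

url = 'http://example.com/videos/list?sort=date#latest'

# urlsplit returns a SplitResult with .scheme, .netloc, .path, .query, .fragment
parts = urlparse.urlsplit(url)

# urlunsplit takes any 5-item sequence (SplitResult, tuple or list) and
# reassembles the URL, so splitting and unsplitting round-trips exactly.
assert urlparse.urlunsplit(parts) == url

# The common pattern in the examples below: swap out one or more components,
# then rebuild. Here the query and fragment are dropped.
trimmed = urlparse.urlunsplit((parts.scheme, parts.netloc, parts.path, '', ''))
assert trimmed == 'http://example.com/videos/list'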

Example 1: video

def video():
    '''
    Video request handler.
    :return: list of available videos in json format.
    '''
    entries = []
    for entry in os.walk(settings.VIDEO_FILES_PATH):
        if not entry[2]:  # there is no file
            continue
        date = os.path.basename(entry[0])
        for basename in entry[2]:
            filename = os.path.join(entry[0], basename)
            relpath = os.path.relpath(filename,
                                      start=settings.VIDEO_FILES_PATH)
            parts = list(urlparse.urlsplit(request.base_url)[:2])
            parts.append(settings.VIDEO_FILES_LOCATION + '/' + relpath)
            parts.extend(['', ''])
            url = urlparse.urlunsplit(parts)
            parts[2] = settings.THUMBNAIL_FILES_LOCATION + '/'
            parts[2] += os.path.splitext(relpath)[0] + '.png'
            thumbnail = urlparse.urlunsplit(parts)
            entries.append({'date': date, 'url': url, 'thumbnail': thumbnail})
    entries.sort(reverse=True, key=lambda x: x['date'])

    response = Response()
    response.headers['Content-Type'] = 'application/json'
    response.data = json.dumps(entries)
    return response
Developer: weisert, Project: mpsb, Lines: 28, Source file: api.py


Example 2: handler

    def handler(self, fname, language='text', linenumbers=False, filename=None, site=None, data=None, lang=None, post=None):
        """Create HTML for a listing."""
        fname = fname.replace('/', os.sep)
        if len(self.folders) == 1:
            listings_folder = next(iter(self.folders.keys()))
            if fname.startswith(listings_folder):
                fpath = os.path.join(fname)  # new syntax: specify folder name
            else:
                # old syntax: don't specify folder name
                fpath = os.path.join(listings_folder, fname)
        else:
            # must be new syntax: specify folder name
            fpath = os.path.join(fname)
        linenumbers = 'table' if linenumbers else False
        deps = [fpath]
        with open(fpath, 'r') as inf:
            target = urlunsplit(
                ("link", 'listing', fpath.replace('\\', '/'), '', ''))
            src_target = urlunsplit(
                ("link", 'listing_source', fpath.replace('\\', '/'), '', ''))
            src_label = self.site.MESSAGES('Source')

            data = inf.read()
            lexer = pygments.lexers.get_lexer_by_name(language)
            formatter = pygments.formatters.get_formatter_by_name(
                'html', linenos=linenumbers)
            output = '<a href="{1}">{0}</a>  <a href="{3}">({2})</a>' .format(
                fname, target, src_label, src_target) + pygments.highlight(data, lexer, formatter)

        return output, deps
Developer: getnikola, Project: nikola, Lines: 30, Source file: listing.py


Example 3: to_python

    def to_python(self, value):
        def split_url(url):
            """
            Returns a list of url parts via ``urlparse.urlsplit`` (or raises a
            ``ValidationError`` exception for certain).
            """
            try:
                return list(urlparse.urlsplit(url))
            except ValueError:
                # urlparse.urlsplit can raise a ValueError with some
                # misformatted URLs.
                raise ValidationError(self.error_messages["invalid"])

        value = super(URLField, self).to_python(value)
        if value:
            url_fields = split_url(value)
            if not url_fields[0]:
                # If no URL scheme given, assume http://
                url_fields[0] = "http"
            if not url_fields[1]:
                # Assume that if no domain is provided, that the path segment
                # contains the domain.
                url_fields[1] = url_fields[2]
                url_fields[2] = ""
                # Rebuild the url_fields list, since the domain segment may now
                # contain the path too.
                url_fields = split_url(urlparse.urlunsplit(url_fields))
            if not url_fields[2]:
                # the path portion may need to be added before query params
                url_fields[2] = "/"
            value = urlparse.urlunsplit(url_fields)
        return value
Developer: tovenja, Project: django, Lines: 32, Source file: fields.py


Example 4: get_relative_url

def get_relative_url(destination, source):
    """Get relative URL between two sources.

    http://stackoverflow.com/a/7469668/315168

    :param destination:
    :param source:
    :return: tuple (is same domain, relative url)
    """

    u_dest = urlparse.urlsplit(destination)
    u_src = urlparse.urlsplit(source)

    _uc1 = urlparse.urlunsplit(u_dest[:2]+tuple('' for i in range(3)))
    _uc2 = urlparse.urlunsplit(u_src[:2]+tuple('' for i in range(3)))

    if _uc1 != _uc2:
        ## This is a different domain
        return False, destination

    # If there is no / component in url assume it's root path
    src_path = u_src.path or "/"

    _relpath = posixpath.relpath(u_dest.path, posixpath.dirname(src_path))

    return True, _relpath
    # return True, urlparse.urlunsplit(('', '', _relpath, u_dest.query, u_dest.fragment))
Developer: kiok46, Project: webkivy, Lines: 27, Source file: relurl.py


Example 5: _generalize_url

    def _generalize_url(self, url):
        parts = urlsplit(url)
        simplified_url = urlunsplit((parts.scheme,
                                     parts.netloc,
                                     '',
                                     '',
                                     ''))
        url = simplified_url
        segments = split_path_into_segments(parts.path)
        parent_is_collection = False

        for segment in segments:
            simplified_url = simplified_url + '/' + (ID_SUBSTITUTE_CHAR if parent_is_collection else segment)
            url = url + '/' + segment
            if url in self and self._is_a_collection(url):
                parent_is_collection = True
            else:
                parent_is_collection = False

        generalized_path = urlsplit(simplified_url).path

        return urlunsplit((parts.scheme,
                           parts.netloc,
                           generalized_path,
                           parts.query,
                           parts.fragment))
Developer: 01org, Project: intelRSD, Lines: 26, Source file: discovery_container.py


Example 6: parse

    def parse(self, response):
        sel = Selector(response)

        # Extract any cars found
        cars = sel.xpath('//*[contains(@class, "inv-type-used")]')
        for c in cars:
            car = Car()

            # Title and year
            car['title'] = c.xpath('.//div/div/h1/a/text()').extract()[0].strip()
            car['year'] = car['title'][0:4]

            # Price, but remove non-number characters.
            # Examples: '$12,000', 'Please Call', etc.
            price = c.xpath('.//*[contains(@class, "value")]/text()').extract()[0]
            car['price'] = ''.join(d for d in price if d.isdigit())

            # url
            path = c.xpath('.//div/div/h1/a/@href').extract()[0]
            url = urlparse.urlparse(response.url)
            car['url'] = urlparse.urlunsplit([url.scheme, url.netloc, path, None, None])

            # Certain specs are frequently missing, so we need to handle
            # them with try / except
            specs = [
                {
                    'name': 'vin',
                    'xpath': './/*/dt[text()="VIN:"]/following-sibling::dd/text()'
                },
                {
                    'name': 'color',
                    'xpath': './/*/dt[text()="Exterior Color:"]/following-sibling::dd/text()'
                },
                {
                    'name': 'miles',
                    'xpath': './/*/dt[text()="Mileage:"]/following-sibling::dd/text()'
                },
                {
                    'name': 'transmission',
                    'xpath': './/*/dt[text()="Transmission:"]/following-sibling::dd/text()'
                }
            ]

            for s in specs:
                try:
                    car[s['name']] = c.xpath(s['xpath']).extract()[0]
                except IndexError:
                    car[s['name']] = None

            yield car

        # If there's a next page link, parse it for cars as well
        next_links = sel.xpath('//*[@rel="next"]/@href').extract()
        if len(next_links) > 0:
            query = next_links[0]
            url = urlparse.urlparse(response.url)
            base = urlparse.urlunsplit([url.scheme, url.netloc, url.path, None, None])
            next_url = urlparse.urljoin(base, query)
            # Construct url
            yield Request(next_url, callback=self.parse)
Developer: JeffPaine, Project: subaru_search, Lines: 60, Source file: subaru_spider.py


Example 7: _load_uri

    def _load_uri(self, base_uri, uri_to_resolve):
        """
        Obtain a remote instruction.

        Returns the instruction as a python object, along with the resolved uri
        """
        resolved_uri = urlparse.urlsplit(urlparse.urljoin(base_uri, uri_to_resolve))
        base_scheme = urlparse.urlsplit(base_uri).scheme
        if base_scheme is not None and base_scheme != resolved_uri.scheme:
            raise SchemeSecurityError("Cannot cross from '%s' to '%s'" % (
                base_scheme, resolved_uri.scheme))

        try:
            if resolved_uri.scheme in ['http', 'https']:
                instruction = json.loads(requests.get(resolved_uri).text)
            elif resolved_uri.scheme == '':
                instruction = json.load(open(urlparse.urlunsplit(resolved_uri)))
            else:
                raise InvalidInstructionError("Reference to unsupported scheme '%s'" % (
                    resolved_uri.scheme))
            return instruction, urlparse.urlunsplit(resolved_uri)
        except requests.exceptions.RequestException as e:
            raise InvalidInstructionError("Couldn't load '%s': %s" % (resolved_uri, e))
        except IOError as e:
            raise InvalidInstructionError("Couldn't open '%s': %s" % (resolved_uri, e))
        except ValueError:
            raise InvalidInstructionError("Invalid JSON in '%s'" % resolved_uri)
Developer: pombredanne, Project: pycaustic, Lines: 27, Source file: scraper.py


Example 8: from_url

    def from_url(url, headers=None, allowed=None):
        if headers is None:
            headers = {}

        result = urlparse.urlsplit(url)
        if result.scheme == 'qpid':
            # remove the queue from the url
            queue, query = extract_param('queue', result.query)

            if queue is None:
                raise ApplicationException('No queue provided in qpid url!')

            new_url = urlparse.urlunsplit((result.scheme, result.netloc, result.path,
                                           query, result.fragment))
            return QpidPublisher(new_url, queue, headers, allowed)

        elif result.scheme == 'rabbit':
            queue, query = extract_param('queue', result.query)

            if queue is None:
                raise ApplicationException('No queue provided in qpid url!')

            new_url = urlparse.urlunsplit(('amqp', result.netloc, result.path,
                                           query, result.fragment))
            return RabbitPublisher(new_url, queue, headers, allowed)

        elif result.scheme == 'log':
            return LogPublisher(allowed)

        elif result.scheme == 'count':
            return CountPublisher(allowed)
Developer: cwingard, Project: mi-instrument, Lines: 31, Source file: publisher.py


Example 9: requestData

 def requestData(self, basepath):
     self.log.info("Attempting to communicate with Nexus server.")
     auth = "Basic " + base64.b64encode(self.user + ':' + self.pasw)
     deppath = self.url[2] + basepath
     delpath = deppath + '/artifactorymigrator'
     runpath = delpath + '/run'
     depurl = urlparse.urlunsplit((self.url[0], self.url[1], deppath, '', ''))
     delurl = urlparse.urlunsplit((self.url[0], self.url[1], delpath, '', ''))
     runurl = urlparse.urlunsplit((self.url[0], self.url[1], runpath, '', ''))
     delheaders = {'User-Agent': 'nex2art', 'Authorization': auth}
     depheaders, runheaders = delheaders.copy(), delheaders.copy()
     depheaders['Content-Type'] = 'application/json'
     runheaders['Content-Type'] = 'text/plain'
     depjson = {'name': 'artifactorymigrator', 'type': 'groovy'}
     depjson['content'] = pkgutil.get_data('nex2art', 'resources/plugin.groovy')
     depbody = json.dumps(depjson)
     res, data = None, None
     self.log.info("Deploying extraction plugin to Nexus.")
     ex, _ = self.dorequest(depurl, depbody, depheaders, 'POST', "deploy")
     if ex == None:
         try:
             self.log.info("Executing Nexus extraction.")
             ex, res = self.dorequest(runurl, None, runheaders, 'POST', "execute", True)
         finally:
             self.log.info("Deleting extraction plugin from Nexus.")
             self.dorequest(delurl, None, delheaders, 'DELETE', "delete")
         if res != None and 'result' in res: data = json.loads(res['result'])
     if ex != None:
         self.log.error("Error accessing Nexus instance: %s", ex)
         return "Error accessing Nexus instance."
     self.log.info("Successfully fetched Nexus data.")
     return data
Developer: JFrogDev, Project: nexus2artifactory, Lines: 32, Source file: Nexus3.py


Example 10: resolve_links

	def resolve_links(self, links, pageurl):
		for x in links:
			p = urlparse.urlsplit(x)
			if p.scheme == "http":
				if p.netloc != self.hostname:
					# Remote link
					continue
				# Turn this into a host-relative url
				p = ('', '', p.path, p.query, '')

			if p[4] != "" or p[3] != "":
				# Remove fragments (part of the url past #)
				p = (p[0], p[1], p[2], '', '')

			if p[0] == "":
				if p[2] == "":
					# Nothing in the path, so it's a pure fragment url
					continue

				if p[2][0] == "/":
					# Absolute link on this host, so just return it
					yield urlparse.urlunsplit(p)
				else:
					# Relative link
					yield urlparse.urljoin(pageurl, urlparse.urlunsplit(p))
			else:
				# Ignore unknown url schemes like mailto
				pass
Developer: a1exsh, Project: pgweb, Lines: 28, Source file: basecrawler.py


Example 11: _split_uri

    def _split_uri(self, identifier):
        if isinstance(identifier, URIRef):
            scheme, netloc, path, query, fragment = urlsplit(identifier)
            if query:
                namespace, resource_id = split_uri(identifier)
            if fragment:
                # if we have a fragment, we will split there
                namespace, resource_id = urldefrag(identifier)
                namespace += "#"
            elif "/" in path and len(path) > 1:
                splits = path.split("/")
                if path.endswith("/"):
                    resource_id = "/".join(splits[-2:])
                    path = "/".join(splits[:-2]) + "/"
                    namespace = urlunsplit((scheme, netloc, path, "", ""))
                else:
                    resource_id = "/".join(splits[-1:])
                    path = "/".join(splits[:-1]) + "/"
                    namespace = urlunsplit((scheme, netloc, path, "", ""))
            elif path:
                resource_id = path
                namespace = urlunsplit((scheme, netloc, "", "", ""))
            else:
                namespace, resource_id = split_uri(identifier)

            log.debug("Split %s to %s, %s" % (identifier, namespace, resource_id))
            return namespace, resource_id
        else:
            raise ValueError("Unknown identifier type %r" % identifier)
Developer: handloomweaver, Project: agamemnon, Lines: 29, Source file: rdf_store.py


Example 12: rewrite_urls

def rewrite_urls(origin_url, urls):
    origin_pack = urlparse.urlsplit(origin_url)
    for u in urls:
        # kill breaks
        if u:
            u = re.sub("(\n|\t)", "", u)

        pack = urlparse.urlsplit(u)
        (scheme, netloc, path, query, fragment) = pack

        # try to rewrite scheme
        scheme = rewrite_scheme(pack.scheme)

        # rewrite netloc to include credentials
        if origin_pack.username and pack.hostname == origin_pack.hostname:
            netloc = assemble_netloc(origin_pack.username,\
                        origin_pack.password, pack.hostname, pack.port)

        # reassemble into url
        new_u = urlparse.urlunsplit((scheme, netloc, path, query, None))

        # no scheme or netloc, it's a path on-site
        if not scheme and not netloc and (path or query):
            path_query = urlparse.urlunsplit((None, None, path, query, None))
            new_u = urlparse.urljoin(origin_url, path_query)

        # quote spaces
        new_u = new_u.replace(" ", "%20")
        if new_u:
            yield new_u
Developer: numerodix, Project: qontexte, Lines: 30, Source file: urlrewrite.py


Example 13: verify_image

    def verify_image(self, baseURL, imageURL):
        fullImageURL = imageURL
        if not urlsplit(imageURL).scheme:
            # Resolve relative path
            fullImageURL = urljoin(baseURL, imageURL)

        echo("Checking image: {}".format(fullImageURL))
        urlparts = urlsplit(fullImageURL)
        escapedparts = self.get_escaped_address_parts_minus_host(urlparts)
        
        if urlparts.netloc and urlparts.path:
            try:                    
                conn = httplib.HTTPConnection(urlparts.netloc)
                conn.request("HEAD", urlunsplit(escapedparts))
                echo("Going to path: {}\n".format(urlunsplit(escapedparts)))
                res = conn.getresponse()
            except Exception as inst:
                self.fail("While checking image {}, encountered exception: {}".format(
                    fullImageURL, inst))
                    
            self.assertEqual(res.status, 200, 
                'The image at {} is not OK. Looking for it resulted in HTTP code: {}'.format(
                    urlunsplit([urlparts.scheme, urlparts.netloc, escapedparts[2], 
                        escapedparts[3], escapedparts[4]]), 
                    res.status))
        else:
            self.fail("The URL for this image is invalid: {}".format(fullImageURL))
Developer: Digidai, Project: Harvard-Mobile-Web, Lines: 27, Source file: check_modules.py


Example 14: get_onedrive_embed_code

    def get_onedrive_embed_code(self, onedrive_url):

        onedrive_url = onedrive_url.strip()

        # check if it already is an embed code
        embed_code_regex = '<iframe'
        matched = re.match(embed_code_regex, onedrive_url, re.IGNORECASE)

        if matched is not None:
            return onedrive_url

        scheme, netloc, path, query_string, fragment = urlsplit(onedrive_url)
        query_params = parse_qs(query_string)

        # OneDrive for Business
        odb_regex = 'https?:\/\/((\w|-)+)-my.sharepoint.com\/'
        matched = re.match(odb_regex, onedrive_url, re.IGNORECASE)

        if matched is not None:
            query_params['action'] = ['embedview']
            new_query_string = urlencode(query_params, doseq=True)
            document_url = urlunsplit((scheme, netloc, path, new_query_string, fragment))
            return self.EMBED_CODE_TEMPLATE.format(document_url)

        # OneDrive (for consumers)
        onedrive_regex = '(https?:\/\/(onedrive\.)?)(live\.com)'
        matched = re.match(onedrive_regex, onedrive_url, re.IGNORECASE)

        if matched is not None:
            new_path = path.replace('view.aspx', 'embed').replace('redir', 'embed')
            query_params = parse_qs(query_string)
            query_params['em'] = ['2']
            new_query_string = urlencode(query_params, doseq=True)
            document_url = urlunsplit((scheme, netloc, new_path, new_query_string, fragment))
            return self.EMBED_CODE_TEMPLATE.format(document_url)
Developer: introp-software, Project: xblock-onedrive, Lines: 35, Source file: onedrive.py


Example 15: normalize_url

def normalize_url(url, domain_canonical=None):
    """
    Ensure we have a value url - raise exception if not.
    
    If given, we convert the domain to a domain_canonical
    """
    url = url.strip()
    rgURL = list(urlparse.urlsplit(url))
    if rgURL[split.scheme] == '':
        url = r"http://%s" % url
        rgURL = list(urlparse.urlsplit(url))
    
    # Invalid protocol
    if rgURL[split.scheme] != "http" and rgURL[split.scheme] != "https":
        raise reqfilter.Error("Invalid protocol: %s" % rgURL[split.scheme]) 

    if domain_canonical is not None:
        rgURL[split.domain] = domain_canonical
    
    if rgURL[split.domain]:
        rgURL[split.domain] = rgURL[split.domain].lower()
    
    if not rgURL[split.domain] or not regDomain.search(rgURL[split.domain]) or len(rgURL[split.domain]) > 255:
        raise reqfilter.Error("Invalid URL: %s" % urlparse.urlunsplit(rgURL))

    # Always end naked domains with a trailing slash as canonical
    if rgURL[split.path] == '':
        rgURL[split.path] = '/'

    return urlparse.urlunsplit(rgURL)
Developer: BenFort, Project: startpad, Lines: 30, Source file: util.py


Example 16: login

def login(self):
    """
    Set a cookie and redirect to the url that we tried to
    authenticate against originally.

    FIXME - I don't think we need this any more now that the EULA is gone -EAD
    """

    request = self.REQUEST
    response = request['RESPONSE']

    login = request.get('__ac_name', '')
    password = request.get('__ac_password', '')
    submitted = request.get('submitted', '')

    pas_instance = self._getPAS()

    if pas_instance is not None:
        try:
            pas_instance.updateCredentials(request, response, login, password)
        except (KeyError, POSKeyError):
            # see defect ZEN-2942 If the time changes while the server is running
            # set the session database to a sane state.
            ts = self.unrestrictedTraverse('/temp_folder/session_data')
            ts._reset()
            _originalResetCredentials(self, request, response)

    came_from = request.form.get('came_from') or ''
    if came_from:
        parts = urlparse.urlsplit(came_from)
        querydict = parse_qs(parts[3])
        querydict.pop('terms', None)
        if 'submitted' not in querydict.keys():
            querydict['submitted'] = submitted
        newqs = urllib.urlencode(querydict, doseq=True)
        parts = parts[:3] + (newqs,) + parts[4:]
        came_from = urlparse.urlunsplit(parts)
    else:
        submittedQs = 'submitted=%s' % submitted
        came_from = '/zport/dmd?%s' % submittedQs

    if not self.dmd.acceptedTerms:
        url = "%s/zenoss_terms/?came_from=%s" % (
                    self.absolute_url(), urllib.quote(came_from))
    else:
        # get rid of host part of URL (prevents open redirect attacks)
        clean_url = ['', ''] + list(urlparse.urlsplit(came_from))[2:]
        url = urlparse.urlunsplit(clean_url)

    fragment = request.get('fragment', '')
    if fragment:
        fragment = urllib.unquote( fragment)
        if not fragment.startswith( '#'):
            fragment = '#' + fragment
        url += fragment

    if self.dmd.uuid is None:
        self.dmd.uuid = str(uuid1())

    return response.redirect(url)
Developer: damilare, Project: zenoss-prodbin, Lines: 60, Source file: pasmonkey.py


Example 17: get

 def get(self):
     articles = models.Article.all().order('-pubdate').fetch(9)
     items = []
     mostRecentDate = None
     url_parts = list(urlparse.urlsplit(self.request.url)[0:2])
     for article in articles:
         if not mostRecentDate:
             mostRecentDate = article.pubdate
         article.rimages = [db.get(image) for image in article.images]
         url = urlparse.urlunsplit(url_parts + ['/page/%s' % article.slug, '', ''])
         items.append(
             PyRSS2Gen.RSSItem(
               title = article.title,
               link = url,
               description = article.text,
               pubDate = article.pubdate))
     rss = PyRSS2Gen.RSS2(
          title = "RSS feed",
          link = urlparse.urlunsplit(url_parts + ['', '', '']),
          description = "My RSS feed",
          lastBuildDate = mostRecentDate,
          items = items,
          )
     self.response.headers['Content-Type'] = 'text/xml'
     self.response.out.write(rss.to_xml())
Developer: ghber, Project: cygnuscms, Lines: 25, Source file: main.py


Example 18: key

 def key(self, obj):        
     """Return a cache key (relative path to file in cache) for an object"""
     if isnumpy(obj):
         # Key is byte view sha1 hash with .h5 extension           
         byteview = obj.view(numpy.uint8)
         key = str(hashlib.sha1(byteview).hexdigest()) + '.h5' 
     elif isurl(obj):
         # key is URL filename with an appended hash (for uniqueness)
         p = urlparse.urlsplit(obj)
         urlquery = urlparse.urlunsplit([p[0],p[1],p[2],p[3],None])        
         urlpath = urlparse.urlunsplit([p[0],p[1],p[2],None,None])
         urlhash = self._hash(obj)
         (filename, ext) = splitextension(path.basename(urlpath))
         key = str(urlhash) + str(ext)
     elif os.path.isfile(obj):
         # within cache?
         filebase = obj.split(self.root(),1)
         if len(filebase) == 2:
             # key is subpath within cache
             key = filebase[1][1:]
         else:
             # key is filename with unique appended hash
             (head, tail) = os.path.split(obj)
             (filename, ext) = splitextension(tail)                 
             namehash = hashlib.sha1(tail).hexdigest()                 
             key = filename + '_' + str(namehash[0:7]) + ext
     elif (path.isfile(self.abspath(obj)) or path.isdir(self.abspath(obj))):
         key = obj   # Already a cache key
     elif isstring(obj):
         key = obj   # Use arbitrary string if not file or url
     else:
         raise CacheError('[bobo.cache][ERROR]: Unsupported object for constructing key')
     return key
Developer: jethrotan, Project: bobo, Lines: 33, Source file: cache.py


Example 19: __init__

    def __init__(self, baseUri, headers=None, maxClients=None,
        maxConnections=None):

        self._headers = headers or HTTPHeaders()

        self._user = None
        self._passwd = None

        baseUri = baseUri.rstrip('/')
        self._scheme, loc, self._path, query, frag = urlparse.urlsplit(baseUri)

        userpass, self._hostport = urllib.splituser(loc)
        if userpass:
            self._user, self._passwd = urllib.splitpasswd(userpass)

        self._baseUri = urlparse.urlunsplit((self._scheme, self._hostport,
            self._path, None, None))

        if self._scheme not in ('http', 'https'):
            raise ValueError(self._scheme)

        self._dispatcher = RequestDispatcher(maxClients=maxClients,
            maxConnections=maxConnections)

        self._queryFragment = urlparse.urlunsplit(('', '', '', query, frag))
Developer: pombreda, Project: robj, Lines: 25, Source file: client.py


Example 20: _get_robotparser

 def _get_robotparser(self, link):
     """Return the proper robots parser for the given url or None if one
     cannot be constructed. Robot parsers are cached per scheme and
     netloc."""
     # only some schemes have a meaningful robots.txt file
     if link.scheme != 'http' and link.scheme != 'https':
         debugio.debug('crawler._get_robotparser() called with unsupported scheme (%s)' % link.scheme)
         return None
     # split out the key part of the url
     location = urlparse.urlunsplit((link.scheme, link.netloc, '', '', ''))
     # try to create a new robotparser if we don't already have one
     if not self._robotparsers.has_key(location):
         import httplib
         debugio.info('  getting robots.txt for %s' % location)
         self._robotparsers[location] = None
         try:
             rp = robotparser.RobotFileParser()
             rp.set_url(urlparse.urlunsplit(
               (link.scheme, link.netloc, '/robots.txt', '', '') ))
             rp.read()
             self._robotparsers[location] = rp
         except (TypeError, IOError, httplib.HTTPException):
             # ignore any problems setting up robot parser
             pass
     return self._robotparsers[location]
Developer: BackupTheBerlios, Project: rheinaufcms-svn, Lines: 25, Source file: crawler.py



Note: The urlparse.urlunsplit function examples in this article were compiled by 纯净天空 from source code and documentation hosted on platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective developers; copyright remains with the original authors, and redistribution or use should follow each project's license. Please do not reproduce without permission.



扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap