• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python urlparse.urljoin函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中urllib2.urlparse.urljoin函数的典型用法代码示例。如果您正苦于以下问题:Python urljoin函数的具体用法?Python urljoin怎么用?Python urljoin使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了urljoin函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: parse_products

    def parse_products(self, response):
        print "parse_products", response.url
        sel = Selector(response)
        breadcrumb = sel.xpath('//div[contains(@class,"breadCrumb")]')
        categories = [span for span in breadcrumb.xpath(".//span[@itemprop='title']/text()").extract()[1:]]
        categories.append(breadcrumb.xpath(".//span/text()").extract()[-1])
        print categories
        
        for product in sel.xpath('//div[contains(@id,"quickLookItem")]'):
            # check if it is a multistore product
            if product.xpath('.//span[contains(@id, "numStoresQA")]'):
                print product.xpath(".//a/@href").extract()[0]
                url = product.xpath(".//a/@href").extract()[0]
                url = "/".join(url.split("/")[:-1])+"/prices"
                yield Request(urlparse.urljoin(response.url, url), callback=self.parse_multiple_store_product)
            else:
                # It is not a multistore product. Parse it.
                item = ShoppingdotcomItem()
                item["categories"] = categories
                item["product_name"] = product.xpath(".//span[contains(@id, 'nameQA')]/@title").extract()[0]
                if product.xpath(".//span[@class='placeholderImg']").extract():
                    item["image_urls"] = product.xpath(".//span[@class='placeholderImg']/text()").extract()
                else:
                    item["image_urls"] = product.xpath(".//div[@class='gridItemTop']//img/@src").extract()
                item["product_urls"] = [urlparse.urljoin(response.url, product.xpath(".//a/@href").extract()[0])]
                item["stores"] = product.xpath(".//a[@class='newMerchantName']/text()").extract()
                item["prices"] = [price.replace("\n","") for price in product.xpath(".//span[@class='productPrice']/a/text()").extract()]
                yield item

        # Check if Next page link is there then yeild request with next URL
        if sel.xpath("//a[@name='PLN']").extract():
            yield Request(urlparse.urljoin(response.url, sel.xpath("//a[@name='PLN']/@href").extract()[0]), self.parse_products)
            pass
开发者ID:Scorpio1987,项目名称:shoppingdotcom,代码行数:33,代码来源:shoppingdotcomspider.py


示例2: parse_start_url

 def parse_start_url(self, response):
     print response.url
     sel = Selector(response)
     
     for url in sel.xpath("//a"):
         #print url.xpath("@href").extract()
         href = url.xpath("@href").extract()[0] if url.xpath("@href").extract() else None
         if href and href.split("/")[-1] == "products":
             yield Request(urlparse.urljoin(response.url, href), callback=self.parse_products)
         if href and href.find("xFA-") >= 0:
             href = href.replace("xFA-", "").split("~")[0]+"/products"
             yield Request(urlparse.urljoin(response.url, href), callback=self.parse_products)
         pass
开发者ID:Scorpio1987,项目名称:shoppingdotcom,代码行数:13,代码来源:shoppingdotcomspider.py


示例3: main

def main(argv=sys.argv):
    """ Punto de entrada al programa """
    url = "http://www.vientonomade.com.ar/index.php?option=com_content&view=category&" "layout=blog&id=8&Itemid=10"
    fetcher = httplib2.Http()
    get = partial(obtener_pagina, fetcher)

    while url:
        html = get(url)
        uri, links = buscar_links(html)
        for link in links:
            try:
                print urlparse.urljoin(url, link)
            except UnicodeEncodeError:
                pass
        url = uri and urlparse.urljoin(url, uri) or None
开发者ID:D3f0,项目名称:vnomade,代码行数:15,代码来源:scrap.py


示例4: check_config

    def check_config():
        """
        Check crucial configuration details for existence and workability.

        Runs checks to see whether bugtracker's URL is reachable, whether
        backend is available at the right filename, and whether the script has
        the key arguments it needs to run: URL, backend, and database details.

        The filename for the backend in the backends/ directory needs to be the
        same as the configuration argument specifying that backend. For
        instance, invoking the Launchpad backend uses 'lp', and so the filename
        is 'lp.py'.

        Raises InvalidConfig when the backend is unknown or the configured
        server cannot be reached.
        """
        Config.check_params(['url', 'backend'])

        # The backend name must match a module filename in backends/ exactly.
        if Config.backend + ".py" not in Backend.get_all_backends():
            raise InvalidConfig('Backend "' + Config.backend + '" does not exist')

        # Probe only the site root (scheme://host/), not the full
        # configured URL.
        url = urlparse.urlparse(Config.url)
        check_url = urlparse.urljoin(url.scheme + '://' + url.netloc, '')
        print("Checking URL: " + check_url)
        req = Request(check_url)

        # NOTE(review): the reachability probe is skipped for the 'github'
        # backend — presumably its API rejects plain GETs; confirm.
        if Config.backend != 'github':
            try:
                response = urlopen(req)
            except HTTPError, e:
                raise InvalidConfig('The server could not fulfill the request '
                                    + str(e.msg) + '(' + str(e.code) + ')')
            except URLError, e:
                raise InvalidConfig('We failed to reach a server. ' + str(e.reason))
开发者ID:davidziman,项目名称:Bicho,代码行数:31,代码来源:config.py


示例5: transform

def transform(row, table):
    """Make row "link" an absolute Wikipedia URL and derive "state" from "name"."""
    record = row._asdict()
    record['link'] = urlparse.urljoin('https://pt.wikipedia.org', record['link'])
    # regexp_city_state yields (city, state) pairs; take the first match.
    city, state = regexp_city_state.findall(record['name'])[0]
    record['name'] = city
    record['state'] = state
    return record
开发者ID:abelthf,项目名称:rows,代码行数:7,代码来源:brazilian_cities_wikipedia.py


示例6: bot_send_video

def bot_send_video(gesture, video_url, video_preview_img, to_mid="u2ef38a8c1f3f1c2c63bdf9c0a629023c"):
    """Send a video message for *gesture* through the LINE trial BOT API.

    Returns the requests.Response of the POST to the events endpoint.

    NOTE(review): the `video_url` parameter is unconditionally overwritten
    below from gesture.video.url, so the passed-in value is never used —
    confirm whether that is intentional.
    """
    # Channel credentials are taken from Django settings.
    headers = {}
    headers['Content-type'] = 'application/json; charset=UTF-8'
    headers['X-Line-ChannelID'] = settings.CHANNEL_ID
    headers['X-Line-ChannelSecret'] = settings.CHANNEL_SECRET
    headers['X-Line-Trusted-User-With-ACL'] = settings.CHANNEL_MID

    api = 'https://trialbot-api.line.me/v1/events'

    body = {}
    body['to'] = [to_mid]
    body['toChannel'] = 1383378250
    body['eventType'] = "138311608800106203"

    #gesture = Gesture.objects.all()[0]
    # Build the absolute video URL from the gesture's stored file URL.
    myurl = 'https://eldertranslator.herokuapp.com/'
    video_url = urlparse.urljoin(myurl, gesture.video.url)

    # contentType/toType are LINE BOT API numeric message-type constants
    # (presumably 3 = video, 1 = single user — confirm against API docs).
    content = {
        "contentType": 3,
        "toType": 1,
        "originalContentUrl": video_url,
        "previewImageUrl": video_preview_img 
    }

    body['content'] = content
    req = requests.post(api, data=json.dumps(body), headers=headers, verify=False)
    
    return req
开发者ID:rasca0027,项目名称:ElderTranslator,代码行数:30,代码来源:line.py


示例7: parse

    def parse(self, response):
        """Download the delinquent-parcel CSV bundle and yield one tax-status
        Request per parcel.

        Fetches the ZIP linked from the page, unzips it, reads the first CSV
        found, locates the needed columns by header-name prefix, then for
        every data row builds a ReapItem and a follow-up Request to
        mctreas.org whose callback (get_tax_eligibility) completes the item.
        """
        delinquent_link = Selector(response).xpath(
            '//*[@id="box1"]/td[1]/li/font/i/a/@href').extract()
        urllib.urlretrieve(urlparse.urljoin(response.url, delinquent_link[0]), 'delinquent.zip')
        unzip('delinquent.zip', 'delinquent')

        with open(glob.glob('delinquent/*.csv')[0], 'rb') as csvfile:
            csvreader = csv.reader(csvfile, delimiter=',')
            # Header row: map each needed column name to its index. Headers
            # may carry stray double quotes, so strip those before matching.
            # NOTE(review): if an expected header is missing, its *col
            # variable stays unbound and the row loop raises NameError.
            for idx, column in enumerate(csvreader.next()):
                column = re.sub('["]', "", column).strip()
                if column.startswith("PARCELID"):
                    parcelidcol = idx
                if column.startswith("OWNERNAME1"):
                    ownernamecol = idx
                if column.startswith("PARCELLOCATION"):
                    parcellocationcol = idx
                if column.startswith("CLS"):
                    parcelclass = idx
                if column.startswith("ASMTBLDG"):
                    buildingvalue = idx
            # Data rows: one item + one tax-eligibility request per parcel.
            for row in csvreader:
                item = ReapItem()
                item['parcel_id'] = re.sub('["]', "", row[parcelidcol]).strip()
                item['parcel_location'] = row[parcellocationcol].strip()
                item['parcel_class'] = row[parcelclass].strip()
                item['building_value'] = row[buildingvalue].strip()
                request = scrapy.Request(
                    "http://mctreas.org/master.cfm?parid={0}&taxyr={1}&own1={2}".format(
                        item['parcel_id'], str(YEAR), row[ownernamecol]),
                    callback=self.get_tax_eligibility)
                request.meta['item'] = item
                yield request
开发者ID:AndrewADev,项目名称:scrapers,代码行数:32,代码来源:reap_spider.py


示例8: parse

def parse(url, body, **kwargs):
    """Extract a xici.net forum post from raw page bytes.

    Scans the GBK-decoded page for the inline ``var docData`` JavaScript
    assignment, pulls the first floor's title, absolute URL, date and text
    content out of its JSON payload, and renders them into a minimal
    GBK-encoded HTML document. Returns '<html/>' when no docData line is
    found.
    """
    for line in body.decode('gbk', errors='ignore').splitlines():
        if line.lstrip().startswith('var docData'):
            # The JSON object is embedded in a JS assignment; slice from the
            # first '{' to the last '}' to isolate it.
            l, r = line.find('{'), line.rfind('}')
            obj = json.loads(line[l:r+1])
            doc = obj['result']['docinfo'][0]['foolrinfo']
            doc['title'] = obj['result']['sDocTitle']
            doc['url'] = urlparse.urljoin('http://www.xici.net', obj['result']['strPageUrl'])
            doc['date'] = '20'+doc['LongDate']
            # Strip markup from the floor body, keeping only text.
            doc['content'] = html.fromstring(doc['floorcontent']).text_content()

            tpl = Template('''
                <html>
                <head>
                    <meta content="text/html; charset=utf-8" http-equiv="content-type">
                    <title>{{doc['title']}}</title>
                </head>
                <body>
                    <a id="title" href="{{doc['url']}}">{{doc['title']}}</a>
                    <p id="date">{{doc['date']}}</p>
                    <div id="content">{{doc['content']}}</div>
                </body>
                </html>''')

            return tpl.render(doc=doc).encode('gbk', errors='ignore')
    else:
        # for/else: runs only when the loop completes without returning,
        # i.e. no docData line was present in the page.
        return '<html/>'
开发者ID:UncleJim,项目名称:project,代码行数:27,代码来源:xici_plugin.py


示例9: submit

    def submit(self, opener, res):
        """Submit the login form to a COSign IdP.

        Fills the previously-parsed form (self.data) with the username and
        password obtained from the credential manager (self.cm), POSTs it to
        the form's action URL resolved against the response URL, and returns
        the (request, response) pair.

        :param opener: the urllib2 opener used to send the request
        :param res: the response object whose URL anchors the form action
        """
        idp_data = {}
        cm = self.cm
        data = self.data
        # Resolve the (possibly relative) form action against the page URL.
        url = urlparse.urljoin(res.url, data["form"]["action"])
        log.info("Form Authentication from: %s" % url)
        idp_data[self.username_field] = cm.get_username()
        idp_data[self.password_field] = cm.get_password()
        # Hidden fields echoed back from the login page.
        idp_data["service"] = data["service"]["value"]
        idp_data["ref"] = data["ref"]["value"]
        data = urllib.urlencode(idp_data)
        request = Request(url, data=data)
        log.info("Submitting login form")
        log.debug("POST: %s" % request.get_full_url())
        response = opener.open(request)
        return request, response
开发者ID:russell,项目名称:sibboleth,代码行数:25,代码来源:forms.py


示例10: submit

    def submit(self, opener, res):
        """Submit the WAYF (Where Are You From) form, selecting our IdP.

        :param opener: the urllib2 opener
        :param res: the response object containing the parsed WAYF form
        """
        log.info('Submitting form to wayf')
        data = self.data
        idp_name = self.idp.get_idp()
        # The WAYF page must actually list our IdP before we can pick it.
        if idp_name not in data['origin']:
            raise WAYFException(
                "Can't find IdP '{0}' in WAYF's IdP list".format(idp_name))
        # Fixed selections plus the values echoed back from the WAYF page.
        wayf_fields = {
            'origin': data['origin'][idp_name],
            'shire': data['shire']['value'],
            'providerId': data['providerId']['value'],
            'target': data['target']['value'],
            'time': data['time']['value'],
            'cache': 'false',
            'action': 'selection',
        }
        target_url = urlparse.urljoin(res.url, data['form']['action'])
        query = urllib.urlencode(wayf_fields)
        request = Request(target_url + '?' + query)
        log.debug("POST: %s" % request.get_full_url())
        response = opener.open(request)
        return request, response
开发者ID:grith,项目名称:sibboleth,代码行数:30,代码来源:forms.py


示例11: install_artifacts

def install_artifacts(artifacts, dirstruct, installdir, basestaticurl):
    """
    Install the artifacts.

    :param artifacts: mapping of artifact name -> artifact fetched from jenkins
    :param dirstruct: mapping of relative dir -> list of artifact names for it
    :param installdir: root directory to install into
    :param basestaticurl: base URL for static files; must end with "/"
    :return: list of absolute destination paths that were written
    """
    assert basestaticurl.endswith("/"), "Basestaticurl should end with /"
    installed = []
    for reldir, artifactnames in dirstruct.items():
        destdir = os.path.join(installdir, reldir)
        if not os.path.exists(destdir):
            log.warn(msg="Making install directory %s" % destdir)
            os.makedirs(destdir)
        else:
            assert os.path.isdir(destdir)
        for artifactname in artifactnames:
            destpath = os.path.abspath(os.path.join(destdir, artifactname))
            # Direct dict membership test (the original scanned .keys(),
            # which builds a list and does an O(n) lookup on Python 2).
            if artifactname in artifacts:
                # The artifact must be loaded from jenkins
                theartifact = artifacts[artifactname]
            else:
                # It's probably a static file, we can get it from the static collection
                staticurl = urlparse.urljoin(basestaticurl, artifactname)
                theartifact = Artifact(artifactname, staticurl)
            theartifact.save(destpath)
            installed.append(destpath)
    return installed
开发者ID:wgaggioli,项目名称:jenkinsapi,代码行数:25,代码来源:api.py


示例12: notify

    def notify(cls, alert, *args, **kwargs):
        """Send an alert notification to a HipChat room.

        Builds a colored message (optionally with a link back to the ZMON
        alert-details page), POSTs it to the HipChat v2 room-notification
        API, and records tracing tags/logs on the current span. Errors are
        logged on the span, never raised. Returns the repeat interval from
        kwargs (0 by default).
        """

        current_span = extract_span_from_kwargs(**kwargs)

        # Endpoint and token come from kwargs with config fallbacks.
        url = cls._config.get('notifications.hipchat.url')
        token = kwargs.get('token', cls._config.get('notifications.hipchat.token'))
        repeat = kwargs.get('repeat', 0)
        notify = kwargs.get('notify', False)
        alert_def = alert['alert_def']
        message_format = kwargs.get('message_format', 'html')

        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        current_span.log_kv({'room': kwargs.get('room')})

        # Green when the alert has cleared; otherwise the caller's color
        # (red by default).
        color = 'green' if alert and not alert.get('is_alert') else kwargs.get('color', 'red')

        message_text = cls._get_subject(alert, custom_message=kwargs.get('message'))

        # Optionally append a link to the alert-details page on the ZMON host.
        if kwargs.get('link', False):
            zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
            alert_id = alert['alert_def']['id']
            alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id)) if zmon_host else ''
            link_text = kwargs.get('link_text', 'go to alert')
            if message_format == 'html':
                message_text += ' -- <a href="{}" target="_blank">{}</a>'.format(alert_url, link_text)
            else:
                message_text += ' -- {} - {}'.format(link_text, alert_url)

        message = {
            'message': message_text,
            'color': color,
            'notify': notify,
            'message_format': message_format
        }

        try:
            logger.info(
                'Sending to: ' + '{}/v2/room/{}/notification?auth_token={}'.format(url, urllib.quote(kwargs['room']),
                                                                                   token) + ' ' + json.dumps(message))
            r = requests.post(
                '{}/v2/room/{}/notification'.format(url, urllib.quote(kwargs['room'])),
                json=message, params={'auth_token': token}, headers={'Content-type': 'application/json'})
            r.raise_for_status()
        except Exception:
            # Delivery failures are recorded on the span and logged; the
            # notification flow itself never raises.
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Hipchat write failed!')

        return repeat
开发者ID:drummerwolli,项目名称:zmon-worker,代码行数:58,代码来源:hipchat.py


示例13: show

 def show(self):
     """Populate the IdP dropdown and display the window.

     Fetches the IdP list from the SLCS login endpoint, inserts the names
     in sorted order, pre-selects the IdP saved in settings, then shows all
     widgets.
     """
     slcs_login_url = urlparse.urljoin(self.settings.slcs, 'login')
     idp_keys = list_idps(slcs_login_url).keys()
     idp_keys.sort()
     for i in idp_keys:
         self.idps.append_text(i)
         if i == self.settings.idp:
             # Select the entry just appended (last row of the model).
             self.idps.set_active(len(self.idps.get_model())-1)
     self.window.show_all()
开发者ID:grith,项目名称:slick.gui,代码行数:9,代码来源:main.py


示例14: relative_to_full_url

def relative_to_full_url(original_url, url):
    """
    Resolve the URL based on the original_url.

    Absolute URLs (those that already carry a network location) are
    returned unchanged; relative ones are joined against original_url.

    BUG FIX: the original joined only ``parsed.path``, silently dropping
    the query string and fragment of relative URLs (e.g. "page?p=1" lost
    "?p=1"). Joining the full URL preserves them.
    """
    try:
        # Python 2 (matches the rest of this module).
        from urllib2 import urlparse as _up
        _urlparse, _urljoin = _up.urlparse, _up.urljoin
    except ImportError:
        # Python 3 fallback; same behavior.
        from urllib.parse import urlparse as _urlparse, urljoin as _urljoin
    parsed = _urlparse(url)
    if not parsed.netloc:
        url = _urljoin(original_url, url)
    return url
开发者ID:callowayproject,项目名称:django-vintage,代码行数:9,代码来源:archiveurl.py


示例15: urlIterator

def urlIterator(startUrl, nextCssSelector):
    '''Yields the url of a page while there is a next one found by the cssSelector'''
    # This generator is slow because each step parses the page's DOM to
    # locate the "next" link.
    url = startUrl
    while url:
        yield url
        nextTags = getElementsFromUrl(url, nextCssSelector)
        url = None

        for candidate in nextTags:
            # The matched element is either the anchor itself or a container
            # holding one; unify the two cases.
            anchor = candidate if candidate.tag == 'a' else candidate.find('a')
            # `is not None` (not `!= None`): lxml elements with no children
            # are falsy, so identity comparison is the safe test.
            if anchor is not None:
                href = anchor.get('href')
                # Make the href absolute against the starting URL.
                url = urlparse.urljoin(startUrl, href)
                break


示例16: domIterator

def domIterator(startUrl, nextCssSelector):
    """Yield the DOM of each page in a chain of pages linked by nextCssSelector.

    Starting at startUrl, yields the parsed DOM, then follows the first
    matching <a> element's href (made absolute against startUrl) until no
    further link is found.
    """
    select_next = CSSSelector(nextCssSelector)
    dom = getDOM(startUrl)
    while dom is not None:
        yield dom
        candidates = select_next(dom)
        dom = None
        for tag in candidates:
            if tag.tag != 'a':
                continue
            next_url = urlparse.urljoin(startUrl, tag.get('href'))
            dom = getDOM(next_url)
            break


示例17: __init__

    def __init__(self, service_url, infrastructure_account, verify=True, oauth2=False):
        """Create a session-backed wrapper for the entities service.

        :param service_url: base URL of the service; 'api/v1/' is joined onto it
        :param infrastructure_account: account identifier stored for later use
        :param verify: whether the session verifies TLS certificates
        :param oauth2: when True, attach a Bearer token from the token store
        :raises ConfigurationError: if service_url is empty/missing
        """

        if not service_url:
            raise ConfigurationError('EntitiesWrapper improperly configured. URL is missing!')

        self.infrastructure_account = infrastructure_account
        # urljoin replaces the last path segment of service_url unless it
        # ends with '/'.
        self.__service_url = urlparse.urljoin(service_url, 'api/v1/')
        self.__session = requests.Session()

        self.__session.headers.update({'User-Agent': get_user_agent()})
        self.__session.verify = verify

        if oauth2:
            self.__session.headers.update({'Authorization': 'Bearer {}'.format(tokens.get('uid'))})
开发者ID:drummerwolli,项目名称:zmon-worker,代码行数:14,代码来源:entities_wrapper.py


示例18: find_correct_element_url

def find_correct_element_url(params, el):
    els = el.xpath('//div[@class="wx-rb bg-blue wx-rb_v1 _item"]')
    print '---------------------------'
    print len(els)
    print '---------------------------'
    for cur_el in els:
        nick_name = cur_el.xpath('//div[@class="txt-box"]/h3/em/text()')[0]
        print nick_name
        if params.name == nick_name.encode('utf8'):
            url = cur_el.xpath('@href')[0]
            url = urlparse.urljoin(base_url, url)
            return url

    return ""
开发者ID:seraphln,项目名称:wheel,代码行数:14,代码来源:bak_crawler.py


示例19: main

def main(url):
    # Example URL: http://picturesofshit.com/v/2009/10-15_-_Dudescademy/
    img_size_qry_string = '?g2_imageViewsIndex=1'

    # Go to gallery and grab links to high resolution photos
    gallery = urlopen(url)
    soup = BeautifulSoup(gallery.read())
    links = [tag.attrMap['href'] + img_size_qry_string for tag in soup.findAll(href=re.compile('JPG.html'))]

    # Create download directory based on url
    dir = re.search('[_+]([a-zA-Z0-9]+)/$', url).groups()[0]
    if not os.path.exists(dir): os.makedirs(dir)

    # Go to each link, grab the image source, and download
    links = [urlparse.urljoin(url, link) for link in links]
    for link in links:
        gallery_image = urlopen(link)
        soup = BeautifulSoup(gallery_image.read())
        image_url = urlparse.urljoin(url, soup.find('img', 'ImageFrame_none').attrMap['src'])
        file_name = re.search('([^/]+)$', image_url).groups()[0]
        file = os.path.join(dir, file_name)
        print 'Downloading %s' % file_name
        urlretrieve(image_url, file)
    print '--- Downloads Complete ---'
开发者ID:aroscoe,项目名称:scripts,代码行数:24,代码来源:pos_scrapper.py


示例20: request

 def request(self):
     """Fetch the current price for every subscribed ticker.

     Yields {ticker: price} per successful fetch and forwards each price to
     self.callback as JSON. Tickers whose flag is falsy, or whose response
     cannot be parsed, are removed from self.tickers; HTTP failures are
     logged but the ticker is kept.
     """
     # Snapshot the items explicitly: the loop deletes from self.tickers,
     # and iterating a live dict while mutating it breaks on Python 3 (it
     # was only safe here because Python 2's items() returns a copy).
     for ticker, enabled in list(self.tickers.items()):
         if not enabled:
             del self.tickers[ticker]
             continue
         path = self.real_time_path.format(ticker.lower())
         req = self.sess.get(urlparse.urljoin(self.base_url, path))
         if not req.ok:
             logging.error(req.reason)
             continue
         try:
             price = self.parse(req.text)
             self.callback(json.dumps({ticker.upper(): price}))
             yield {ticker: price}
         except Exception as e:
             # Unparseable payload: log and drop the ticker.
             logging.error(e)
             del self.tickers[ticker]
开发者ID:sterling312,项目名称:nasdaq,代码行数:17,代码来源:nasdaq_stock.py



注:本文中的urllib2.urlparse.urljoin函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python urlparse.urlparse函数代码示例发布时间:2022-05-27
下一篇:
Python urllib2.Request类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2023 极客世界.|Sitemap